diff --git a/nova/exception.py b/nova/exception.py
index 01499a1f17f9..b60f4e0c0e9d 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -2122,6 +2122,11 @@ class ConcurrentUpdateDetected(NovaException):
                 "Please retry your update")
 
 
+class ResourceProviderConcurrentUpdateDetected(ConcurrentUpdateDetected):
+    msg_fmt = _("Another thread concurrently updated the resource provider "
+                "data. Please retry your update")
+
+
 class ResourceClassNotFound(NotFound):
     msg_fmt = _("No such resource class %(resource_class)s.")
 
diff --git a/nova/objects/resource_provider.py b/nova/objects/resource_provider.py
index b81aa1805f00..7ce5ec9685e4 100644
--- a/nova/objects/resource_provider.py
+++ b/nova/objects/resource_provider.py
@@ -279,7 +279,7 @@ def _increment_provider_generation(ctx, rp):
     res = ctx.session.execute(upd_stmt)
 
     if res.rowcount != 1:
-        raise exception.ConcurrentUpdateDetected
+        raise exception.ResourceProviderConcurrentUpdateDetected()
 
     return new_generation
 
@@ -1992,6 +1992,10 @@ def _get_allocations_by_consumer_uuid(ctx, consumer_uuid):
 
 @base.NovaObjectRegistry.register_if(False)
 class AllocationList(base.ObjectListBase, base.NovaObject):
+    # The number of times to retry set_allocations if there has
+    # been a resource provider (not consumer) generation conflict.
+    RP_CONFLICT_RETRY_COUNT = 10
+
     fields = {
         'objects': fields.ListOfObjectsField('Allocation'),
     }
@@ -2139,9 +2143,41 @@ class AllocationList(base.ObjectListBase, base.NovaObject):
         returned to the caller, nor are their database ids set. If
         those ids are required use one of the get_all_by* methods.
         """
-        # TODO(jaypipes): Retry the allocation writes on
-        # ConcurrentUpdateDetected
-        self._set_allocations(self._context, self.objects)
+        # Retry _set_allocations server side if there is a
+        # ResourceProviderConcurrentUpdateDetected. We don't care about
+        # sleeping, we simply want to reset the resource provider objects
+        # and try again. For the sake of simplicity (and because we don't
+        # have easy access to the information) we reload all the resource
+        # providers that may be present.
+        retries = self.RP_CONFLICT_RETRY_COUNT
+        while retries:
+            retries -= 1
+            try:
+                self._set_allocations(self._context, self.objects)
+                break
+            except exception.ResourceProviderConcurrentUpdateDetected:
+                LOG.debug('Retrying allocations write on resource provider '
+                          'generation conflict')
+                # We only want to reload each unique resource provider once.
+                alloc_rp_uuids = set(
+                    alloc.resource_provider.uuid for alloc in self.objects)
+                seen_rps = {}
+                for rp_uuid in alloc_rp_uuids:
+                    seen_rps[rp_uuid] = ResourceProvider.get_by_uuid(
+                        self._context, rp_uuid)
+                for alloc in self.objects:
+                    rp_uuid = alloc.resource_provider.uuid
+                    alloc.resource_provider = seen_rps[rp_uuid]
+        else:
+            # We ran out of retries so we need to raise again.
+            # The log will automatically have request id info associated with
+            # it that will allow tracing back to specific allocations.
+            # Attempting to extract specific consumer or resource provider
+            # information from the allocations is not coherent as this
+            # could be multiple consumers and providers.
+            LOG.warning('Exceeded retry limit of %d on allocations write',
+                        self.RP_CONFLICT_RETRY_COUNT)
+            raise exception.ResourceProviderConcurrentUpdateDetected()
 
     def delete_all(self):
         # Allocations can only have a single consumer, so take advantage of
diff --git a/nova/tests/functional/db/test_resource_provider.py b/nova/tests/functional/db/test_resource_provider.py
index 60ec04ca2574..adce2419ee3f 100644
--- a/nova/tests/functional/db/test_resource_provider.py
+++ b/nova/tests/functional/db/test_resource_provider.py
@@ -11,6 +11,7 @@
 #    under the License.
 
 
+import functools
 import mock
 import os_traits
 from oslo_db import exception as db_exc
@@ -42,6 +43,15 @@ DISK_ALLOCATION = dict(
 )
 
 
+def add_inventory(rp, rc, total, **kwargs):
+    kwargs.setdefault('max_unit', total)
+    inv = rp_obj.Inventory(rp._context, resource_provider=rp,
+                           resource_class=rc, total=total, **kwargs)
+    inv.obj_set_defaults()
+    rp.add_inventory(inv)
+    return inv
+
+
 class ResourceProviderBaseCase(test.NoDBTestCase):
 
     USES_DB_SELF = True
@@ -1866,6 +1876,89 @@ class TestAllocationListCreateDelete(ResourceProviderBaseCase):
             self.ctx, migration_uuid)
         self.assertEqual(0, len(allocations))
 
+    @mock.patch('nova.objects.resource_provider.LOG')
+    def test_set_allocations_retry(self, mock_log):
+        """Test server side allocation write retry handling."""
+
+        # Create a single resource provider and give it some inventory.
+        rp1 = rp_obj.ResourceProvider(
+            self.ctx, name='rp1', uuid=uuidsentinel.rp1)
+        rp1.create()
+        add_inventory(rp1, fields.ResourceClass.VCPU, 24,
+                      allocation_ratio=16.0)
+        add_inventory(rp1, fields.ResourceClass.MEMORY_MB, 1024,
+                      min_unit=64,
+                      max_unit=1024,
+                      step_size=64)
+        original_generation = rp1.generation
+        # Verify the generation is what we expect (we'll be checking again
+        # later).
+        self.assertEqual(2, original_generation)
+
+        inst_consumer = uuidsentinel.instance
+
+        alloc_list = rp_obj.AllocationList(context=self.ctx,
+            objects=[
+                rp_obj.Allocation(
+                    context=self.ctx,
+                    consumer_id=inst_consumer,
+                    resource_provider=rp1,
+                    resource_class=fields.ResourceClass.VCPU,
+                    used=12),
+                rp_obj.Allocation(
+                    context=self.ctx,
+                    consumer_id=inst_consumer,
+                    resource_provider=rp1,
+                    resource_class=fields.ResourceClass.MEMORY_MB,
+                    used=1024)
+            ])
+
+        # Make sure the right exception happens when the retry loop expires.
+        with mock.patch.object(rp_obj.AllocationList,
+                               'RP_CONFLICT_RETRY_COUNT', 0):
+            self.assertRaises(
+                exception.ResourceProviderConcurrentUpdateDetected,
+                alloc_list.create_all)
+            mock_log.warning.assert_called_with(
+                'Exceeded retry limit of %d on allocations write', 0)
+
+        # Make sure the right thing happens after a small number of failures.
+        # There's a bit of mock magic going on here to ensure that we can
+        # both do some side effects on _set_allocations as well as have the
+        # real behavior. Two generation conflicts and then a success.
+        mock_log.reset_mock()
+        with mock.patch.object(rp_obj.AllocationList,
+                               'RP_CONFLICT_RETRY_COUNT', 3):
+            unmocked_set = functools.partial(
+                rp_obj.AllocationList._set_allocations, alloc_list)
+            with mock.patch(
+                    'nova.objects.resource_provider.'
+                    'AllocationList._set_allocations') as mock_set:
+                exceptions = iter([
+                    exception.ResourceProviderConcurrentUpdateDetected(),
+                    exception.ResourceProviderConcurrentUpdateDetected(),
+                ])
+
+                def side_effect(*args, **kwargs):
+                    try:
+                        raise next(exceptions)
+                    except StopIteration:
+                        return unmocked_set(*args, **kwargs)
+
+                mock_set.side_effect = side_effect
+                alloc_list.create_all()
+                self.assertEqual(2, mock_log.debug.call_count)
+                mock_log.debug.assert_called_with(
+                    'Retrying allocations write on resource provider '
+                    'generation conflict')
+                self.assertEqual(3, mock_set.call_count)
+
+        # Confirm we're using a different rp object after the change
+        # and that it has a higher generation.
+        new_rp = alloc_list[0].resource_provider
+        self.assertEqual(original_generation, rp1.generation)
+        self.assertEqual(original_generation + 1, new_rp.generation)
+
 
 class UsageListTestCase(ResourceProviderBaseCase):
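
A note on the retry structure in create_all() above: the final else: is
attached to the while loop, not the try, so its body runs only when the loop
ends without reaching break, i.e. when every attempt raised the generation
conflict (or the retry count was zero to begin with). Below is a minimal,
self-contained sketch of that while/else retry pattern; all names in it
(WriteConflict, MAX_RETRIES, do_write, retrying_write) are hypothetical
stand-ins for illustration, not Nova APIs.

    # Sketch of the while/else retry pattern used by create_all() above.
    # Hypothetical names throughout; this is not Nova code.
    class WriteConflict(Exception):
        """Stands in for ResourceProviderConcurrentUpdateDetected."""

    MAX_RETRIES = 3

    def do_write(state):
        # Simulate two generation conflicts followed by a success.
        if state['failures'] < 2:
            state['failures'] += 1
            raise WriteConflict()

    def retrying_write(state):
        retries = MAX_RETRIES
        while retries:
            retries -= 1
            try:
                do_write(state)
                break  # Success: the while/else clause below is skipped.
            except WriteConflict:
                # On conflict, refresh stale state and go around again.
                state['refreshes'] = state.get('refreshes', 0) + 1
        else:
            # Reached only when the loop ends without break, i.e. the
            # retry budget is exhausted (or was zero to begin with).
            raise WriteConflict()

    state = {'failures': 0}
    retrying_write(state)
    assert state == {'failures': 2, 'refreshes': 2}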