Merge "[placement] Retry allocation writes server side" into stable/queens
This commit is contained in:
commit
ff200e12ef
|
@ -2122,6 +2122,11 @@ class ConcurrentUpdateDetected(NovaException):
|
|||
"Please retry your update")
|
||||
|
||||
|
||||
class ResourceProviderConcurrentUpdateDetected(ConcurrentUpdateDetected):
    """Raised when a write loses a race on a resource provider's
    generation (another thread updated the provider's data first).

    Subclasses ConcurrentUpdateDetected so existing handlers that catch
    the parent still work; callers may retry the update on this error.
    """
    msg_fmt = _("Another thread concurrently updated the resource provider "
                "data. Please retry your update")
|
||||
|
||||
|
||||
class ResourceClassNotFound(NotFound):
    """Raised when a requested resource class does not exist."""
    msg_fmt = _("No such resource class %(resource_class)s.")
|
||||
|
||||
|
|
|
@ -279,7 +279,7 @@ def _increment_provider_generation(ctx, rp):
|
|||
|
||||
res = ctx.session.execute(upd_stmt)
|
||||
if res.rowcount != 1:
|
||||
raise exception.ConcurrentUpdateDetected
|
||||
raise exception.ResourceProviderConcurrentUpdateDetected()
|
||||
return new_generation
|
||||
|
||||
|
||||
|
@ -1992,6 +1992,10 @@ def _get_allocations_by_consumer_uuid(ctx, consumer_uuid):
|
|||
@base.NovaObjectRegistry.register_if(False)
|
||||
class AllocationList(base.ObjectListBase, base.NovaObject):
|
||||
|
||||
# The number of times to retry set_allocations if there has
|
||||
# been a resource provider (not consumer) generation conflict.
|
||||
RP_CONFLICT_RETRY_COUNT = 10
|
||||
|
||||
fields = {
|
||||
'objects': fields.ListOfObjectsField('Allocation'),
|
||||
}
|
||||
|
@ -2139,9 +2143,41 @@ class AllocationList(base.ObjectListBase, base.NovaObject):
|
|||
returned to the caller, nor are their database ids set. If
|
||||
those ids are required use one of the get_all_by* methods.
|
||||
"""
|
||||
# TODO(jaypipes): Retry the allocation writes on
|
||||
# ConcurrentUpdateDetected
|
||||
self._set_allocations(self._context, self.objects)
|
||||
# Retry _set_allocations server side if there is a
|
||||
# ResourceProviderConcurrentUpdateDetected. We don't care about
|
||||
# sleeping, we simply want to reset the resource provider objects
|
||||
# and try again. For sake of simplicity (and because we don't have
|
||||
# easy access to the information) we reload all the resource
|
||||
# providers that may be present.
|
||||
retries = self.RP_CONFLICT_RETRY_COUNT
|
||||
while retries:
|
||||
retries -= 1
|
||||
try:
|
||||
self._set_allocations(self._context, self.objects)
|
||||
break
|
||||
except exception.ResourceProviderConcurrentUpdateDetected:
|
||||
LOG.debug('Retrying allocations write on resource provider '
|
||||
'generation conflict')
|
||||
# We only want to reload each unique resource provider once.
|
||||
alloc_rp_uuids = set(
|
||||
alloc.resource_provider.uuid for alloc in self.objects)
|
||||
seen_rps = {}
|
||||
for rp_uuid in alloc_rp_uuids:
|
||||
seen_rps[rp_uuid] = ResourceProvider.get_by_uuid(
|
||||
self._context, rp_uuid)
|
||||
for alloc in self.objects:
|
||||
rp_uuid = alloc.resource_provider.uuid
|
||||
alloc.resource_provider = seen_rps[rp_uuid]
|
||||
else:
|
||||
# We ran out of retries so we need to raise again.
|
||||
# The log will automatically have request id info associated with
|
||||
# it that will allow tracing back to specific allocations.
|
||||
# Attempting to extract specific consumer or resource provider
|
||||
# information from the allocations is not coherent as this
|
||||
# could be multiple consumers and providers.
|
||||
LOG.warning('Exceeded retry limit of %d on allocations write',
|
||||
self.RP_CONFLICT_RETRY_COUNT)
|
||||
raise exception.ResourceProviderConcurrentUpdateDetected()
|
||||
|
||||
def delete_all(self):
|
||||
# Allocations can only have a single consumer, so take advantage of
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
# under the License.
|
||||
|
||||
|
||||
import functools
|
||||
import mock
|
||||
import os_traits
|
||||
from oslo_db import exception as db_exc
|
||||
|
@ -42,6 +43,15 @@ DISK_ALLOCATION = dict(
|
|||
)
|
||||
|
||||
|
||||
def add_inventory(rp, rc, total, **kwargs):
    """Create an Inventory of resource class *rc* on provider *rp*.

    Any extra keyword arguments are passed straight through to the
    Inventory constructor; ``max_unit`` defaults to *total* when the
    caller does not supply one. The created Inventory is registered on
    the provider and returned.
    """
    if 'max_unit' not in kwargs:
        kwargs['max_unit'] = total
    inventory = rp_obj.Inventory(rp._context, resource_provider=rp,
                                 resource_class=rc, total=total, **kwargs)
    inventory.obj_set_defaults()
    rp.add_inventory(inventory)
    return inventory
|
||||
|
||||
|
||||
class ResourceProviderBaseCase(test.NoDBTestCase):
|
||||
|
||||
USES_DB_SELF = True
|
||||
|
@ -1866,6 +1876,89 @@ class TestAllocationListCreateDelete(ResourceProviderBaseCase):
|
|||
self.ctx, migration_uuid)
|
||||
self.assertEqual(0, len(allocations))
|
||||
|
||||
@mock.patch('nova.objects.resource_provider.LOG')
def test_set_allocations_retry(self, mock_log):
    """Test server side allocation write retry handling.

    Exercises three things: the retry loop raising
    ResourceProviderConcurrentUpdateDetected once retries are exhausted,
    a success after a small number of generation conflicts, and that the
    allocations end up pointing at a freshly-loaded resource provider
    with a bumped generation.
    """

    # Create a single resource provider and give it some inventory.
    rp1 = rp_obj.ResourceProvider(
        self.ctx, name='rp1', uuid=uuidsentinel.rp1)
    rp1.create()
    add_inventory(rp1, fields.ResourceClass.VCPU, 24,
                  allocation_ratio=16.0)
    add_inventory(rp1, fields.ResourceClass.MEMORY_MB, 1024,
                  min_unit=64,
                  max_unit=1024,
                  step_size=64)
    original_generation = rp1.generation
    # Verify the generation is what we expect (we'll be checking again
    # later).
    self.assertEqual(2, original_generation)

    inst_consumer = uuidsentinel.instance

    alloc_list = rp_obj.AllocationList(context=self.ctx,
        objects=[
            rp_obj.Allocation(
                context=self.ctx,
                consumer_id=inst_consumer,
                resource_provider=rp1,
                resource_class=fields.ResourceClass.VCPU,
                used=12),
            rp_obj.Allocation(
                context=self.ctx,
                consumer_id=inst_consumer,
                resource_provider=rp1,
                resource_class=fields.ResourceClass.MEMORY_MB,
                used=1024)
        ])

    # Make sure the right exception happens when the retry loop expires.
    with mock.patch.object(rp_obj.AllocationList,
                           'RP_CONFLICT_RETRY_COUNT', 0):
        self.assertRaises(
            exception.ResourceProviderConcurrentUpdateDetected,
            alloc_list.create_all)
        mock_log.warning.assert_called_with(
            'Exceeded retry limit of %d on allocations write', 0)

    # Make sure the right thing happens after a small number of failures.
    # There's a bit of mock magic going on here to ensure that we can
    # both do some side effects on _set_allocations as well as have the
    # real behavior. Two generation conflicts and then a success.
    mock_log.reset_mock()
    with mock.patch.object(rp_obj.AllocationList,
                           'RP_CONFLICT_RETRY_COUNT', 3):
        unmocked_set = functools.partial(
            rp_obj.AllocationList._set_allocations, alloc_list)
        with mock.patch(
                'nova.objects.resource_provider.'
                'AllocationList._set_allocations') as mock_set:
            exceptions = iter([
                exception.ResourceProviderConcurrentUpdateDetected(),
                exception.ResourceProviderConcurrentUpdateDetected(),
            ])

            def side_effect(*args, **kwargs):
                # Raise the queued conflicts first; once exhausted, fall
                # through to the real _set_allocations implementation.
                try:
                    raise next(exceptions)
                except StopIteration:
                    return unmocked_set(*args, **kwargs)

            mock_set.side_effect = side_effect
            alloc_list.create_all()
            self.assertEqual(2, mock_log.debug.call_count)
            # BUG FIX: the original called mock_log.debug.called_with(...),
            # which is a Mock auto-created attribute and asserts nothing;
            # assert_called_with actually verifies the log call.
            mock_log.debug.assert_called_with(
                'Retrying allocations write on resource provider '
                'generation conflict')
            self.assertEqual(3, mock_set.call_count)

    # Confirm we're using a different rp object after the change
    # and that it has a higher generation.
    new_rp = alloc_list[0].resource_provider
    self.assertEqual(original_generation, rp1.generation)
    self.assertEqual(original_generation + 1, new_rp.generation)
|
||||
|
||||
|
||||
class UsageListTestCase(ResourceProviderBaseCase):
|
||||
|
||||
|
|
Loading…
Reference in New Issue