Merge "[placement] Retry allocation writes server side" into stable/queens

This commit is contained in:
Zuul 2018-08-09 22:35:09 +00:00 committed by Gerrit Code Review
commit ff200e12ef
3 changed files with 138 additions and 4 deletions

View File

@ -2122,6 +2122,11 @@ class ConcurrentUpdateDetected(NovaException):
"Please retry your update")
class ResourceProviderConcurrentUpdateDetected(ConcurrentUpdateDetected):
msg_fmt = _("Another thread concurrently updated the resource provider "
"data. Please retry your update")
class ResourceClassNotFound(NotFound):
msg_fmt = _("No such resource class %(resource_class)s.")

View File

@ -279,7 +279,7 @@ def _increment_provider_generation(ctx, rp):
res = ctx.session.execute(upd_stmt)
if res.rowcount != 1:
raise exception.ConcurrentUpdateDetected
raise exception.ResourceProviderConcurrentUpdateDetected()
return new_generation
@ -1992,6 +1992,10 @@ def _get_allocations_by_consumer_uuid(ctx, consumer_uuid):
@base.NovaObjectRegistry.register_if(False)
class AllocationList(base.ObjectListBase, base.NovaObject):
# The number of times to retry set_allocations if there has
# been a resource provider (not consumer) generation coflict.
RP_CONFLICT_RETRY_COUNT = 10
fields = {
'objects': fields.ListOfObjectsField('Allocation'),
}
@ -2139,9 +2143,41 @@ class AllocationList(base.ObjectListBase, base.NovaObject):
returned to the caller, nor are their database ids set. If
those ids are required use one of the get_all_by* methods.
"""
# TODO(jaypipes): Retry the allocation writes on
# ConcurrentUpdateDetected
self._set_allocations(self._context, self.objects)
# Retry _set_allocations server side if there is a
# ResourceProviderConcurrentUpdateDetected. We don't care about
# sleeping, we simply want to reset the resource provider objects
# and try again. For sake of simplicity (and because we don't have
# easy access to the information) we reload all the resource
# providers that may be present.
retries = self.RP_CONFLICT_RETRY_COUNT
while retries:
retries -= 1
try:
self._set_allocations(self._context, self.objects)
break
except exception.ResourceProviderConcurrentUpdateDetected:
LOG.debug('Retrying allocations write on resource provider '
'generation conflict')
# We only want to reload each unique resource provider once.
alloc_rp_uuids = set(
alloc.resource_provider.uuid for alloc in self.objects)
seen_rps = {}
for rp_uuid in alloc_rp_uuids:
seen_rps[rp_uuid] = ResourceProvider.get_by_uuid(
self._context, rp_uuid)
for alloc in self.objects:
rp_uuid = alloc.resource_provider.uuid
alloc.resource_provider = seen_rps[rp_uuid]
else:
# We ran out of retries so we need to raise again.
# The log will automatically have request id info associated with
# it that will allow tracing back to specific allocations.
# Attempting to extract specific consumer or resource provider
# information from the allocations is not coherent as this
# could be multiple consumers and providers.
LOG.warning('Exceeded retry limit of %d on allocations write',
self.RP_CONFLICT_RETRY_COUNT)
raise exception.ResourceProviderConcurrentUpdateDetected()
def delete_all(self):
# Allocations can only have a single consumer, so take advantage of

View File

@ -11,6 +11,7 @@
# under the License.
import functools
import mock
import os_traits
from oslo_db import exception as db_exc
@ -42,6 +43,15 @@ DISK_ALLOCATION = dict(
)
def add_inventory(rp, rc, total, **kwargs):
kwargs.setdefault('max_unit', total)
inv = rp_obj.Inventory(rp._context, resource_provider=rp,
resource_class=rc, total=total, **kwargs)
inv.obj_set_defaults()
rp.add_inventory(inv)
return inv
class ResourceProviderBaseCase(test.NoDBTestCase):
USES_DB_SELF = True
@ -1866,6 +1876,89 @@ class TestAllocationListCreateDelete(ResourceProviderBaseCase):
self.ctx, migration_uuid)
self.assertEqual(0, len(allocations))
@mock.patch('nova.objects.resource_provider.LOG')
def test_set_allocations_retry(self, mock_log):
"""Test server side allocation write retry handling."""
# Create a single resource provider and give it some inventory.
rp1 = rp_obj.ResourceProvider(
self.ctx, name='rp1', uuid=uuidsentinel.rp1)
rp1.create()
add_inventory(rp1, fields.ResourceClass.VCPU, 24,
allocation_ratio=16.0)
add_inventory(rp1, fields.ResourceClass.MEMORY_MB, 1024,
min_unit=64,
max_unit=1024,
step_size=64)
original_generation = rp1.generation
# Verify the generation is what we expect (we'll be checking again
# later).
self.assertEqual(2, original_generation)
inst_consumer = uuidsentinel.instance
alloc_list = rp_obj.AllocationList(context=self.ctx,
objects=[
rp_obj.Allocation(
context=self.ctx,
consumer_id=inst_consumer,
resource_provider=rp1,
resource_class=fields.ResourceClass.VCPU,
used=12),
rp_obj.Allocation(
context=self.ctx,
consumer_id=inst_consumer,
resource_provider=rp1,
resource_class=fields.ResourceClass.MEMORY_MB,
used=1024)
])
# Make sure the right exception happens when the retry loop expires.
with mock.patch.object(rp_obj.AllocationList,
'RP_CONFLICT_RETRY_COUNT', 0):
self.assertRaises(
exception.ResourceProviderConcurrentUpdateDetected,
alloc_list.create_all)
mock_log.warning.assert_called_with(
'Exceeded retry limit of %d on allocations write', 0)
# Make sure the right thing happens after a small number of failures.
# There's a bit of mock magic going on here to enusre that we can
# both do some side effects on _set_allocations as well as have the
# real behavior. Two generation conflicts and then a success.
mock_log.reset_mock()
with mock.patch.object(rp_obj.AllocationList,
'RP_CONFLICT_RETRY_COUNT', 3):
unmocked_set = functools.partial(
rp_obj.AllocationList._set_allocations, alloc_list)
with mock.patch(
'nova.objects.resource_provider.'
'AllocationList._set_allocations') as mock_set:
exceptions = iter([
exception.ResourceProviderConcurrentUpdateDetected(),
exception.ResourceProviderConcurrentUpdateDetected(),
])
def side_effect(*args, **kwargs):
try:
raise next(exceptions)
except StopIteration:
return unmocked_set(*args, **kwargs)
mock_set.side_effect = side_effect
alloc_list.create_all()
self.assertEqual(2, mock_log.debug.call_count)
mock_log.debug.called_with(
'Retrying allocations write on resource provider '
'generation conflict')
self.assertEqual(3, mock_set.call_count)
# Confirm we're using a different rp object after the change
# and that it has a higher generation.
new_rp = alloc_list[0].resource_provider
self.assertEqual(original_generation, rp1.generation)
self.assertEqual(original_generation + 1, new_rp.generation)
class UsageListTestCase(ResourceProviderBaseCase):