do not assume 1 consumer in AllocList.delete_all()

Ever since we introduced support for setting multiple consumers in a
single POST /allocations, the AllocationList.delete_all() method has
been housing a latent bad assumption and bug.

The AllocationList.delete_all() method used to assume that the
AllocationList's Allocation objects were only ever for a single
consumer, and took a shortcut in deleting the allocation by deleting all
allocations with the "first" Allocation's consumer UUID:

```python
    def delete_all(self):
        # Allocations can only have a single consumer, so take advantage of
        # that fact and do an efficient batch delete
        consumer_uuid = self.objects[0].consumer.uuid
        _delete_allocations_for_consumer(self._context, consumer_uuid)
        consumer_obj.delete_consumers_if_no_allocations(
            self._context, [consumer_uuid])
```

The problem with the above is that if you get all the allocations for a
single resource provider, using
AllocationList.get_all_by_resource_provider() and there are more than
one consumer allocating resources against that provider, then calling
AllocationList.delete_all() will only delete *some* of the resource
provider's allocations, not all of them.

Luckily, the handler code has never used AllocationList.delete_all()
after calling AllocationList.get_all_by_resource_provider(), and so
we've not hit this latent bug in production.

However, in the next patch in this series (the reshaper DB work), we
*do* call AllocationList.delete_all() for allocation lists for each
provider involved in the reshape operation, which is why this fix is
important to get done correctly.

Note that this patch renames AllocationList.create_all() to
AllocationList.replace_all() to make it absolutely clear that all of
the allocations for all consumers in the list are first *deleted* by the
codebase and then re-created. We also remove the check in
AllocationList.create_all() that the Allocation objects in the list must
not have an 'id' field set. The reason for that is because in order to
properly implement AllocationList.delete_all() to call DELETE FROM
allocations WHERE id IN (<...>) we need the list of allocation record
internal IDs. These id field values are now properly set on the
Allocation objects when AllocationList.get_all_by_resource_provider()
and AllocationList.get_all_by_consumer_id() are called. This allows that
returned object to have delete_all() called on it and the DELETE
statement to work properly.

Change-Id: I12393b033054683bcc3e6f20da14e6243b4d5577
Closes-bug: #1781430
This commit is contained in:
Jay Pipes 2018-07-12 14:57:35 -04:00
parent da16690f4d
commit 11c29ae470
7 changed files with 89 additions and 76 deletions

View File

@ -388,7 +388,7 @@ def _set_allocations_for_consumer(req, schema):
def _create_allocations(alloc_list):
try:
alloc_list.create_all()
alloc_list.replace_all()
LOG.debug("Successfully wrote allocations %s", alloc_list)
except Exception:
if created_new_consumer:
@ -461,7 +461,7 @@ def set_allocations(req):
# First, ensure that all consumers referenced in the payload actually
# exist. And if not, create them. Keep a record of auto-created consumers
# so we can clean them up if the end allocation create_all() fails.
# so we can clean them up if the end allocation replace_all() fails.
consumers = {} # dict of Consumer objects, keyed by consumer UUID
new_consumers_created = []
for consumer_uuid in data:
@ -483,14 +483,14 @@ def set_allocations(req):
raise
# Create a sequence of allocation objects to be used in one
# AllocationList.create_all() call, which will mean all the changes
# AllocationList.replace_all() call, which will mean all the changes
# happen within a single transaction and with resource provider
# and consumer generations (if applicable) check all in one go.
allocations = create_allocation_list(context, data, consumers)
def _create_allocations(alloc_list):
try:
alloc_list.create_all()
alloc_list.replace_all()
LOG.debug("Successfully wrote allocations %s", alloc_list)
except Exception:
_delete_consumers(new_consumers_created)

View File

@ -1597,6 +1597,15 @@ def _delete_allocations_for_consumer(ctx, consumer_id):
ctx.session.execute(del_sql)
@db_api.placement_context_manager.writer
def _delete_allocations_by_ids(ctx, alloc_ids):
"""Deletes allocations having an internal id value in the set of supplied
IDs
"""
del_sql = _ALLOC_TBL.delete().where(_ALLOC_TBL.c.id.in_(alloc_ids))
ctx.session.execute(del_sql)
def _check_capacity_exceeded(ctx, allocs):
"""Checks to see if the supplied allocation records would result in any of
the inventories involved having their capacity exceeded.
@ -1770,6 +1779,7 @@ def _get_allocations_by_provider_id(ctx, rp_id):
projects = sa.alias(_PROJECT_TBL, name="p")
users = sa.alias(_PROJECT_TBL, name="u")
cols = [
allocs.c.id,
allocs.c.resource_class_id,
allocs.c.used,
allocs.c.updated_at,
@ -1804,6 +1814,7 @@ def _get_allocations_by_consumer_uuid(ctx, consumer_uuid):
project = sa.alias(_PROJECT_TBL, name="p")
user = sa.alias(_USER_TBL, name="u")
cols = [
allocs.c.id,
allocs.c.resource_provider_id,
rp.c.name.label("resource_provider_name"),
rp.c.uuid.label("resource_provider_uuid"),
@ -1937,11 +1948,6 @@ class AllocationList(base.ObjectListBase, base.VersionedObject):
provider or consumer failed its increment check.
"""
_ensure_rc_cache(context)
# Make sure that all of the allocations are new.
for alloc in allocs:
if 'id' in alloc:
raise exception.ObjectActionError(action='create',
reason='already created')
# First delete any existing allocations for any consumers. This
# provides a clean slate for the consumers mentioned in the list of
@ -1989,7 +1995,9 @@ class AllocationList(base.ObjectListBase, base.VersionedObject):
resource_class_id=rc_id,
consumer_id=consumer_id,
used=alloc.used)
context.session.execute(ins_stmt)
res = context.session.execute(ins_stmt)
alloc.id = res.lastrowid
alloc.obj_reset_changes()
# Generation checking happens here. If the inventory for this resource
# provider changed out from under us, this will raise a
@ -2034,7 +2042,7 @@ class AllocationList(base.ObjectListBase, base.VersionedObject):
external_id=rec['user_external_id']))
objs.append(
Allocation(
context, resource_provider=rp,
context, id=rec['id'], resource_provider=rp,
resource_class=_RC_CACHE.string_from_id(
rec['resource_class_id']),
consumer=consumer,
@ -2073,7 +2081,8 @@ class AllocationList(base.ObjectListBase, base.VersionedObject):
# fields returned by _get_allocations_by_consumer_id().
objs = [
Allocation(
context, resource_provider=ResourceProvider(
context, id=rec['id'],
resource_provider=ResourceProvider(
context,
id=rec['resource_provider_id'],
uuid=rec['resource_provider_uuid'],
@ -2088,24 +2097,25 @@ class AllocationList(base.ObjectListBase, base.VersionedObject):
alloc_list = cls(context, objects=objs)
return alloc_list
def create_all(self):
"""Create the supplied allocations.
def replace_all(self):
"""Replace the supplied allocations.
Note that the Allocation objects that are created are not
returned to the caller, nor are their database ids set. If
those ids are required use one of the get_all_by* methods.
:note: This method always deletes all allocations for all consumers
referenced in the list of Allocation objects and then replaces
the consumer's allocations with the Allocation objects. In doing
so, it will end up setting the Allocation.id attribute of each
Allocation object.
"""
# TODO(jaypipes): Retry the allocation writes on
# ConcurrentUpdateDetected
self._set_allocations(self._context, self.objects)
def delete_all(self):
# Allocations can only have a single consumer, so take advantage of
# that fact and do an efficient batch delete
consumer_uuid = self.objects[0].consumer.uuid
_delete_allocations_for_consumer(self._context, consumer_uuid)
consumer_uuids = set(alloc.consumer.uuid for alloc in self.objects)
alloc_ids = [alloc.id for alloc in self.objects]
_delete_allocations_by_ids(self._context, alloc_ids)
consumer_obj.delete_consumers_if_no_allocations(
self._context, [consumer_uuid])
self._context, consumer_uuids)
def __repr__(self):
strings = [repr(x) for x in self.objects]

View File

@ -108,7 +108,7 @@ class PlacementDbBaseTestCase(test.NoDBTestCase):
self.ctx, resource_provider=rp, resource_class=rc,
consumer=consumer, used=used)]
)
alloc_list.create_all()
alloc_list.replace_all()
return alloc_list
def _make_allocation(self, inv_dict, alloc_dict):
@ -128,5 +128,5 @@ class PlacementDbBaseTestCase(test.NoDBTestCase):
alloc = rp_obj.Allocation(self.ctx, resource_provider=rp,
consumer=c, **alloc_dict)
alloc_list = rp_obj.AllocationList(self.ctx, objects=[alloc])
alloc_list.create_all()
alloc_list.replace_all()
return rp, alloc

View File

@ -224,7 +224,7 @@ class CreateIncompleteConsumersTestCase(test.NoDBTestCase):
class DeleteConsumerIfNoAllocsTestCase(tb.PlacementDbBaseTestCase):
def test_delete_consumer_if_no_allocs(self):
"""AllocationList.create_all() should attempt to delete consumers that
"""AllocationList.replace_all() should attempt to delete consumers that
no longer have any allocations. Due to the REST API not having any way
to query for consumers directly (only via the GET
/allocations/{consumer_uuid} endpoint which returns an empty dict even
@ -265,7 +265,7 @@ class DeleteConsumerIfNoAllocsTestCase(tb.PlacementDbBaseTestCase):
resource_class=fields.ResourceClass.MEMORY_MB, used=512),
]
alloc_list = rp_obj.AllocationList(self.ctx, objects=allocs)
alloc_list.create_all()
alloc_list.replace_all()
# Validate that we have consumer records for both consumers
for c_uuid in (uuids.consumer1, uuids.consumer2):
@ -274,8 +274,8 @@ class DeleteConsumerIfNoAllocsTestCase(tb.PlacementDbBaseTestCase):
# OK, now "remove" the allocation for consumer2 by setting the used
# value for both allocated resources to 0 and re-running the
# AllocationList.create_all(). This should end up deleting the consumer
# record for consumer2
# AllocationList.replace_all(). This should end up deleting the
# consumer record for consumer2
allocs = [
rp_obj.Allocation(
self.ctx, consumer=c2, resource_provider=cn1,
@ -285,7 +285,7 @@ class DeleteConsumerIfNoAllocsTestCase(tb.PlacementDbBaseTestCase):
resource_class=fields.ResourceClass.MEMORY_MB, used=0),
]
alloc_list = rp_obj.AllocationList(self.ctx, objects=allocs)
alloc_list.create_all()
alloc_list.replace_all()
# consumer1 should still exist...
c_obj = consumer_obj.Consumer.get_by_uuid(self.ctx, uuids.consumer1)

View File

@ -1100,6 +1100,38 @@ class TestAllocation(tb.PlacementDbBaseTestCase):
self.assertEqual(0, len(allocations))
def test_delete_all_with_multiple_consumers(self):
"""Tests fix for LP #1781430 where AllocationList.delete_all() when
issued for an AllocationList returned by
AllocationList.get_by_resource_provider() where the resource provider
had multiple consumers allocated against it, left the DB in an
inconsistent state.
"""
# Create a single resource provider and allocate resources for two
# instances from it. Then grab all the provider's allocations with
# AllocationList.get_all_by_resource_provider() and attempt to delete
# them all with AllocationList.delete_all(). After which, another call
# to AllocationList.get_all_by_resource_provider() should return an
# empty list.
cn1 = self._create_provider('cn1')
tb.add_inventory(cn1, 'VCPU', 8)
c1_uuid = uuidsentinel.consumer1
c2_uuid = uuidsentinel.consumer2
for c_uuid in (c1_uuid, c2_uuid):
self.allocate_from_provider(cn1, 'VCPU', 1, consumer_id=c_uuid)
allocs = rp_obj.AllocationList.get_all_by_resource_provider(
self.ctx, cn1)
self.assertEqual(2, len(allocs))
allocs.delete_all()
allocs = rp_obj.AllocationList.get_all_by_resource_provider(
self.ctx, cn1)
self.assertEqual(0, len(allocs))
def test_multi_provider_allocation(self):
"""Tests that an allocation that includes more than one resource
provider can be created, listed and deleted properly.
@ -1158,7 +1190,7 @@ class TestAllocation(tb.PlacementDbBaseTestCase):
resource_class=fields.ResourceClass.MEMORY_MB,
used=256),
])
alloc_list.create_all()
alloc_list.replace_all()
src_allocs = rp_obj.AllocationList.get_all_by_resource_provider(
self.ctx, cn_source)
@ -1196,7 +1228,7 @@ class TestAllocation(tb.PlacementDbBaseTestCase):
resource_class=fields.ResourceClass.MEMORY_MB,
used=256),
])
new_alloc_list.create_all()
new_alloc_list.replace_all()
src_allocs = rp_obj.AllocationList.get_all_by_resource_provider(
self.ctx, cn_source)
@ -1229,7 +1261,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
"""Test that allocation check logic works with 2 resource classes on
one provider.
If this fails, we get a KeyError at create_all()
If this fails, we get a KeyError at replace_all()
"""
max_unit = 10
@ -1270,7 +1302,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
used=rp2_used)
allocation_list = rp_obj.AllocationList(
self.ctx, objects=[allocation_1, allocation_2])
allocation_list.create_all()
allocation_list.replace_all()
# create the allocations for a second consumer, until we have
# allocations for more than one consumer in the db, then we
@ -1287,7 +1319,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
allocation_list = rp_obj.AllocationList(
self.ctx, objects=[allocation_1, allocation_2])
# If we are joining wrong, this will be a KeyError
allocation_list.create_all()
allocation_list.replace_all()
def test_allocation_list_create(self):
max_unit = 10
@ -1327,7 +1359,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
# There's no inventory, we have a failure.
error = self.assertRaises(exception.InvalidInventory,
allocation_list.create_all)
allocation_list.replace_all)
# Confirm that the resource class string, not index, is in
# the exception and resource providers are listed by uuid.
self.assertIn(rp1_class, str(error))
@ -1339,14 +1371,14 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
# fail, since rp2 has no inventory.
tb.add_inventory(rp1, rp1_class, 1024, max_unit=1)
self.assertRaises(exception.InvalidInventory,
allocation_list.create_all)
allocation_list.replace_all)
# Add inventory for the second resource provider
tb.add_inventory(rp2, rp2_class, 255, reserved=2, max_unit=1)
# Now the allocations will still fail because max_unit 1
self.assertRaises(exception.InvalidAllocationConstraintsViolated,
allocation_list.create_all)
allocation_list.replace_all)
inv1 = rp_obj.Inventory(resource_provider=rp1,
resource_class=rp1_class,
total=1024, max_unit=max_unit)
@ -1359,7 +1391,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
rp2.set_inventory(rp_obj.InventoryList(objects=[inv2]))
# Now we can finally allocate.
allocation_list.create_all()
allocation_list.replace_all()
# Check that those allocations changed usage on each
# resource provider.
@ -1479,7 +1511,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
self.ctx,
objects=[allocation1, allocation2],
)
allocation_list.create_all()
allocation_list.replace_all()
allocations = rp_obj.AllocationList.get_all_by_consumer_id(
self.ctx, consumer_uuid)
@ -1501,7 +1533,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
self.ctx,
objects=[allocation1, allocation2],
)
allocation_list.create_all()
allocation_list.replace_all()
allocations = rp_obj.AllocationList.get_all_by_consumer_id(
self.ctx, consumer_uuid)
@ -1536,7 +1568,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
self.ctx,
objects=[allocation1, allocation2],
)
allocation_list.create_all()
allocation_list.replace_all()
# Check primary consumer allocations.
allocations = rp_obj.AllocationList.get_all_by_consumer_id(
@ -1565,7 +1597,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
self.ctx,
objects=[allocation1, allocation2],
)
allocation_list.create_all()
allocation_list.replace_all()
allocations = rp_obj.AllocationList.get_all_by_consumer_id(
self.ctx, consumer_uuid)
@ -1618,7 +1650,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
resource_class=fields.ResourceClass.MEMORY_MB,
used=1024)
])
alloc_list.create_all()
alloc_list.replace_all()
# Create a consumer representing the second instance
inst2_consumer = consumer_obj.Consumer(
@ -1657,7 +1689,7 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
])
self.assertRaises(exception.InvalidAllocationCapacityExceeded,
alloc_list.create_all)
alloc_list.replace_all)
# Make sure that allocations of both empty_rp and full_rp remain
# unchanged.

View File

@ -185,7 +185,7 @@ class AllocationFixture(APIFixture):
self.context,
objects=[alloc1, alloc2]
)
alloc_list.create_all()
alloc_list.replace_all()
# Create a second consumer for the VCPU
consumer_id = uuidutils.generate_uuid()
@ -215,7 +215,7 @@ class AllocationFixture(APIFixture):
alloc_list = rp_obj.AllocationList(
self.context,
objects=[alloc1, alloc2])
alloc_list.create_all()
alloc_list.replace_all()
# Create a consumer object for a different user
alt_consumer_id = uuidutils.generate_uuid()
@ -239,7 +239,7 @@ class AllocationFixture(APIFixture):
alloc_list = rp_obj.AllocationList(
self.context,
objects=[alloc1, alloc2])
alloc_list.create_all()
alloc_list.replace_all()
# The ALT_RP_XXX variables are for a resource provider that has
# not been created in the Allocation fixture

View File

@ -17,10 +17,7 @@ import testtools
from nova.api.openstack.placement import context
from nova.api.openstack.placement import exception
from nova.api.openstack.placement.objects import consumer as consumer_obj
from nova.api.openstack.placement.objects import project as project_obj
from nova.api.openstack.placement.objects import resource_provider
from nova.api.openstack.placement.objects import user as user_obj
from nova import rc_fields as fields
from nova.tests import uuidsentinel as uuids
@ -264,32 +261,6 @@ class TestInventoryList(_TestCase):
self.assertIsNone(inv_list.find('HOUSE'))
class TestAllocation(_TestCase):
@mock.patch('nova.api.openstack.placement.objects.resource_provider.'
'_ensure_rc_cache')
def test_create_with_id_fails(self, mock_rc_cache):
rp = resource_provider.ResourceProvider(context=self.context,
uuid=_RESOURCE_PROVIDER_UUID,
name=_RESOURCE_PROVIDER_NAME)
self.project = project_obj.Project(
self.context, external_id='fake-project')
self.user = user_obj.User(
self.context, external_id='fake-user')
self.consumer = consumer_obj.Consumer(
self.context, uuid=uuids.fake_instance, project=self.project,
user=self.user)
obj = resource_provider.Allocation(context=self.context,
id=99,
resource_provider=rp,
resource_class=_RESOURCE_CLASS_NAME,
consumer=self.consumer,
used=8)
alloc_list = resource_provider.AllocationList(self.context,
objects=[obj])
self.assertRaises(exception.ObjectActionError, alloc_list.create_all)
class TestAllocationListNoDB(_TestCase):
@mock.patch('nova.api.openstack.placement.objects.resource_provider.'