Avoid deleting allocations for instances being built
The resource tracker's _remove_deleted_instances_allocations() assumes that InstanceNotFound means that an instance was deleted. That's not quite accurate, as we would also see that in the window between creating allocations and actually creating the instance in the cell database. So, the code now will kill allocations for those instances before they are created. This change makes us look up the instance with read_deleted=yes, and if we find it with deleted=True, then we do the allocation removal. This does mean that someone running a full DB archive at the instant an instance is deleted in some way that didn't result in allocation removal as well could leak those. However, we can log that (unlikely) situation. Closes-Bug: #1729371 Conflicts: nova/compute/resource_tracker.py nova/tests/unit/compute/test_resource_tracker.py NOTE(mriedem): Conflicts were due to not having change 1ff1310683
or change e3b7f43e3a
in Pike. Change-Id: I4482ac2ecf8e07c197fd24c520b7f11fd5a10945 (cherry picked from commit d176175db4
)
This commit is contained in:
parent
60d6e87cac
commit
70ef1fa159
|
@ -1156,6 +1156,7 @@ class ResourceTracker(object):
|
|||
known_instances = set(self.tracked_instances.keys())
|
||||
allocations = self.reportclient.get_allocations_for_resource_provider(
|
||||
cn.uuid) or {}
|
||||
read_deleted_context = context.elevated(read_deleted='yes')
|
||||
for instance_uuid, alloc in allocations.items():
|
||||
if instance_uuid in known_instances:
|
||||
LOG.debug("Instance %s actively managed on this compute host "
|
||||
|
@ -1163,9 +1164,23 @@ class ResourceTracker(object):
|
|||
instance_uuid, alloc)
|
||||
continue
|
||||
try:
|
||||
instance = objects.Instance.get_by_uuid(context, instance_uuid,
|
||||
instance = objects.Instance.get_by_uuid(read_deleted_context,
|
||||
instance_uuid,
|
||||
expected_attrs=[])
|
||||
except exception.InstanceNotFound:
|
||||
# The instance isn't even in the database. Either the scheduler
|
||||
# _just_ created an allocation for it and we're racing with the
|
||||
# creation in the cell database, or the instance was deleted
|
||||
# and fully archived before we got a chance to run this. The
|
||||
# former is far more likely than the latter. Avoid deleting
|
||||
# allocations for a building instance here.
|
||||
LOG.info("Instance %(uuid)s has allocations against this "
|
||||
"compute host but is not found in the database.",
|
||||
{'uuid': instance_uuid},
|
||||
exc_info=False)
|
||||
continue
|
||||
|
||||
if instance.deleted:
|
||||
# The instance is gone, so we definitely want to remove
|
||||
# allocations associated with it.
|
||||
# NOTE(jaypipes): This will not be true if/when we support
|
||||
|
|
|
@ -108,6 +108,7 @@ _INSTANCE_TYPE_FIXTURES = {
|
|||
'rxtx_factor': 0,
|
||||
'vcpu_weight': 1,
|
||||
'extra_specs': {},
|
||||
'deleted': 0,
|
||||
},
|
||||
2: {
|
||||
'id': 2,
|
||||
|
@ -121,6 +122,7 @@ _INSTANCE_TYPE_FIXTURES = {
|
|||
'rxtx_factor': 0,
|
||||
'vcpu_weight': 1,
|
||||
'extra_specs': {},
|
||||
'deleted': 0,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -129,11 +131,11 @@ _INSTANCE_TYPE_OBJ_FIXTURES = {
|
|||
1: objects.Flavor(id=1, flavorid='fakeid-1', name='fake1.small',
|
||||
memory_mb=128, vcpus=1, root_gb=1,
|
||||
ephemeral_gb=0, swap=0, rxtx_factor=0,
|
||||
vcpu_weight=1, extra_specs={}),
|
||||
vcpu_weight=1, extra_specs={}, deleted=False),
|
||||
2: objects.Flavor(id=2, flavorid='fakeid-2', name='fake1.medium',
|
||||
memory_mb=256, vcpus=2, root_gb=5,
|
||||
ephemeral_gb=0, swap=0, rxtx_factor=0,
|
||||
vcpu_weight=1, extra_specs={}),
|
||||
vcpu_weight=1, extra_specs={}, deleted=False),
|
||||
}
|
||||
|
||||
|
||||
|
@ -193,6 +195,7 @@ _INSTANCE_FIXTURES = [
|
|||
flavor = _INSTANCE_TYPE_OBJ_FIXTURES[1],
|
||||
old_flavor = _INSTANCE_TYPE_OBJ_FIXTURES[1],
|
||||
new_flavor = _INSTANCE_TYPE_OBJ_FIXTURES[1],
|
||||
deleted = False,
|
||||
),
|
||||
objects.Instance(
|
||||
id=2,
|
||||
|
@ -216,6 +219,7 @@ _INSTANCE_FIXTURES = [
|
|||
flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2],
|
||||
old_flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2],
|
||||
new_flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2],
|
||||
deleted = False,
|
||||
),
|
||||
]
|
||||
|
||||
|
@ -478,7 +482,7 @@ class TestUpdateAvailableResources(BaseTestCase):
|
|||
# parameter that update_available_resource() eventually passes
|
||||
# to _update().
|
||||
with mock.patch.object(self.rt, '_update') as update_mock:
|
||||
self.rt.update_available_resource(mock.sentinel.ctx, _NODENAME)
|
||||
self.rt.update_available_resource(mock.MagicMock(), _NODENAME)
|
||||
return update_mock
|
||||
|
||||
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
|
||||
|
@ -534,16 +538,16 @@ class TestUpdateAvailableResources(BaseTestCase):
|
|||
|
||||
vd = self.driver_mock
|
||||
vd.get_available_resource.assert_called_once_with(_NODENAME)
|
||||
get_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
get_mock.assert_called_once_with(mock.ANY, _HOSTNAME,
|
||||
_NODENAME,
|
||||
expected_attrs=[
|
||||
'system_metadata',
|
||||
'numa_topology',
|
||||
'flavor',
|
||||
'migration_context'])
|
||||
get_cn_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME,
|
||||
_NODENAME)
|
||||
migr_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
migr_mock.assert_called_once_with(mock.ANY, _HOSTNAME,
|
||||
_NODENAME)
|
||||
|
||||
expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
|
||||
|
@ -585,8 +589,7 @@ class TestUpdateAvailableResources(BaseTestCase):
|
|||
|
||||
update_mock = self._update_available_resources()
|
||||
|
||||
get_cn_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
_NODENAME)
|
||||
get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME, _NODENAME)
|
||||
expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
|
||||
vals = {
|
||||
'free_disk_gb': 5, # 6GB avail - 1 GB reserved
|
||||
|
@ -633,8 +636,7 @@ class TestUpdateAvailableResources(BaseTestCase):
|
|||
|
||||
update_mock = self._update_available_resources()
|
||||
|
||||
get_cn_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
_NODENAME)
|
||||
get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME, _NODENAME)
|
||||
expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
|
||||
vals = {
|
||||
'free_disk_gb': 5, # 6 - 1 used
|
||||
|
@ -695,8 +697,7 @@ class TestUpdateAvailableResources(BaseTestCase):
|
|||
|
||||
update_mock = self._update_available_resources()
|
||||
|
||||
get_cn_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
_NODENAME)
|
||||
get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME, _NODENAME)
|
||||
expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
|
||||
vals = {
|
||||
'free_disk_gb': 6,
|
||||
|
@ -760,8 +761,7 @@ class TestUpdateAvailableResources(BaseTestCase):
|
|||
|
||||
update_mock = self._update_available_resources()
|
||||
|
||||
get_cn_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
_NODENAME)
|
||||
get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME, _NODENAME)
|
||||
expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
|
||||
vals = {
|
||||
'free_disk_gb': 5,
|
||||
|
@ -820,8 +820,7 @@ class TestUpdateAvailableResources(BaseTestCase):
|
|||
|
||||
update_mock = self._update_available_resources()
|
||||
|
||||
get_cn_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
_NODENAME)
|
||||
get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME, _NODENAME)
|
||||
expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
|
||||
vals = {
|
||||
'free_disk_gb': 1,
|
||||
|
@ -877,8 +876,7 @@ class TestUpdateAvailableResources(BaseTestCase):
|
|||
|
||||
update_mock = self._update_available_resources()
|
||||
|
||||
get_cn_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
_NODENAME)
|
||||
get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME, _NODENAME)
|
||||
expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
|
||||
vals = {
|
||||
'free_disk_gb': 1,
|
||||
|
@ -943,8 +941,7 @@ class TestUpdateAvailableResources(BaseTestCase):
|
|||
|
||||
update_mock = self._update_available_resources()
|
||||
|
||||
get_cn_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
|
||||
_NODENAME)
|
||||
get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME, _NODENAME)
|
||||
expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
|
||||
vals = {
|
||||
# 6 total - 1G existing - 5G new flav - 1G old flav
|
||||
|
@ -1729,7 +1726,7 @@ class TestResize(BaseTestCase):
|
|||
mig_context_obj = _MIGRATION_CONTEXT_FIXTURES[instance.uuid]
|
||||
instance.migration_context = mig_context_obj
|
||||
|
||||
self.rt.update_available_resource(mock.sentinel.ctx, _NODENAME)
|
||||
self.rt.update_available_resource(mock.MagicMock(), _NODENAME)
|
||||
|
||||
migration = objects.Migration(
|
||||
id=3,
|
||||
|
@ -1830,7 +1827,7 @@ class TestResize(BaseTestCase):
|
|||
ctx = mock.MagicMock()
|
||||
|
||||
# Init compute node
|
||||
self.rt.update_available_resource(mock.sentinel.ctx, _NODENAME)
|
||||
self.rt.update_available_resource(mock.MagicMock(), _NODENAME)
|
||||
expected = self.rt.compute_nodes[_NODENAME].obj_clone()
|
||||
|
||||
instance = _INSTANCE_FIXTURES[0].obj_clone()
|
||||
|
@ -1955,7 +1952,7 @@ class TestResize(BaseTestCase):
|
|||
migr_mock.return_value = []
|
||||
get_cn_mock.return_value = _COMPUTE_NODE_FIXTURES[0]
|
||||
|
||||
self.rt.update_available_resource(mock.sentinel.ctx, _NODENAME)
|
||||
self.rt.update_available_resource(mock.MagicMock(), _NODENAME)
|
||||
|
||||
instance = _INSTANCE_FIXTURES[0].obj_clone()
|
||||
instance.task_state = task_states.RESIZE_MIGRATING
|
||||
|
@ -2086,7 +2083,7 @@ class TestResize(BaseTestCase):
|
|||
migr_mock.return_value = []
|
||||
get_cn_mock.return_value = _COMPUTE_NODE_FIXTURES[0].obj_clone()
|
||||
|
||||
self.rt.update_available_resource(mock.sentinel.ctx, _NODENAME)
|
||||
self.rt.update_available_resource(mock.MagicMock(), _NODENAME)
|
||||
|
||||
# Instance #1 is resizing to instance type 2 which has 2 vCPUs, 256MB
|
||||
# RAM and 5GB root disk.
|
||||
|
@ -2222,7 +2219,8 @@ class TestRebuild(BaseTestCase):
|
|||
migr_mock.return_value = []
|
||||
get_cn_mock.return_value = _COMPUTE_NODE_FIXTURES[0].obj_clone()
|
||||
|
||||
self.rt.update_available_resource(mock.sentinel.ctx, _NODENAME)
|
||||
ctx = mock.MagicMock()
|
||||
self.rt.update_available_resource(ctx, _NODENAME)
|
||||
|
||||
# Now emulate the evacuate command by calling rebuild_claim() on the
|
||||
# resource tracker as the compute manager does, supplying a Migration
|
||||
|
@ -2440,16 +2438,40 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
|||
rc.get_allocations_for_resource_provider = mock.MagicMock(
|
||||
return_value=allocs)
|
||||
rc.delete_allocation_for_instance = mock.MagicMock()
|
||||
mock_inst_get.side_effect = exc.InstanceNotFound(
|
||||
instance_id=uuids.deleted)
|
||||
mock_inst_get.return_value = objects.Instance(
|
||||
uuid=uuids.deleted, deleted=True)
|
||||
cn = self.rt.compute_nodes[_NODENAME]
|
||||
ctx = mock.sentinel.ctx
|
||||
ctx = mock.MagicMock()
|
||||
# Call the method.
|
||||
self.rt._remove_deleted_instances_allocations(ctx, cn)
|
||||
# Only one call should be made to delete allocations, and that should
|
||||
# be for the first instance created above
|
||||
rc.delete_allocation_for_instance.assert_called_once_with(
|
||||
uuids.deleted)
|
||||
mock_inst_get.assert_called_once_with(
|
||||
ctx.elevated.return_value,
|
||||
uuids.deleted,
|
||||
expected_attrs=[])
|
||||
ctx.elevated.assert_called_once_with(read_deleted='yes')
|
||||
|
||||
@mock.patch('nova.objects.Instance.get_by_uuid')
|
||||
def test_remove_deleted_instances_allocations_building_instance(self,
|
||||
mock_inst_get):
|
||||
rc = self.rt.reportclient
|
||||
self.rt.tracked_instances = {}
|
||||
allocs = {uuids.deleted: "fake_deleted_instance"}
|
||||
rc.get_allocations_for_resource_provider = mock.MagicMock(
|
||||
return_value=allocs)
|
||||
rc.delete_allocation_for_instance = mock.MagicMock()
|
||||
mock_inst_get.side_effect = exc.InstanceNotFound(
|
||||
instance_id=uuids.deleted)
|
||||
cn = self.rt.compute_nodes[_NODENAME]
|
||||
ctx = mock.MagicMock()
|
||||
# Call the method.
|
||||
self.rt._remove_deleted_instances_allocations(ctx, cn)
|
||||
# Instance wasn't found in the database at all, so the allocation
|
||||
# should not have been deleted
|
||||
self.assertFalse(rc.delete_allocation_for_instance.called)
|
||||
|
||||
@mock.patch('nova.objects.Instance.get_by_uuid')
|
||||
def test_remove_deleted_instances_allocations_scheduled_instance(self,
|
||||
|
@ -2469,7 +2491,7 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
|||
|
||||
mock_inst_get.side_effect = get_by_uuid
|
||||
cn = self.rt.compute_nodes[_NODENAME]
|
||||
ctx = mock.sentinel.ctx
|
||||
ctx = mock.MagicMock()
|
||||
# Call the method.
|
||||
self.rt._remove_deleted_instances_allocations(ctx, cn)
|
||||
# Scheduled instances should not have their allocations removed
|
||||
|
@ -2498,48 +2520,10 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
|||
mock_get.return_value = instance
|
||||
|
||||
cn = self.rt.compute_nodes[_NODENAME]
|
||||
ctx = mock.sentinel.ctx
|
||||
ctx = mock.MagicMock()
|
||||
self.rt._remove_deleted_instances_allocations(ctx, cn)
|
||||
mock_delete_allocs.assert_not_called()
|
||||
|
||||
@mock.patch('nova.objects.Instance.get_by_uuid')
|
||||
def test_remove_deleted_instances_allocations_no_instance(self,
|
||||
mock_inst_get):
|
||||
# If for some reason an instance is no longer available, but
|
||||
# there are allocations for it, we want to be sure those
|
||||
# allocations are removed, not that an InstanceNotFound
|
||||
# exception is not caught. Here we set up some allocations,
|
||||
# one of which is for an instance that can no longer be
|
||||
# found.
|
||||
rc = self.rt.reportclient
|
||||
self.rt.tracked_instances = {}
|
||||
# Create 1 instance
|
||||
instance = _INSTANCE_FIXTURES[0].obj_clone()
|
||||
instance.uuid = uuids.inst0
|
||||
# Mock out the allocation call
|
||||
allocs = {uuids.scheduled: "fake_scheduled_instance",
|
||||
uuids.inst0: "fake_instance_gone"}
|
||||
rc.get_allocations_for_resource_provider = mock.MagicMock(
|
||||
return_value=allocs)
|
||||
rc.delete_allocation_for_instance = mock.MagicMock()
|
||||
|
||||
def get_by_uuid(ctx, inst_uuid, expected_attrs=None):
|
||||
ret = _INSTANCE_FIXTURES[0].obj_clone()
|
||||
ret.uuid = inst_uuid
|
||||
if inst_uuid == uuids.scheduled:
|
||||
ret.host = None
|
||||
return ret
|
||||
raise exc.InstanceNotFound(instance_id=inst_uuid)
|
||||
|
||||
mock_inst_get.side_effect = get_by_uuid
|
||||
cn = self.rt.compute_nodes[_NODENAME]
|
||||
ctx = mock.sentinel.ctx
|
||||
# Call the method.
|
||||
self.rt._remove_deleted_instances_allocations(ctx, cn)
|
||||
# One call should be made to delete allocations, for our
|
||||
# instance that no longer exists.
|
||||
rc.delete_allocation_for_instance.assert_called_once_with(uuids.inst0)
|
||||
|
||||
def test_delete_allocation_for_shelve_offloaded_instance(self):
|
||||
instance = _INSTANCE_FIXTURES[0].obj_clone()
|
||||
instance.uuid = uuids.inst0
|
||||
|
|
Loading…
Reference in New Issue