diff --git a/nova/compute/api.py b/nova/compute/api.py index 2401f0e63a5e..07babade2314 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -77,6 +77,7 @@ from nova.pci import request as pci_request from nova.policies import servers as servers_policies import nova.policy from nova import profiler +from nova import quota from nova import rpc from nova.scheduler import client as scheduler_client from nova.scheduler import utils as scheduler_utils @@ -1743,16 +1744,11 @@ class API(base.Base): instance.task_state, project_id, user_id) try: - # NOTE(alaski): Though the conductor halts the build process it - # does not currently delete the instance record. This is - # because in the near future the instance record will not be - # created if the buildrequest has been deleted here. For now we - # ensure the instance has been set to deleted at this point. - # Yes this directly contradicts the comment earlier in this - # method, but this is a temporary measure. # Look up the instance because the current instance object was # stashed on the buildrequest and therefore not complete enough # to run .destroy(). + # NOTE(melwitt): _lookup_instance doesn't raise + # InstanceNotFound, it returns (None, None) instead. cell, instance = self._lookup_instance(context, instance.uuid) if instance is not None: # If instance is None it has already been deleted. @@ -1763,9 +1759,37 @@ class API(base.Base): instance.destroy() else: instance.destroy() - quotas.commit() + # NOTE(melwitt): We need to commit the quota decrement whether + # we find the instance or not because we can't get here unless + # we succeeded in deleting the build request earlier in this + # method. So, if we failed to lookup the instance here, it + # means either: + # a) Conductor didn't create the instance record yet. + # b) Conductor has deleted the instance record after finding + # the build request was deleted by us (in the API). + # In either case, conductor doesn't do anything to decrement + # quota usage, so we need to do it here. + quotas.commit() except exception.InstanceNotFound: + # InstanceNotFound can be raised by instance.destroy() if the + # instance was deleted by another racing delete request. If + # that happens, we should rollback quota here instead of + # committing it. + # We need to either rollback or commit here to get rid of the + # reservation record, in any case. quotas.rollback() + # InstanceNotFound could also be raised by instance.destroy() + # if conductor deleted the instance record after we looked it + # up but before we attempted to destroy it. If that happened, + # we are racing with a create request and the correct thing to + # do would be to commit the reservations instead of rolling + # them back. Since we can't differentiate between the two + # scenarios for InstanceNotFound, refresh the quota usage for + # instances, cores, and ram to clean things up if rollback() + # wasn't the right choice. + quota.QUOTAS.usage_refresh(context, + resource_names=['instances', + 'cores', 'ram']) return True return False diff --git a/nova/tests/functional/regressions/test_bug_1783613.py b/nova/tests/functional/regressions/test_bug_1783613.py index 781e46d5702f..e1714caac3e2 100644 --- a/nova/tests/functional/regressions/test_bug_1783613.py +++ b/nova/tests/functional/regressions/test_bug_1783613.py @@ -107,6 +107,10 @@ class TestDeleteWhileBootingInstanceNotFound( current_usage = self.api.get_limits() self.assertEqual(starting_usage['absolute']['totalInstancesUsed'] + 1, current_usage['absolute']['totalInstancesUsed']) + self.assertEqual(starting_usage['absolute']['totalCoresUsed'] + 1, + current_usage['absolute']['totalCoresUsed']) + self.assertEqual(starting_usage['absolute']['totalRAMUsed'] + 512, + current_usage['absolute']['totalRAMUsed']) # Stub out the API to make the instance lookup fail, simulating if # conductor hadn't yet created it yet or deleted it after the build @@ -118,14 +122,15 @@ class TestDeleteWhileBootingInstanceNotFound( self._delete_server(server['id']) self._wait_for_instance_delete(server['id']) - # Now check the quota again. Because of the bug, we won't have - # decremented quota. + # Now check the quota again. Since the bug is fixed, ending usage + # should be current usage - 1. ending_usage = self.api.get_limits() - self.assertEqual(current_usage['absolute']['totalInstancesUsed'], + self.assertEqual(current_usage['absolute']['totalInstancesUsed'] - 1, ending_usage['absolute']['totalInstancesUsed']) - # TODO(melwitt): Uncomment this assert when the bug is fixed. - # self.assertEqual(current_usage['absolute']['totalInstancesUsed'] - 1, - # ending_usage['absolute']['totalInstancesUsed']) + self.assertEqual(current_usage['absolute']['totalCoresUsed'] - 1, + ending_usage['absolute']['totalCoresUsed']) + self.assertEqual(current_usage['absolute']['totalRAMUsed'] - 512, + ending_usage['absolute']['totalRAMUsed']) def test_delete_while_booting_instance_destroy_fails(self): # Get the current quota usage @@ -139,6 +144,10 @@ class TestDeleteWhileBootingInstanceNotFound( current_usage = self.api.get_limits() self.assertEqual(starting_usage['absolute']['totalInstancesUsed'] + 1, current_usage['absolute']['totalInstancesUsed']) + self.assertEqual(starting_usage['absolute']['totalCoresUsed'] + 1, + current_usage['absolute']['totalCoresUsed']) + self.assertEqual(starting_usage['absolute']['totalRAMUsed'] + 512, + current_usage['absolute']['totalRAMUsed']) # Stub out the API to make the instance destroy raise InstanceNotFound, # simulating if conductor already deleted it. @@ -156,11 +165,12 @@ class TestDeleteWhileBootingInstanceNotFound( self._delete_server(server['id']) self._wait_for_instance_delete(server['id']) - # Now check the quota again. Because of the bug, we won't have - # decremented quota. + # Now check the quota again. Since the bug is fixed, ending usage + # should be current usage - 1. ending_usage = self.api.get_limits() - self.assertEqual(current_usage['absolute']['totalInstancesUsed'], + self.assertEqual(current_usage['absolute']['totalInstancesUsed'] - 1, ending_usage['absolute']['totalInstancesUsed']) - # TODO(melwitt): Uncomment this assert when the bug is fixed. - # self.assertEqual(current_usage['absolute']['totalInstancesUsed'] - 1, - # ending_usage['absolute']['totalInstancesUsed']) + self.assertEqual(current_usage['absolute']['totalCoresUsed'] - 1, + ending_usage['absolute']['totalCoresUsed']) + self.assertEqual(current_usage['absolute']['totalRAMUsed'] - 512, + ending_usage['absolute']['totalRAMUsed']) diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index 10003de6ffc7..f10cbde5e689 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -1467,7 +1467,7 @@ class _ComputeAPIUnitTestMixIn(object): self.assertTrue( self.compute_api._delete_while_booting(self.context, inst)) - self.assertFalse(quota_mock.commit.called) + self.assertTrue(quota_mock.commit.called) test() @@ -1476,6 +1476,7 @@ class _ComputeAPIUnitTestMixIn(object): inst = self._create_instance_obj() quota_mock = mock.MagicMock() + @mock.patch('nova.quota.QUOTAS.usage_refresh') @mock.patch.object(self.compute_api, '_attempt_delete_of_buildrequest', return_value=True) @mock.patch.object(self.compute_api, '_lookup_instance', @@ -1483,12 +1484,14 @@ class _ComputeAPIUnitTestMixIn(object): instance_id='fake')) @mock.patch.object(self.compute_api, '_create_reservations', return_value=quota_mock) - def test(mock_create_res, mock_lookup, mock_attempt): + def test(mock_create_res, mock_lookup, mock_attempt, mock_refresh): self.assertTrue( self.compute_api._delete_while_booting(self.context, inst)) self.assertFalse(quota_mock.commit.called) self.assertTrue(quota_mock.rollback.called) + mock_refresh.assert_called_once_with( + self.context, resource_names=['instances', 'cores', 'ram']) test()