From 3120f40f636ab4cbdcb9f5cd4e9175e99e1e1c63 Mon Sep 17 00:00:00 2001
From: melanie witt
Date: Fri, 3 Aug 2018 00:45:04 +0000
Subject: [PATCH] [stable only] Add functional regression test for bug 1783613

Related-Bug: #1783613

Change-Id: Ib65b0c7c5b83c53edf63f23fa522334c6a666ae4
---
 .../regressions/test_bug_1783613.py           | 170 ++++++++++++++++++
 1 file changed, 170 insertions(+)
 create mode 100644 nova/tests/functional/regressions/test_bug_1783613.py

diff --git a/nova/tests/functional/regressions/test_bug_1783613.py b/nova/tests/functional/regressions/test_bug_1783613.py
new file mode 100644
index 000000000000..781e46d5702f
--- /dev/null
+++ b/nova/tests/functional/regressions/test_bug_1783613.py
@@ -0,0 +1,170 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import time
+
+from nova import exception
+from nova import test
+from nova.tests import fixtures as nova_fixtures
+from nova.tests.functional.api import client
+from nova.tests.functional import integrated_helpers
+import nova.tests.unit.image.fake
+from nova.tests.unit import policy_fixture
+
+
+class TestDeleteWhileBootingInstanceNotFound(
+        test.TestCase, integrated_helpers.InstanceHelperMixin):
+    """This tests a regression in the Ocata release.
+
+    In Ocata we started building instances in conductor for cells v2. If create
+    and delete requests are racing, we can have a situation where we never
+    decrement the quota during a delete.
+
+    In the API:
+
+    When the user deletes an instance while it is booting, we first try to
+    delete the build request record. If we succeed in doing that, we will
+    expect the boot process to be halted by conductor.
+
+    When conductor finds the build request gone, it knows that the user has
+    requested a delete from the API and will start cleaning up resources. This
+    includes deleting the instance record, if it has already been created, and
+    it is created before the conductor tries to delete the build request.
+
+    If the API succeeds in deleting the build request, it looks up the instance
+    record and will only decrement quota if it finds the instance. The problem
+    is that the instance can be NotFound if:
+
+      a) Conductor has not yet created the instance record or
+      b) Conductor deleted the instance record after finding the API deleted it
+
+    So if this happens during a race between create and delete, we will never
+    decrement the quota.
+    """
+
+    def setUp(self):
+        """Start the API with real policy and fake neutron/image backends."""
+        super(TestDeleteWhileBootingInstanceNotFound, self).setUp()
+        self.useFixture(policy_fixture.RealPolicyFixture())
+        # This is needed for the network_api.validate_networks check in the API
+        self.useFixture(nova_fixtures.NeutronFixture(self))
+        api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
+            api_version='v2.1'))
+        self.api = api_fixture.api
+
+        # the image fake backend needed for image discovery
+        nova.tests.unit.image.fake.stub_out_image_service(self)
+
+        # NOTE(melwitt): We intentionally do not start a conductor service in
+        # this test because we want to force the ordering such that the API
+        # deletes the build request during a concurrent DELETE from the user
+        # while the instance is being created, before the conductor has a
+        # chance to delete the build request as part of the create flow.
+
+        self.image_id = self.api.get_images()[0]['id']
+        self.flavor_id = self.api.get_flavors()[0]['id']
+
+    def _wait_for_instance_delete(self, server_id):
+        """Poll the API until the server is gone, failing after ~10 seconds.
+
+        :param server_id: ID of the server expected to disappear
+        """
+        # Count attempts with an integer rather than accumulating a float so
+        # the number of polls is exact.
+        interval = .1
+        max_attempts = 100
+        for _ in range(max_attempts):
+            try:
+                server = self.api.get_server(server_id)
+            except client.OpenStackApiNotFoundException:
+                # the instance is gone so we're happy
+                return
+            time.sleep(interval)
+
+        self.fail('Timed out waiting for server %s to be deleted. '
+                  'Current vm_state: %s. Current task_state: %s' %
+                  (server_id, server['OS-EXT-STS:vm_state'],
+                   server['OS-EXT-STS:task_state']))
+
+    def _delete_server(self, server_id):
+        """Delete the server, tolerating it already being gone."""
+        try:
+            self.api.delete_server(server_id)
+        except client.OpenStackApiNotFoundException:
+            pass
+
+    def _boot_server_and_check_usage(self):
+        """Create a server and verify instance quota usage went up by one.
+
+        :returns: tuple of (server returned by the API, limits after the boot)
+        """
+        # Get the current quota usage
+        starting_usage = self.api.get_limits()
+
+        server_req = dict(name='test', imageRef=self.image_id,
+                          flavorRef=self.flavor_id)
+        server = self.api.post_server({'server': server_req})
+
+        # Check quota to see we've incremented usage by 1.
+        current_usage = self.api.get_limits()
+        self.assertEqual(starting_usage['absolute']['totalInstancesUsed'] + 1,
+                         current_usage['absolute']['totalInstancesUsed'])
+        return server, current_usage
+
+    def _delete_server_and_check_usage(self, server, current_usage):
+        """Delete the server and verify the (buggy) unchanged quota usage.
+
+        :param server: server returned by the API at boot
+        :param current_usage: limits captured right after the boot
+        """
+        # Now delete the server and wait for it to be gone.
+        self._delete_server(server['id'])
+        self._wait_for_instance_delete(server['id'])
+
+        # Now check the quota again. Because of the bug, we won't have
+        # decremented quota.
+        ending_usage = self.api.get_limits()
+        self.assertEqual(current_usage['absolute']['totalInstancesUsed'],
+                         ending_usage['absolute']['totalInstancesUsed'])
+        # TODO(melwitt): Uncomment this assert when the bug is fixed.
+        # self.assertEqual(current_usage['absolute']['totalInstancesUsed'] - 1,
+        #                  ending_usage['absolute']['totalInstancesUsed'])
+
+    def test_delete_while_booting_instance_lookup_fails(self):
+        """Quota is not decremented when the API cannot find the instance."""
+        server, current_usage = self._boot_server_and_check_usage()
+
+        # Stub out the API to make the instance lookup fail, simulating if
+        # conductor hadn't created it yet or deleted it after the build
+        # request was deleted by the API.
+        self.stub_out('nova.compute.api.API._lookup_instance',
+                      lambda *a, **k: (None, None))
+
+        self._delete_server_and_check_usage(server, current_usage)
+
+    def test_delete_while_booting_instance_destroy_fails(self):
+        """Quota is not decremented when destroy raises InstanceNotFound."""
+        server, current_usage = self._boot_server_and_check_usage()
+
+        # Stub out the API to make the instance destroy raise InstanceNotFound,
+        # simulating if conductor already deleted it.
+        # If conductor deleted the instance out from under us in the API
+        # *after* we looked up the instance and found it, we will get
+        # InstanceNotFound from instance.destroy() and quotas.rollback() won't
+        # be the right choice. It's only the right choice if we're racing with
+        # another delete request, not if we're racing with a create request.
+        def fake_destroy(*args, **kwargs):
+            raise exception.InstanceNotFound(instance_id=server['id'])
+
+        self.stub_out('nova.objects.Instance.destroy', fake_destroy)
+
+        self._delete_server_and_check_usage(server, current_usage)