diff --git a/nova/compute/api.py b/nova/compute/api.py
index 031587db99bc..686fe63b8f88 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -249,6 +249,13 @@ class API(base.Base):
         self.image_api = image_api or image.API()
         self.network_api = network_api or network.API()
         self.volume_api = volume_api or cinder.API()
+        # NOTE(mriedem): This looks a bit weird but we get the reportclient
+        # via SchedulerClient since it lazy-loads SchedulerReportClient on
+        # the first usage which helps to avoid a bunch of lockutils spam in
+        # the nova-api logs every time the service is restarted (remember
+        # that pretty much all of the REST API controllers construct this
+        # API class).
+        self.placementclient = scheduler_client.SchedulerClient().reportclient
         self.security_group_api = (security_group_api or
             openstack_driver.get_openstack_security_group_driver())
         self.consoleauth_rpcapi = consoleauth_rpcapi.ConsoleAuthAPI()
@@ -2117,6 +2124,9 @@ class API(base.Base):

             # cleanup volumes
             self._local_cleanup_bdm_volumes(bdms, instance, context)
+            # Cleanup allocations in Placement since we can't do it from the
+            # compute service.
+            self.placementclient.delete_allocation_for_instance(instance.uuid)
             cb(context, instance, bdms, local=True)
             sys_meta = instance.system_metadata
             instance.destroy()
diff --git a/nova/tests/functional/regressions/test_bug_1679750.py b/nova/tests/functional/regressions/test_bug_1679750.py
index b93ce5107c5c..7493c92f6994 100644
--- a/nova/tests/functional/regressions/test_bug_1679750.py
+++ b/nova/tests/functional/regressions/test_bug_1679750.py
@@ -10,6 +10,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.

+from nova.scheduler.client import report as reportclient
 from nova import test
 from nova.tests import fixtures as nova_fixtures
 from nova.tests.functional import integrated_helpers
@@ -57,12 +58,87 @@ class TestLocalDeleteAllocations(test.TestCase,
         resp = self.placement_api.get(fmt % {'uuid': rp_uuid})
         return resp.body['usages']

-    def test_local_delete_removes_allocations(self):
+    # NOTE(mriedem): It would be preferable to use the PlacementFixture as
+    # a context manager but that causes some issues when trying to delete the
+    # server in test_local_delete_removes_allocations_after_compute_restart.
+    def _stub_compute_api_to_not_configure_placement(self):
+        """Prior to the compute API deleting allocations in the "local delete"
+        case, nova.conf for nova-api might not be configured for talking to
+        the placement service, so we can mock that behavior by stubbing out
+        the placement client in the compute API to no-op as if safe_connect
+        failed and returned None to the caller.
+        """
+        orig_delete_alloc = (
+            reportclient.SchedulerReportClient.delete_allocation_for_instance)
+        self.call_count = 0
+
+        def fake_delete_allocation_for_instance(*args, **kwargs):
+            # The first call will be from the API, so ignore that one and
+            # return None like the @safe_connect decorator would if nova-api
+            # wasn't configured to talk to placement.
+            if self.call_count:
+                orig_delete_alloc(*args, **kwargs)
+            else:
+                self.call_count += 1
+
+        self.stub_out('nova.scheduler.client.report.SchedulerReportClient.'
+                      'delete_allocation_for_instance',
+                      fake_delete_allocation_for_instance)
+
+    def test_local_delete_removes_allocations_after_compute_restart(self):
         """Tests that allocations are removed after a local delete.

         This tests the scenario where a server is local deleted (because the
         compute host is down) and we want to make sure that its allocations
-        have been cleaned up.
+        have been cleaned up once the nova-compute service restarts.
+        """
+        self._stub_compute_api_to_not_configure_placement()
+        # Get allocations, make sure they are 0.
+        resp = self.placement_api.get('/resource_providers')
+        rp_uuid = resp.body['resource_providers'][0]['uuid']
+        usages_before = self._get_usages(rp_uuid)
+        for usage in usages_before.values():
+            self.assertEqual(0, usage)
+
+        # Create a server.
+        server = self._build_minimal_create_server_request(self.api,
+            'local-delete-test', self.image_id, self.flavor_id, 'none')
+        server = self.admin_api.post_server({'server': server})
+        server = self._wait_for_state_change(self.api, server, 'ACTIVE')
+
+        # Assert usages are non zero now.
+        usages_during = self._get_usages(rp_uuid)
+        for usage in usages_during.values():
+            self.assertNotEqual(0, usage)
+
+        # Force-down compute to trigger local delete.
+        self.compute.stop()
+        compute_service_id = self.admin_api.get_services(
+            host=self.compute.host, binary='nova-compute')[0]['id']
+        self.admin_api.put_service(compute_service_id, {'forced_down': True})
+
+        # Delete the server (will be a local delete because compute is down).
+        self.api.delete_server(server['id'])
+
+        # Assert usages are still non-zero.
+        usages_during = self._get_usages(rp_uuid)
+        for usage in usages_during.values():
+            self.assertNotEqual(0, usage)
+
+        # Start the compute service again. Before it comes up, it will call the
+        # update_available_resource code in the ResourceTracker which is what
+        # "heals" the allocations for the deleted instance.
+        self.compute.start()
+
+        # Get the allocations again to check against the original.
+        usages_after = self._get_usages(rp_uuid)
+
+        # They should match.
+        self.assertEqual(usages_before, usages_after)
+
+    def test_local_delete_removes_allocations_from_api(self):
+        """Tests that the compute API deletes allocations when the compute
+        service on which the instance was running is down.
         """
         # Get allocations, make sure they are 0.
         resp = self.placement_api.get('/resource_providers')
@@ -91,12 +167,7 @@ class TestLocalDeleteAllocations(test.TestCase,
         # Delete the server (will be a local delete because compute is down).
         self.api.delete_server(server['id'])

-        # Start the compute service again. Before it comes up, it will call the
-        # update_available_resource code in the ResourceTracker which is what
-        # "heals" the allocations for the deleted instance.
-        self.compute.start()
-
-        # Get the allocations again to check against the original.
+        # Get the allocations again to make sure they were deleted.
         usages_after = self._get_usages(rp_uuid)

         # They should match.
diff --git a/nova/tests/functional/wsgi/test_services.py b/nova/tests/functional/wsgi/test_services.py
index 26860056de28..eb3902fa468c 100644
--- a/nova/tests/functional/wsgi/test_services.py
+++ b/nova/tests/functional/wsgi/test_services.py
@@ -86,12 +86,7 @@ class TestServicesAPI(test_servers.ProviderUsageBaseTestCase):
         self.assertEqual(409, ex.response.status_code)

         # Now delete the instance and wait for it to be gone.
-        # Note that we can't use self._delete_and_check_allocations here
-        # because of bug 1679750 where allocations are not deleted when
-        # an instance is deleted and the compute service it's running on
-        # is down.
-        self.api.delete_server(server['id'])
-        self._wait_until_deleted(server)
+        self._delete_and_check_allocations(server)

         # Now we can delete the service.
         self.admin_api.api_delete('/os-services/%s' % service['id'])
@@ -120,10 +115,3 @@ class TestServicesAPI(test_servers.ProviderUsageBaseTestCase):
         # in a 404 response.
         resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
         self.assertEqual(200, resp.status)
-        # Assert the allocations still exist for the server against the
-        # compute node resource provider. Once the bug is fixed, there should
-        # be no allocations for the server.
-        allocations = self._get_allocations_by_server_uuid(server['id'])
-        self.assertEqual(1, len(allocations))
-        allocation = allocations[rp_uuid]['resources']
-        self.assertFlavorMatchesAllocation(flavor, allocation)
diff --git a/releasenotes/notes/bug-1679750-local-delete-allocations-cb7bfbcb6c36b6a2.yaml b/releasenotes/notes/bug-1679750-local-delete-allocations-cb7bfbcb6c36b6a2.yaml
new file mode 100644
index 000000000000..178e810cba3a
--- /dev/null
+++ b/releasenotes/notes/bug-1679750-local-delete-allocations-cb7bfbcb6c36b6a2.yaml
@@ -0,0 +1,12 @@
+---
+upgrade:
+  - |
+    The ``nova-api`` service now requires the ``[placement]`` section to be
+    configured in nova.conf if you are using a separate config file just for
+    that service. This is because the ``nova-api`` service now needs to talk
+    to the placement service in order to delete resource provider allocations
+    when deleting an instance and the ``nova-compute`` service on which that
+    instance is running is down. This change is idempotent if
+    ``[placement]`` is not configured in ``nova-api`` but it will result in
+    new warnings in the logs until configured. See bug
+    https://bugs.launchpad.net/nova/+bug/1679750 for more details.
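
For deployments that run ``nova-api`` with its own config file, the ``[placement]`` section referenced in the release note above is the same Keystone-authenticated placement configuration used by the other nova services. A minimal illustrative snippet follows; the endpoint URL, domain names and credentials are deployment-specific placeholders, and the option names should be checked against the configuration reference for your release:

    [placement]
    # Region and Keystone credentials nova-api uses to reach the placement
    # API; every value below is a placeholder for this sketch.
    os_region_name = RegionOne
    auth_type = password
    auth_url = http://controller/identity/v3
    project_name = service
    project_domain_name = Default
    username = placement
    user_domain_name = Default
    password = PLACEMENT_SERVICE_PASSWORD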