Merge "Delete allocations from API if nova-compute is down" into stable/pike
This commit is contained in:
commit
826489881f
|
@ -249,6 +249,13 @@ class API(base.Base):
|
|||
self.image_api = image_api or image.API()
|
||||
self.network_api = network_api or network.API()
|
||||
self.volume_api = volume_api or cinder.API()
|
||||
# NOTE(mriedem): This looks a bit weird but we get the reportclient
|
||||
# via SchedulerClient since it lazy-loads SchedulerReportClient on
|
||||
# the first usage which helps to avoid a bunch of lockutils spam in
|
||||
# the nova-api logs every time the service is restarted (remember
|
||||
# that pretty much all of the REST API controllers construct this
|
||||
# API class).
|
||||
self.placementclient = scheduler_client.SchedulerClient().reportclient
|
||||
self.security_group_api = (security_group_api or
|
||||
openstack_driver.get_openstack_security_group_driver())
|
||||
self.consoleauth_rpcapi = consoleauth_rpcapi.ConsoleAuthAPI()
|
||||
|
@ -2117,6 +2124,9 @@ class API(base.Base):
|
|||
|
||||
# cleanup volumes
|
||||
self._local_cleanup_bdm_volumes(bdms, instance, context)
|
||||
# Cleanup allocations in Placement since we can't do it from the
|
||||
# compute service.
|
||||
self.placementclient.delete_allocation_for_instance(instance.uuid)
|
||||
cb(context, instance, bdms, local=True)
|
||||
sys_meta = instance.system_metadata
|
||||
instance.destroy()
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from nova.scheduler.client import report as reportclient
|
||||
from nova import test
|
||||
from nova.tests import fixtures as nova_fixtures
|
||||
from nova.tests.functional import integrated_helpers
|
||||
|
@ -57,12 +58,87 @@ class TestLocalDeleteAllocations(test.TestCase,
|
|||
resp = self.placement_api.get(fmt % {'uuid': rp_uuid})
|
||||
return resp.body['usages']
|
||||
|
||||
def test_local_delete_removes_allocations(self):
|
||||
# NOTE(mriedem): It would be preferable to use the PlacementFixture as
|
||||
# a context manager but that causes some issues when trying to delete the
|
||||
# server in test_local_delete_removes_allocations_after_compute_restart.
|
||||
def _stub_compute_api_to_not_configure_placement(self):
|
||||
"""Prior to the compute API deleting allocations in the "local delete"
|
||||
case, nova.conf for nova-api might not be configured for talking to
|
||||
the placement service, so we can mock that behavior by stubbing out
|
||||
the placement client in the compute API to no-op as if safe_connect
|
||||
failed and returned None to the caller.
|
||||
"""
|
||||
orig_delete_alloc = (
|
||||
reportclient.SchedulerReportClient.delete_allocation_for_instance)
|
||||
self.call_count = 0
|
||||
|
||||
def fake_delete_allocation_for_instance(*args, **kwargs):
|
||||
# The first call will be from the API, so ignore that one and
|
||||
# return None like the @safe_connect decorator would if nova-api
|
||||
# wasn't configured to talk to placement.
|
||||
if self.call_count:
|
||||
orig_delete_alloc(*args, **kwargs)
|
||||
else:
|
||||
self.call_count += 1
|
||||
|
||||
self.stub_out('nova.scheduler.client.report.SchedulerReportClient.'
|
||||
'delete_allocation_for_instance',
|
||||
fake_delete_allocation_for_instance)
|
||||
|
||||
def test_local_delete_removes_allocations_after_compute_restart(self):
|
||||
"""Tests that allocations are removed after a local delete.
|
||||
|
||||
This tests the scenario where a server is local deleted (because the
|
||||
compute host is down) and we want to make sure that its allocations
|
||||
have been cleaned up.
|
||||
have been cleaned up once the nova-compute service restarts.
|
||||
"""
|
||||
self._stub_compute_api_to_not_configure_placement()
|
||||
# Get allocations, make sure they are 0.
|
||||
resp = self.placement_api.get('/resource_providers')
|
||||
rp_uuid = resp.body['resource_providers'][0]['uuid']
|
||||
usages_before = self._get_usages(rp_uuid)
|
||||
for usage in usages_before.values():
|
||||
self.assertEqual(0, usage)
|
||||
|
||||
# Create a server.
|
||||
server = self._build_minimal_create_server_request(self.api,
|
||||
'local-delete-test', self.image_id, self.flavor_id, 'none')
|
||||
server = self.admin_api.post_server({'server': server})
|
||||
server = self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||
|
||||
# Assert usages are non zero now.
|
||||
usages_during = self._get_usages(rp_uuid)
|
||||
for usage in usages_during.values():
|
||||
self.assertNotEqual(0, usage)
|
||||
|
||||
# Force-down compute to trigger local delete.
|
||||
self.compute.stop()
|
||||
compute_service_id = self.admin_api.get_services(
|
||||
host=self.compute.host, binary='nova-compute')[0]['id']
|
||||
self.admin_api.put_service(compute_service_id, {'forced_down': True})
|
||||
|
||||
# Delete the server (will be a local delete because compute is down).
|
||||
self.api.delete_server(server['id'])
|
||||
|
||||
# Assert usages are still non-zero.
|
||||
usages_during = self._get_usages(rp_uuid)
|
||||
for usage in usages_during.values():
|
||||
self.assertNotEqual(0, usage)
|
||||
|
||||
# Start the compute service again. Before it comes up, it will call the
|
||||
# update_available_resource code in the ResourceTracker which is what
|
||||
# "heals" the allocations for the deleted instance.
|
||||
self.compute.start()
|
||||
|
||||
# Get the allocations again to check against the original.
|
||||
usages_after = self._get_usages(rp_uuid)
|
||||
|
||||
# They should match.
|
||||
self.assertEqual(usages_before, usages_after)
|
||||
|
||||
def test_local_delete_removes_allocations_from_api(self):
|
||||
"""Tests that the compute API deletes allocations when the compute
|
||||
service on which the instance was running is down.
|
||||
"""
|
||||
# Get allocations, make sure they are 0.
|
||||
resp = self.placement_api.get('/resource_providers')
|
||||
|
@ -91,12 +167,7 @@ class TestLocalDeleteAllocations(test.TestCase,
|
|||
# Delete the server (will be a local delete because compute is down).
|
||||
self.api.delete_server(server['id'])
|
||||
|
||||
# Start the compute service again. Before it comes up, it will call the
|
||||
# update_available_resource code in the ResourceTracker which is what
|
||||
# "heals" the allocations for the deleted instance.
|
||||
self.compute.start()
|
||||
|
||||
# Get the allocations again to check against the original.
|
||||
# Get the allocations again to make sure they were deleted.
|
||||
usages_after = self._get_usages(rp_uuid)
|
||||
|
||||
# They should match.
|
||||
|
|
|
@ -86,12 +86,7 @@ class TestServicesAPI(test_servers.ProviderUsageBaseTestCase):
|
|||
self.assertEqual(409, ex.response.status_code)
|
||||
|
||||
# Now delete the instance and wait for it to be gone.
|
||||
# Note that we can't use self._delete_and_check_allocations here
|
||||
# because of bug 1679750 where allocations are not deleted when
|
||||
# an instance is deleted and the compute service it's running on
|
||||
# is down.
|
||||
self.api.delete_server(server['id'])
|
||||
self._wait_until_deleted(server)
|
||||
self._delete_and_check_allocations(server)
|
||||
|
||||
# Now we can delete the service.
|
||||
self.admin_api.api_delete('/os-services/%s' % service['id'])
|
||||
|
@ -120,10 +115,3 @@ class TestServicesAPI(test_servers.ProviderUsageBaseTestCase):
|
|||
# in a 404 response.
|
||||
resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
|
||||
self.assertEqual(200, resp.status)
|
||||
# Assert the allocations still exist for the server against the
|
||||
# compute node resource provider. Once the bug is fixed, there should
|
||||
# be no allocations for the server.
|
||||
allocations = self._get_allocations_by_server_uuid(server['id'])
|
||||
self.assertEqual(1, len(allocations))
|
||||
allocation = allocations[rp_uuid]['resources']
|
||||
self.assertFlavorMatchesAllocation(flavor, allocation)
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
upgrade:
|
||||
- |
|
||||
The ``nova-api`` service now requires the ``[placement]`` section to be
|
||||
configured in nova.conf if you are using a separate config file just for
|
||||
that service. This is because the ``nova-api`` service now needs to talk
|
||||
to the placement service in order to delete resource provider allocations
|
||||
when deleting an instance and the ``nova-compute`` service on which that
|
||||
instance is running is down. This change is idempotent if
|
||||
``[placement]`` is not configured in ``nova-api`` but it will result in
|
||||
new warnings in the logs until configured. See bug
|
||||
https://bugs.launchpad.net/nova/+bug/1679750 for more details.
|
Loading…
Reference in New Issue