Merge "Consumer gen support for delete instance allocations"

This commit is contained in:
Zuul 2018-09-25 16:43:07 +00:00 committed by Gerrit Code Review
commit ebab3adb28
9 changed files with 184 additions and 44 deletions

View File

@ -58,12 +58,14 @@ changed or be partially complete at this time.
* `filter allocation candidates by aggregate membership`_
* `perform granular allocation candidate requests`_
* `inventory and allocation data migration`_ (reshaping provider trees)
* `handle allocation updates in a safe way`_
.. _Nested Resource Providers: http://specs.openstack.org/openstack/nova-specs/specs/queens/approved/nested-resource-providers.html
.. _Request Traits During Scheduling: https://specs.openstack.org/openstack/nova-specs/specs/queens/approved/request-traits-in-nova.html
.. _filter allocation candidates by aggregate membership: https://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/alloc-candidates-member-of.html
.. _perform granular allocation candidate requests: http://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/granular-resource-requests.html
.. _inventory and allocation data migration: http://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/reshape-provider-tree.html
.. _handle allocation updates in a safe way: https://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/add-consumer-generation.html
Deployment
==========

View File

@ -865,7 +865,8 @@ class ServersController(wsgi.Controller):
raise exc.HTTPNotFound(explanation=msg)
except exception.InstanceUnknownCell as e:
raise exc.HTTPNotFound(explanation=e.format_message())
except exception.InstanceIsLocked as e:
except (exception.InstanceIsLocked,
exception.AllocationDeleteFailed) as e:
raise exc.HTTPConflict(explanation=e.format_message())
except exception.InstanceInvalidState as state_error:
common.raise_http_conflict_for_instance_invalid_state(state_error,

View File

@ -749,19 +749,18 @@ class ComputeManager(manager.Manager):
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
self._complete_deletion(context,
instance,
bdms)
instance)
self._notify_about_instance_usage(context, instance, "delete.end")
compute_utils.notify_about_instance_action(context, instance,
self.host, action=fields.NotificationAction.DELETE,
phase=fields.NotificationPhase.END, bdms=bdms)
def _complete_deletion(self, context, instance, bdms):
def _complete_deletion(self, context, instance):
self._update_resource_tracker(context, instance)
rt = self._get_resource_tracker()
rt.reportclient.delete_allocation_for_instance(context, instance.uuid)
self._notify_about_instance_usage(context, instance, "delete.end")
compute_utils.notify_about_instance_action(context, instance,
self.host, action=fields.NotificationAction.DELETE,
phase=fields.NotificationPhase.END, bdms=bdms)
self._clean_instance_console_tokens(context, instance)
self._delete_scheduler_instance_info(context, instance.uuid)
@ -2624,11 +2623,17 @@ class ComputeManager(manager.Manager):
instance.power_state = power_state.NOSTATE
instance.terminated_at = timeutils.utcnow()
instance.save()
self._complete_deletion(context, instance)
# only destroy the instance in the db if the _complete_deletion
# doesn't raise and therefore the allocation is successfully
# deleted in placement
instance.destroy()
self._complete_deletion(context,
instance,
bdms)
self._notify_about_instance_usage(context, instance, "delete.end")
compute_utils.notify_about_instance_action(context, instance,
self.host, action=fields.NotificationAction.DELETE,
phase=fields.NotificationPhase.END, bdms=bdms)
@wrap_exception()
@reverts_task_state

View File

@ -2322,6 +2322,11 @@ class AllocationUpdateFailed(NovaException):
'Error: %(error)s')
class AllocationDeleteFailed(NovaException):
msg_fmt = _('Failed to delete allocations for consumer %(consumer_uuid)s. '
'Error: %(error)s')
class TooManyComputesForHost(NovaException):
msg_fmt = _('Unexpected number of compute node records '
'(%(num_computes)d) found for host %(host)s. There should '

View File

@ -2047,21 +2047,63 @@ class SchedulerReportClient(object):
@safe_connect
def delete_allocation_for_instance(self, context, uuid):
"""Delete the instance allocation from placement
:param context: The security context
:param uuid: the instance UUID which will be used as the consumer UUID
towards placement
:return: Returns True if the allocation is successfully deleted by this
call. Returns False if the allocation does not exist.
:raises AllocationDeleteFailed: If the allocation cannot be read from
placement or it is changed by another process while we tried to
delete it.
"""
url = '/allocations/%s' % uuid
r = self.delete(url, global_request_id=context.global_id)
if r:
# We read the consumer generation then try to put an empty allocation
# for that consumer. If between the GET and the PUT the consumer
# generation changes then we raise AllocationDeleteFailed.
# NOTE(gibi): This only detects a small portion of the possible cases when
# allocation is modified outside of the delete code path. The rest can
# only be detected if nova cached at least the consumer generation
# of the instance.
# NOTE(gibi): placement does not return 404 for non-existing consumer
# but returns an empty consumer instead. Putting an empty allocation to
# that non-existing consumer won't be 404 or other error either.
r = self.get(url, global_request_id=context.global_id,
version=CONSUMER_GENERATION_VERSION)
if not r:
# at the moment there is no way placement returns a failure, so we
# could even delete this code
LOG.warning('Unable to delete allocation for instance '
'%(uuid)s: (%(code)i %(text)s)',
{'uuid': uuid,
'code': r.status_code,
'text': r.text})
raise exception.AllocationDeleteFailed(consumer_uuid=uuid,
error=r.text)
allocations = r.json()
if allocations['allocations'] == {}:
# the consumer did not exist in the first place
LOG.debug('Cannot delete allocation for %s consumer in placement '
'as consumer does not exists', uuid)
return False
# removing all resources from the allocation will auto delete the
# consumer in placement
allocations['allocations'] = {}
r = self.put(url, allocations, global_request_id=context.global_id,
version=CONSUMER_GENERATION_VERSION)
if r.status_code == 204:
LOG.info('Deleted allocation for instance %s', uuid)
return True
else:
# Check for 404 since we don't need to log a warning if we tried to
# delete something which doesn't actually exist.
if r.status_code != 404:
LOG.warning('Unable to delete allocation for instance '
'%(uuid)s: (%(code)i %(text)s)',
{'uuid': uuid,
'code': r.status_code,
'text': r.text})
return False
LOG.warning('Unable to delete allocation for instance '
'%(uuid)s: (%(code)i %(text)s)',
{'uuid': uuid,
'code': r.status_code,
'text': r.text})
raise exception.AllocationDeleteFailed(consumer_uuid=uuid,
error=r.text)
def get_allocations_for_resource_provider(self, context, rp_uuid):
"""Retrieves the allocations for a specific provider.

View File

@ -20,6 +20,7 @@ import zlib
import mock
from oslo_log import log as logging
from oslo_serialization import base64
from oslo_serialization import jsonutils
from oslo_utils.fixture import uuidsentinel as uuids
from oslo_utils import timeutils
import six
@ -40,6 +41,7 @@ from nova.tests.functional import integrated_helpers
from nova.tests.unit.api.openstack import fakes
from nova.tests.unit import fake_block_device
from nova.tests.unit import fake_notifier
from nova.tests.unit import fake_requests
import nova.tests.unit.image.fake
from nova.virt import fake
from nova import volume
@ -4529,3 +4531,87 @@ class ServerTestV256RescheduleTestCase(ServerTestV256Common):
found_server = self.api.get_server(server['id'])
# Check that rescheduling is not occurred.
self.assertNotEqual(other_host, found_server['OS-EXT-SRV-ATTR:host'])
class ConsumerGenerationConflictTest(
integrated_helpers.ProviderUsageBaseTestCase):
# we need the medium driver to be able to allocate resources not just for
# a single instance
compute_driver = 'fake.MediumFakeDriver'
def setUp(self):
super(ConsumerGenerationConflictTest, self).setUp()
flavors = self.api.get_flavors()
self.flavor = flavors[0]
self.other_flavor = flavors[1]
self.compute1 = self._start_compute('compute1')
self.compute2 = self._start_compute('compute2')
def test_server_delete_fails_due_to_conflict(self):
source_hostname = self.compute1.host
server = self._boot_and_check_allocations(self.flavor, source_hostname)
rsp = fake_requests.FakeResponse(
409, jsonutils.dumps({'text': 'consumer generation conflict'}))
with mock.patch('keystoneauth1.adapter.Adapter.put',
autospec=True) as mock_put:
mock_put.return_value = rsp
self.api.delete_server(server['id'])
server = self._wait_for_state_change(self.admin_api, server,
'ERROR')
self.assertEqual(1, mock_put.call_count)
# We still have the allocations as deletion failed
source_rp_uuid = self._get_provider_uuid_by_host(source_hostname)
source_usages = self._get_provider_usages(source_rp_uuid)
self.assertFlavorMatchesAllocation(self.flavor, source_usages)
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
source_allocation = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor, source_allocation)
# retry the delete to make sure that allocations are removed this time
self._delete_and_check_allocations(server)
def test_server_local_delete_fails_due_to_conflict(self):
source_hostname = self.compute1.host
server = self._boot_and_check_allocations(self.flavor, source_hostname)
source_compute_id = self.admin_api.get_services(
host=self.compute1.host, binary='nova-compute')[0]['id']
self.compute1.stop()
self.admin_api.put_service(
source_compute_id, {'forced_down': 'true'})
rsp = fake_requests.FakeResponse(
409, jsonutils.dumps({'text': 'consumer generation conflict'}))
with mock.patch('keystoneauth1.adapter.Adapter.put',
autospec=True) as mock_put:
mock_put.return_value = rsp
ex = self.assertRaises(client.OpenStackApiException,
self.api.delete_server, server['id'])
self.assertEqual(409, ex.response.status_code)
self.assertIn('Failed to delete allocations for consumer',
jsonutils.loads(ex.response.content)[
'conflictingRequest']['message'])
self.assertEqual(1, mock_put.call_count)
# We still have the allocations as deletion failed
source_rp_uuid = self._get_provider_uuid_by_host(source_hostname)
source_usages = self._get_provider_usages(source_rp_uuid)
self.assertFlavorMatchesAllocation(self.flavor, source_usages)
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
source_allocation = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor, source_allocation)
# retry the delete to make sure that allocations are removed this time
self._delete_and_check_allocations(server)

View File

@ -7646,6 +7646,12 @@ class ComputeTestCase(BaseTestCase,
self.stub_out('nova.objects.quotas.Quotas.reserve',
lambda *a, **k: None)
self.stub_out('nova.compute.utils.notify_about_instance_usage',
lambda *a, **k: None)
self.stub_out('nova.compute.utils.notify_about_instance_action',
lambda *a, **k: None)
self.compute._complete_partial_deletion(admin_context, instance)
self.assertNotEqual(0, instance.deleted)

View File

@ -232,14 +232,22 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
methods_called = [n for n, a, k in call_tracker.mock_calls]
self.assertEqual(['clear_events_for_instance',
'_notify_about_instance_usage',
'_shutdown_instance'],
'_shutdown_instance',
'_notify_about_instance_usage'],
methods_called)
mock_notify.assert_called_once_with(self.context,
mock_inst,
specd_compute.host,
action='delete',
phase='start',
bdms=mock_bdms)
mock_notify.assert_has_calls([
mock.call(self.context,
mock_inst,
specd_compute.host,
action='delete',
phase='start',
bdms=mock_bdms),
mock.call(self.context,
mock_inst,
specd_compute.host,
action='delete',
phase='end',
bdms=mock_bdms)])
def _make_compute_node(self, hyp_hostname, cn_id):
cn = mock.Mock(spec_set=['hypervisor_hostname', 'id',

View File

@ -3222,21 +3222,6 @@ class TestAllocations(SchedulerReportClientTestCase):
}
self.assertEqual(expected, result)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete')
@mock.patch('nova.scheduler.client.report.LOG')
def test_delete_allocation_for_instance_ignore_404(self, mock_log,
mock_delete):
"""Tests that we don't log a warning on a 404 response when trying to
delete an allocation record.
"""
mock_delete.return_value = fake_requests.FakeResponse(404)
self.client.delete_allocation_for_instance(self.context, uuids.rp_uuid)
# make sure we didn't screw up the logic or the mock
mock_log.info.assert_not_called()
# make sure warning wasn't called for the 404
mock_log.warning.assert_not_called()
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
"delete")
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."