diff --git a/nova/compute/api.py b/nova/compute/api.py index 494dd51229bc..1db7e0da733c 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -1780,12 +1780,12 @@ class API(base.Base): return cell = None - # If there is an instance.host (or the instance is shelved-offloaded), - # the instance has been scheduled and sent to a cell/compute which - # means it was pulled from the cell db. + # If there is an instance.host (or the instance is shelved-offloaded or + # in error state), the instance has been scheduled and sent to a + # cell/compute which means it was pulled from the cell db. # Normal delete should be attempted. - if not (instance.host or - instance.vm_state == vm_states.SHELVED_OFFLOADED): + may_have_ports_or_volumes = self._may_have_ports_or_volumes(instance) + if not instance.host and not may_have_ports_or_volumes: try: if self._delete_while_booting(context, instance): return @@ -1864,9 +1864,7 @@ class API(base.Base): # which will cause a cast to the child cell. cb(context, instance, bdms) return - shelved_offloaded = (instance.vm_state - == vm_states.SHELVED_OFFLOADED) - if not instance.host and not shelved_offloaded: + if not instance.host and not may_have_ports_or_volumes: try: with compute_utils.notify_about_instance_delete( self.notifier, context, instance, @@ -1879,7 +1877,12 @@ class API(base.Base): {'state': instance.vm_state}, instance=instance) return - except exception.ObjectActionError: + except exception.ObjectActionError as ex: + # The instance's host likely changed under us as + # this instance could be building and has since been + # scheduled. Continue with attempts to delete it. + LOG.debug('Refreshing instance because: %s', ex, + instance=instance) instance.refresh() if instance.vm_state == vm_states.RESIZED: @@ -1887,7 +1890,8 @@ class API(base.Base): is_local_delete = True try: - if not shelved_offloaded: + # instance.host must be set in order to look up the service. 
+ if instance.host is not None: service = objects.Service.get_by_compute_host( context.elevated(), instance.host) is_local_delete = not self.servicegroup_api.service_is_up( @@ -1904,7 +1908,9 @@ cb(context, instance, bdms) except exception.ComputeHostNotFound: - pass + LOG.debug('Compute host %s not found during service up check, ' + 'going to local delete instance', instance.host, + instance=instance) if is_local_delete: # If instance is in shelved_offloaded state or compute node @@ -1931,6 +1937,16 @@ # NOTE(comstud): Race condition. Instance already gone. pass + def _may_have_ports_or_volumes(self, instance): + # NOTE(melwitt): When an instance build fails in the compute manager, + # the instance host and node are set to None and the vm_state is set + # to ERROR. In that case, the instance with host = None has actually + # been scheduled and may have ports and/or volumes allocated on the + # compute node. + if instance.vm_state in (vm_states.SHELVED_OFFLOADED, vm_states.ERROR): + return True + return False + def _confirm_resize_on_deleting(self, context, instance): # If in the middle of a resize, use confirm_resize to # ensure the original instance is cleaned up too @@ -1986,6 +2002,14 @@ 'the instance host %(instance_host)s.', {'connector_host': connector.get('host'), 'instance_host': instance.host}, instance=instance) + if (instance.host is None and + self._may_have_ports_or_volumes(instance)): + LOG.debug('Allowing use of stashed volume connector with ' + 'instance host None because instance with ' + 'vm_state %(vm_state)s has been scheduled in ' + 'the past.', {'vm_state': instance.vm_state}, + instance=instance) + return connector def _local_cleanup_bdm_volumes(self, bdms, instance, context): """The method deletes the bdm records and, if a bdm is a volume, call diff --git a/nova/tests/functional/regressions/test_bug_1404867.py b/nova/tests/functional/regressions/test_bug_1404867.py index 
73d3a125692c..5fc6466e0c17 100644 --- a/nova/tests/functional/regressions/test_bug_1404867.py +++ b/nova/tests/functional/regressions/test_bug_1404867.py @@ -80,12 +80,7 @@ class DeleteWithReservedVolumes(integrated_helpers._IntegratedTestBase, # The volume should no longer be reserved as the deletion of the # server should have released all the resources. - # TODO(mnaser): Uncomment this in patch resolving the issue - # self.assertNotIn(volume_id, self.cinder.reserved_volumes) - - # The volume is still reserved at this point, which it should not be. - # TODO(mnaser): Remove this in patch resolving the issue - self.assertIn(volume_id, self.cinder.reserved_volumes) + self.assertNotIn(volume_id, self.cinder.reserved_volumes) @mock.patch('nova.objects.service.get_minimum_version_all_cells', return_value=compute_api.CINDER_V3_ATTACH_MIN_COMPUTE_VERSION) @@ -109,9 +104,4 @@ class DeleteWithReservedVolumes(integrated_helpers._IntegratedTestBase, # The volume should no longer have any attachments as instance delete # should have removed them. - # TODO(mnaser): Uncomment this in patch resolving the issue - # self.assertNotIn(volume_id, self.cinder.attachments[server_id]) - - # The volume still has attachments, which it should not have. 
- # TODO(mnaser): Remove this in patch resolving the issue - self.assertIn(volume_id, self.cinder.attachments[server_id]) + self.assertNotIn(volume_id, self.cinder.attachments[server_id]) diff --git a/nova/tests/functional/regressions/test_bug_1670627.py b/nova/tests/functional/regressions/test_bug_1670627.py index 4c01c01ac324..de970628c74d 100644 --- a/nova/tests/functional/regressions/test_bug_1670627.py +++ b/nova/tests/functional/regressions/test_bug_1670627.py @@ -59,6 +59,7 @@ class TestDeleteFromCell0CheckQuota(test.TestCase): self.start_service('conductor') self.start_service('scheduler') + self.start_service('consoleauth') # We don't actually start a compute service; this way we don't have any # compute hosts to schedule the instance to and will go into error and diff --git a/nova/tests/functional/regressions/test_bug_1689692.py b/nova/tests/functional/regressions/test_bug_1689692.py index 1b6cf48d835e..5c9d137ae2c0 100644 --- a/nova/tests/functional/regressions/test_bug_1689692.py +++ b/nova/tests/functional/regressions/test_bug_1689692.py @@ -53,6 +53,7 @@ class ServerListLimitMarkerCell0Test(test.TestCase, self.start_service('conductor') self.start_service('scheduler') + self.start_service('consoleauth') # We don't start the compute service because we want NoValidHost so # all of the instances go into ERROR state and get put into cell0. 
self.useFixture(cast_as_call.CastAsCall(self)) diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index cc9b96b10e71..aff32ec674db 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -1115,7 +1115,7 @@ class _ComputeAPIUnitTestMixIn(object): if self.cell_type != 'api': if inst.vm_state == vm_states.RESIZED: self._test_delete_resized_part(inst) - if inst.vm_state != vm_states.SHELVED_OFFLOADED: + if inst.host is not None: self.context.elevated().AndReturn(self.context) objects.Service.get_by_compute_host(self.context, inst.host).AndReturn(objects.Service()) @@ -1123,9 +1123,7 @@ class _ComputeAPIUnitTestMixIn(object): mox.IsA(objects.Service)).AndReturn( inst.host != 'down-host') - if (inst.host == 'down-host' or - inst.vm_state == vm_states.SHELVED_OFFLOADED): - + if inst.host == 'down-host' or inst.host is None: self._test_downed_host_part(inst, updates, delete_time, delete_type) cast = False @@ -1215,6 +1213,76 @@ class _ComputeAPIUnitTestMixIn(object): system_metadata=fake_sys_meta) self._test_delete('force_delete', vm_state=vm_state) + @mock.patch('nova.compute.api.API._delete_while_booting', + return_value=False) + @mock.patch('nova.compute.api.API._lookup_instance') + @mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid') + @mock.patch('nova.objects.Instance.save') + @mock.patch('nova.compute.utils.notify_about_instance_usage') + @mock.patch('nova.objects.Service.get_by_compute_host') + @mock.patch('nova.compute.api.API._local_delete') + def test_delete_error_state_with_no_host( + self, mock_local_delete, mock_service_get, _mock_notify, + _mock_save, mock_bdm_get, mock_lookup, _mock_del_booting): + # Instance in error state with no host should be a local delete + # for non API cells + inst = self._create_instance_obj(params=dict(vm_state=vm_states.ERROR, + host=None)) + mock_lookup.return_value = None, inst + with 
mock.patch.object(self.compute_api.compute_rpcapi, + 'terminate_instance') as mock_terminate: + self.compute_api.delete(self.context, inst) + if self.cell_type == 'api': + mock_terminate.assert_called_once_with( + self.context, inst, mock_bdm_get.return_value, + delete_type='delete') + mock_local_delete.assert_not_called() + else: + mock_local_delete.assert_called_once_with( + self.context, inst, mock_bdm_get.return_value, + 'delete', self.compute_api._do_delete) + mock_terminate.assert_not_called() + mock_service_get.assert_not_called() + + @mock.patch('nova.compute.api.API._delete_while_booting', + return_value=False) + @mock.patch('nova.compute.api.API._lookup_instance') + @mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid') + @mock.patch('nova.objects.Instance.save') + @mock.patch('nova.compute.utils.notify_about_instance_usage') + @mock.patch('nova.objects.Service.get_by_compute_host') + @mock.patch('nova.context.RequestContext.elevated') + @mock.patch('nova.servicegroup.api.API.service_is_up', return_value=True) + @mock.patch('nova.compute.api.API._record_action_start') + @mock.patch('nova.compute.api.API._local_delete') + def test_delete_error_state_with_host_set( + self, mock_local_delete, _mock_record, mock_service_up, + mock_elevated, mock_service_get, _mock_notify, _mock_save, + mock_bdm_get, mock_lookup, _mock_del_booting): + # Instance in error state with host set should be a non-local delete + # for non API cells if the service is up + inst = self._create_instance_obj(params=dict(vm_state=vm_states.ERROR, + host='fake-host')) + mock_lookup.return_value = inst + with mock.patch.object(self.compute_api.compute_rpcapi, + 'terminate_instance') as mock_terminate: + self.compute_api.delete(self.context, inst) + if self.cell_type == 'api': + mock_terminate.assert_called_once_with( + self.context, inst, mock_bdm_get.return_value, + delete_type='delete') + mock_local_delete.assert_not_called() + mock_service_get.assert_not_called() + else: + 
mock_service_get.assert_called_once_with( + mock_elevated.return_value, 'fake-host') + mock_service_up.assert_called_once_with( + mock_service_get.return_value) + mock_terminate.assert_called_once_with( + self.context, inst, mock_bdm_get.return_value, + delete_type='delete') + mock_local_delete.assert_not_called() + def test_delete_forced_when_task_state_is_not_none(self): for vm_state in self._get_vm_states(): self._test_delete('force_delete', vm_state=vm_state, @@ -1425,6 +1493,52 @@ class _ComputeAPIUnitTestMixIn(object): self.assertIsNone( self.compute_api._get_stashed_volume_connector(bdm, inst)) + @mock.patch.object(objects.BlockDeviceMapping, 'destroy') + def test_local_cleanup_bdm_volumes_stashed_connector_host_none( + self, mock_destroy): + """Tests that we call volume_api.terminate_connection when we found + a stashed connector in the bdm.connection_info dict. + + This tests the case where: + + 1) the instance host is None + 2) the instance vm_state is one where we expect host to be None + + We allow a mismatch of the host in this situation if the instance is + in a state where we expect its host to have been set to None, such + as ERROR or SHELVED_OFFLOADED. 
+ """ + params = dict(host=None, vm_state=vm_states.ERROR) + inst = self._create_instance_obj(params=params) + conn_info = {'connector': {'host': 'orig-host'}} + vol_bdm = objects.BlockDeviceMapping(self.context, id=1, + instance_uuid=inst.uuid, + volume_id=uuids.volume_id, + source_type='volume', + destination_type='volume', + delete_on_termination=True, + connection_info=jsonutils.dumps( + conn_info), + attachment_id=None) + bdms = objects.BlockDeviceMappingList(objects=[vol_bdm]) + + @mock.patch.object(self.compute_api.volume_api, 'terminate_connection') + @mock.patch.object(self.compute_api.volume_api, 'detach') + @mock.patch.object(self.compute_api.volume_api, 'delete') + @mock.patch.object(self.context, 'elevated', return_value=self.context) + def do_test(self, mock_elevated, mock_delete, + mock_detach, mock_terminate): + self.compute_api._local_cleanup_bdm_volumes( + bdms, inst, self.context) + mock_terminate.assert_called_once_with( + self.context, uuids.volume_id, conn_info['connector']) + mock_detach.assert_called_once_with( + self.context, uuids.volume_id, inst.uuid) + mock_delete.assert_called_once_with(self.context, uuids.volume_id) + mock_destroy.assert_called_once_with() + + do_test(self) + def test_local_delete_without_info_cache(self): inst = self._create_instance_obj() @@ -5773,6 +5887,57 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase): fields=['device_id']) self.assertEqual([], instances.objects) + @mock.patch('nova.compute.api.API._delete_while_booting', + return_value=False) + @mock.patch('nova.compute.api.API._lookup_instance') + @mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid') + @mock.patch('nova.context.RequestContext.elevated') + @mock.patch.object(objects.Instance, 'save') + @mock.patch.object(compute_utils, 'notify_about_instance_usage') + @mock.patch.object(objects.BlockDeviceMapping, 'destroy') + @mock.patch.object(objects.Instance, 'destroy') + def 
_test_delete_volume_backed_instance( + self, vm_state, mock_instance_destroy, bdm_destroy, + notify_about_instance_usage, mock_save, mock_elevated, + bdm_get_by_instance_uuid, mock_lookup, _mock_del_booting): + volume_id = uuidutils.generate_uuid() + conn_info = {'connector': {'host': 'orig-host'}} + bdms = [objects.BlockDeviceMapping( + **fake_block_device.FakeDbBlockDeviceDict( + {'id': 42, 'volume_id': volume_id, + 'source_type': 'volume', 'destination_type': 'volume', + 'delete_on_termination': False, + 'connection_info': jsonutils.dumps(conn_info)}))] + + bdm_get_by_instance_uuid.return_value = bdms + mock_elevated.return_value = self.context + + params = {'host': None, 'vm_state': vm_state} + inst = self._create_instance_obj(params=params) + mock_lookup.return_value = None, inst + connector = conn_info['connector'] + + with mock.patch.object(self.compute_api.network_api, + 'deallocate_for_instance') as mock_deallocate, \ + mock.patch.object(self.compute_api.volume_api, + 'terminate_connection') as mock_terminate_conn, \ + mock.patch.object(self.compute_api.volume_api, + 'detach') as mock_detach: + self.compute_api.delete(self.context, inst) + + mock_deallocate.assert_called_once_with(self.context, inst) + mock_detach.assert_called_once_with(self.context, volume_id, + inst.uuid) + mock_terminate_conn.assert_called_once_with(self.context, + volume_id, connector) + bdm_destroy.assert_called_once_with() + + def test_delete_volume_backed_instance_in_error(self): + self._test_delete_volume_backed_instance(vm_states.ERROR) + + def test_delete_volume_backed_instance_in_shelved_offloaded(self): + self._test_delete_volume_backed_instance(vm_states.SHELVED_OFFLOADED) + class Cellsv1DeprecatedTestMixIn(object): @mock.patch.object(objects.BuildRequestList, 'get_by_filters')