Revert "Detach volume after deleting instance with no host"

This reverts commit 5ce74fa06c.

We think this is causing a race and the postgres job to fail
since it's erroneously doing local deletes and not cleaning
up from the computes. We're not totally sure why it would
only impact the postgres job though, but the original change
failed the job and was rechecked, and the time it was merged
coincides with when the postgres job started spiking with
failures.

Related-Bug: #1600005
Closes-Bug: #1600031

Change-Id: I0ed184b579b8a8d861e4d2a7c317bf0bc0623d50
This commit is contained in:
Matt Riedemann 2016-07-11 17:14:16 +00:00
parent 5ce74fa06c
commit c1a83a3fb8
2 changed files with 8 additions and 94 deletions

View File

@ -1588,12 +1588,9 @@ class API(base.Base):
cb(context, instance, bdms, reservations=None)
quotas.commit()
return
# NOTE(melwitt): We expect instances in certain states to have no
# host set but also have resources on a compute host. If this is
# the case, we want to try to clean up their resources. Otherwse,
# we can destroy the instance here and return.
expect_no_instance_host = self._expect_no_host(instance)
if not instance.host and not expect_no_instance_host:
shelved_offloaded = (instance.vm_state
== vm_states.SHELVED_OFFLOADED)
if not instance.host and not shelved_offloaded:
try:
compute_utils.notify_about_instance_usage(
self.notifier, context, instance,
@ -1605,17 +1602,7 @@ class API(base.Base):
system_metadata=instance.system_metadata)
quotas.commit()
return
except exception.ObjectActionError as ex:
# The instance's host likely changed under us as
# this instance could be building and has since been
# scheduled. Continue with attempts to delete it.
# NOTE(melwitt): Often instance.host is None after the
# refresh because the claim was aborted on the compute
# host. The service up check will raise ComputeHostNotFound
# in this case and we will do a local delete with compute
# host cleanup
LOG.debug('Refreshing instance because: %s', ex,
instance=instance)
except exception.ObjectActionError:
instance.refresh()
if instance.vm_state == vm_states.RESIZED:
@ -1623,7 +1610,7 @@ class API(base.Base):
is_local_delete = True
try:
if not expect_no_instance_host:
if not shelved_offloaded:
service = objects.Service.get_by_compute_host(
context.elevated(), instance.host)
is_local_delete = not self.servicegroup_api.service_is_up(
@ -1650,9 +1637,6 @@ class API(base.Base):
cb(context, instance, bdms,
reservations=quotas.reservations)
except exception.ComputeHostNotFound:
# NOTE(melwitt): We expect this if instance.host has been
# set to None by the compute host during a claim abort
# and we pick it up in the instance.refresh()
pass
if is_local_delete:
@ -1671,13 +1655,6 @@ class API(base.Base):
if quotas:
quotas.rollback()
def _expect_no_host(self, instance):
# NOTE(melwitt): Instances in ERROR state have their host and node
# set to None as part of exception handling.
if instance.vm_state in (vm_states.SHELVED_OFFLOADED, vm_states.ERROR):
return True
return False
def _confirm_resize_on_deleting(self, context, instance):
# If in the middle of a resize, use confirm_resize to
# ensure the original instance is cleaned up too
@ -1771,22 +1748,6 @@ class API(base.Base):
connector)
self.volume_api.detach(elevated, bdm.volume_id,
instance.uuid)
except Exception as exc:
err_str = _LW("Ignoring volume cleanup failure due to %s")
LOG.warn(err_str % exc, instance=instance)
# This block handles the following case:
# 1. Instance scheduled to host and fails on the host.
# 2. Compute manager's cleanup calls terminate_connection
# and detach if the spawn made it that far.
# 3. Instance fails to boot on all other reschedule attempts.
# 4. Instance is left in error state with no assigned host.
# 5. Volumes in the instance's BDMs are left in the available
# state.
# When this is the case the terminate_connection and detach
# calls above will fail. We attempt the volume delete in a
# separate try-except to clean up these volume and avoid the
# storage leak.
try:
if bdm.delete_on_termination:
self.volume_api.delete(context, bdm.volume_id)
except Exception as exc:

View File

@ -1039,7 +1039,7 @@ class _ComputeAPIUnitTestMixIn(object):
self._test_delete_resized_part(inst)
if inst.vm_state == vm_states.SOFT_DELETED:
soft_delete = True
if not self.compute_api._expect_no_host(inst):
if inst.vm_state != vm_states.SHELVED_OFFLOADED:
self.context.elevated().AndReturn(self.context)
objects.Service.get_by_compute_host(self.context,
inst.host).AndReturn(objects.Service())
@ -1048,7 +1048,8 @@ class _ComputeAPIUnitTestMixIn(object):
inst.host != 'down-host')
if (inst.host == 'down-host' or
self.compute_api._expect_no_host(inst)):
inst.vm_state == vm_states.SHELVED_OFFLOADED):
self._test_downed_host_part(inst, updates, delete_time,
delete_type)
cast = False
@ -3765,54 +3766,6 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
self.assertRaises(exception.CannotResizeToSameFlavor,
self._test_resize, same_flavor=True)
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
@mock.patch('nova.context.RequestContext.elevated')
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(quota.QUOTAS, 'reserve')
@mock.patch.object(compute_utils, 'notify_about_instance_usage')
@mock.patch.object(objects.BlockDeviceMapping, 'destroy')
@mock.patch.object(objects.Instance, 'destroy')
def _test_delete_volume_backed_instance(
self, vm_state, mock_instance_destroy, bdm_destroy,
notify_about_instance_usage, mock_reserve,
mock_save, mock_elevated, bdm_get_by_instance_uuid):
volume_id = uuidutils.generate_uuid()
bdms = [objects.BlockDeviceMapping(
**fake_block_device.FakeDbBlockDeviceDict(
{'id': 42, 'volume_id': volume_id,
'source_type': 'volume', 'destination_type': 'volume',
'delete_on_termination': False}))]
reservations = ['fake-resv']
bdm_get_by_instance_uuid.return_value = bdms
mock_reserve.return_value = reservations
mock_elevated.return_value = self.context
params = {'host': None, 'vm_state': vm_state}
inst = self._create_instance_obj(params=params)
connector = {'ip': '127.0.0.1', 'initiator': 'iqn.fake'}
with mock.patch.object(self.compute_api.network_api,
'deallocate_for_instance') as mock_deallocate, \
mock.patch.object(self.compute_api.volume_api,
'terminate_connection') as mock_terminate_conn, \
mock.patch.object(self.compute_api.volume_api,
'detach') as mock_detach:
self.compute_api.delete(self.context, inst)
mock_deallocate.assert_called_once_with(self.context, inst)
mock_detach.assert_called_once_with(self.context, volume_id,
inst.uuid)
mock_terminate_conn.assert_called_once_with(self.context,
volume_id, connector)
bdm_destroy.assert_called_once_with()
def test_delete_volume_backed_instance_in_error(self):
self._test_delete_volume_backed_instance(vm_states.ERROR)
def test_delete_volume_backed_instance_in_shelved_offloaded(self):
self._test_delete_volume_backed_instance(vm_states.SHELVED_OFFLOADED)
class ComputeAPIAPICellUnitTestCase(_ComputeAPIUnitTestMixIn,
test.NoDBTestCase):