Detach volume after deleting instance with no host
If an instance is booted from a volume, shelved, and goes into an error state due to some reason, the volume from which instance is booted remains even the instance is deleted because instance has no host associated with it. Called _local_delete() to detach volume and destroy bdm if instance is in shelved_offloaded state or has no host associated with it. This will cleanup both volumes and the networks. Note: Ankit had submitted same patch [1] earlier which was reverted [2] due to a race condition on jenkins if an instance is deleted when it is in building state. The patch was then rebumitted [3] fixing the the failure of race condition by reverting the ObjectActionError exception handling in _delete. This patch was later re-reverted [4] due to continued jenkins race conditions. The patch [5] intended to avoid the jenkins race condition by leaving the flow for instances in the BUILDING state unchanged and only calling _local_delete() on instances in the shelved_offloaded or error states when the instance has no host associated with it. It however also had to be reverted [6] because of yet another race condition. This version takes a more minimal approach of adding the ERROR state to the logic for doing a local delete plus cleanup of resources on a compute host. Comments have also been added to the existing code to explain more about the different flows. [1] Ic630ae7d026a9697afec46ac9ea40aea0f5b5ffb [2] Id4e405e7579530ed1c1f22ccc972d45b6d185f41 [3] Ic107d8edc7ee7a4ebb04eac58ef0cdbf506d6173 [4] Ibcbe35b5d329b183c4d0e8233e8ada26ebc512c2 [5] I928a397c75b857e94bf5c002e50ec43a2bed9848 [6] I6b9b886e0d6f2ec86141c048fb50969bccf5cb30 Co-Authored-By: Ankit Agrawal <ankit11.agrawal@nttdata.com> Co-Authored-By: Samuel Matzek <smatzek@us.ibm.com> Co-Authored-By: melanie witt <melwittt@gmail.com> Closes-Bug: 1404867 Closes-Bug: 1408527 Change-Id: I2192ef513a2cd15d21e9d5d5fe22c5a5fbae0941
This commit is contained in:
parent
9464c2ac1b
commit
5ce74fa06c
nova
@ -1588,9 +1588,12 @@ class API(base.Base):
|
||||
cb(context, instance, bdms, reservations=None)
|
||||
quotas.commit()
|
||||
return
|
||||
shelved_offloaded = (instance.vm_state
|
||||
== vm_states.SHELVED_OFFLOADED)
|
||||
if not instance.host and not shelved_offloaded:
|
||||
# NOTE(melwitt): We expect instances in certain states to have no
|
||||
# host set but also have resources on a compute host. If this is
|
||||
# the case, we want to try to clean up their resources. Otherwse,
|
||||
# we can destroy the instance here and return.
|
||||
expect_no_instance_host = self._expect_no_host(instance)
|
||||
if not instance.host and not expect_no_instance_host:
|
||||
try:
|
||||
compute_utils.notify_about_instance_usage(
|
||||
self.notifier, context, instance,
|
||||
@ -1602,7 +1605,17 @@ class API(base.Base):
|
||||
system_metadata=instance.system_metadata)
|
||||
quotas.commit()
|
||||
return
|
||||
except exception.ObjectActionError:
|
||||
except exception.ObjectActionError as ex:
|
||||
# The instance's host likely changed under us as
|
||||
# this instance could be building and has since been
|
||||
# scheduled. Continue with attempts to delete it.
|
||||
# NOTE(melwitt): Often instance.host is None after the
|
||||
# refresh because the claim was aborted on the compute
|
||||
# host. The service up check will raise ComputeHostNotFound
|
||||
# in this case and we will do a local delete with compute
|
||||
# host cleanup
|
||||
LOG.debug('Refreshing instance because: %s', ex,
|
||||
instance=instance)
|
||||
instance.refresh()
|
||||
|
||||
if instance.vm_state == vm_states.RESIZED:
|
||||
@ -1610,7 +1623,7 @@ class API(base.Base):
|
||||
|
||||
is_local_delete = True
|
||||
try:
|
||||
if not shelved_offloaded:
|
||||
if not expect_no_instance_host:
|
||||
service = objects.Service.get_by_compute_host(
|
||||
context.elevated(), instance.host)
|
||||
is_local_delete = not self.servicegroup_api.service_is_up(
|
||||
@ -1637,6 +1650,9 @@ class API(base.Base):
|
||||
cb(context, instance, bdms,
|
||||
reservations=quotas.reservations)
|
||||
except exception.ComputeHostNotFound:
|
||||
# NOTE(melwitt): We expect this if instance.host has been
|
||||
# set to None by the compute host during a claim abort
|
||||
# and we pick it up in the instance.refresh()
|
||||
pass
|
||||
|
||||
if is_local_delete:
|
||||
@ -1655,6 +1671,13 @@ class API(base.Base):
|
||||
if quotas:
|
||||
quotas.rollback()
|
||||
|
||||
def _expect_no_host(self, instance):
|
||||
# NOTE(melwitt): Instances in ERROR state have their host and node
|
||||
# set to None as part of exception handling.
|
||||
if instance.vm_state in (vm_states.SHELVED_OFFLOADED, vm_states.ERROR):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _confirm_resize_on_deleting(self, context, instance):
|
||||
# If in the middle of a resize, use confirm_resize to
|
||||
# ensure the original instance is cleaned up too
|
||||
@ -1748,6 +1771,22 @@ class API(base.Base):
|
||||
connector)
|
||||
self.volume_api.detach(elevated, bdm.volume_id,
|
||||
instance.uuid)
|
||||
except Exception as exc:
|
||||
err_str = _LW("Ignoring volume cleanup failure due to %s")
|
||||
LOG.warn(err_str % exc, instance=instance)
|
||||
# This block handles the following case:
|
||||
# 1. Instance scheduled to host and fails on the host.
|
||||
# 2. Compute manager's cleanup calls terminate_connection
|
||||
# and detach if the spawn made it that far.
|
||||
# 3. Instance fails to boot on all other reschedule attempts.
|
||||
# 4. Instance is left in error state with no assigned host.
|
||||
# 5. Volumes in the instance's BDMs are left in the available
|
||||
# state.
|
||||
# When this is the case the terminate_connection and detach
|
||||
# calls above will fail. We attempt the volume delete in a
|
||||
# separate try-except to clean up these volume and avoid the
|
||||
# storage leak.
|
||||
try:
|
||||
if bdm.delete_on_termination:
|
||||
self.volume_api.delete(context, bdm.volume_id)
|
||||
except Exception as exc:
|
||||
|
@ -1039,7 +1039,7 @@ class _ComputeAPIUnitTestMixIn(object):
|
||||
self._test_delete_resized_part(inst)
|
||||
if inst.vm_state == vm_states.SOFT_DELETED:
|
||||
soft_delete = True
|
||||
if inst.vm_state != vm_states.SHELVED_OFFLOADED:
|
||||
if not self.compute_api._expect_no_host(inst):
|
||||
self.context.elevated().AndReturn(self.context)
|
||||
objects.Service.get_by_compute_host(self.context,
|
||||
inst.host).AndReturn(objects.Service())
|
||||
@ -1048,8 +1048,7 @@ class _ComputeAPIUnitTestMixIn(object):
|
||||
inst.host != 'down-host')
|
||||
|
||||
if (inst.host == 'down-host' or
|
||||
inst.vm_state == vm_states.SHELVED_OFFLOADED):
|
||||
|
||||
self.compute_api._expect_no_host(inst)):
|
||||
self._test_downed_host_part(inst, updates, delete_time,
|
||||
delete_type)
|
||||
cast = False
|
||||
@ -3766,6 +3765,54 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
|
||||
self.assertRaises(exception.CannotResizeToSameFlavor,
|
||||
self._test_resize, same_flavor=True)
|
||||
|
||||
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
|
||||
@mock.patch('nova.context.RequestContext.elevated')
|
||||
@mock.patch.object(objects.Instance, 'save')
|
||||
@mock.patch.object(quota.QUOTAS, 'reserve')
|
||||
@mock.patch.object(compute_utils, 'notify_about_instance_usage')
|
||||
@mock.patch.object(objects.BlockDeviceMapping, 'destroy')
|
||||
@mock.patch.object(objects.Instance, 'destroy')
|
||||
def _test_delete_volume_backed_instance(
|
||||
self, vm_state, mock_instance_destroy, bdm_destroy,
|
||||
notify_about_instance_usage, mock_reserve,
|
||||
mock_save, mock_elevated, bdm_get_by_instance_uuid):
|
||||
volume_id = uuidutils.generate_uuid()
|
||||
bdms = [objects.BlockDeviceMapping(
|
||||
**fake_block_device.FakeDbBlockDeviceDict(
|
||||
{'id': 42, 'volume_id': volume_id,
|
||||
'source_type': 'volume', 'destination_type': 'volume',
|
||||
'delete_on_termination': False}))]
|
||||
reservations = ['fake-resv']
|
||||
|
||||
bdm_get_by_instance_uuid.return_value = bdms
|
||||
mock_reserve.return_value = reservations
|
||||
mock_elevated.return_value = self.context
|
||||
|
||||
params = {'host': None, 'vm_state': vm_state}
|
||||
inst = self._create_instance_obj(params=params)
|
||||
connector = {'ip': '127.0.0.1', 'initiator': 'iqn.fake'}
|
||||
|
||||
with mock.patch.object(self.compute_api.network_api,
|
||||
'deallocate_for_instance') as mock_deallocate, \
|
||||
mock.patch.object(self.compute_api.volume_api,
|
||||
'terminate_connection') as mock_terminate_conn, \
|
||||
mock.patch.object(self.compute_api.volume_api,
|
||||
'detach') as mock_detach:
|
||||
self.compute_api.delete(self.context, inst)
|
||||
|
||||
mock_deallocate.assert_called_once_with(self.context, inst)
|
||||
mock_detach.assert_called_once_with(self.context, volume_id,
|
||||
inst.uuid)
|
||||
mock_terminate_conn.assert_called_once_with(self.context,
|
||||
volume_id, connector)
|
||||
bdm_destroy.assert_called_once_with()
|
||||
|
||||
def test_delete_volume_backed_instance_in_error(self):
|
||||
self._test_delete_volume_backed_instance(vm_states.ERROR)
|
||||
|
||||
def test_delete_volume_backed_instance_in_shelved_offloaded(self):
|
||||
self._test_delete_volume_backed_instance(vm_states.SHELVED_OFFLOADED)
|
||||
|
||||
|
||||
class ComputeAPIAPICellUnitTestCase(_ComputeAPIUnitTestMixIn,
|
||||
test.NoDBTestCase):
|
||||
|
Loading…
x
Reference in New Issue
Block a user