Detach volume after deleting instance with no host
If an instance is booted from a volume, shelved, and goes into an error state due to some reason, the volume from which the instance is booted remains even after the instance is deleted, because the instance has no host associated with it. Called _local_delete() to detach the volume and destroy the bdm if the instance is in shelved_offloaded state or has no host associated with it. This will clean up both volumes and the networks. Note: Ankit had submitted the same patch [1] earlier which was reverted [2] due to a race condition on jenkins if an instance is deleted when it is in building state. The patch was then resubmitted [3] fixing the race-condition failure by reverting the ObjectActionError exception handling in _delete. This patch was later re-reverted [4] due to continued jenkins race conditions. The current patch avoids the jenkins race condition by leaving the flow for instances in the BUILDING state unchanged and only calling _local_delete() on instances in the shelved_offloaded or error states when the instance has no host associated with it. This addresses the concerns of the referenced bugs. [1] Ic630ae7d026a9697afec46ac9ea40aea0f5b5ffb [2] Id4e405e7579530ed1c1f22ccc972d45b6d185f41 [3] Ic107d8edc7ee7a4ebb04eac58ef0cdbf506d6173 [4] Ibcbe35b5d329b183c4d0e8233e8ada26ebc512c2 Co-Authored-By: Ankit Agrawal <ankit11.agrawal@nttdata.com> Closes-Bug: 1404867 Closes-Bug: 1408527 Change-Id: I928a397c75b857e94bf5c002e50ec43a2bed9848
This commit is contained in:
parent
89a2f42837
commit
b7f8333765
|
@ -1581,9 +1581,8 @@ class API(base.Base):
|
|||
cb(context, instance, bdms, reservations=None)
|
||||
quotas.commit()
|
||||
return
|
||||
shelved_offloaded = (instance.vm_state
|
||||
== vm_states.SHELVED_OFFLOADED)
|
||||
if not instance.host and not shelved_offloaded:
|
||||
local_delete = self._is_local_delete(context, instance)
|
||||
if not instance.host and not local_delete:
|
||||
try:
|
||||
compute_utils.notify_about_instance_usage(
|
||||
self.notifier, context, instance,
|
||||
|
@ -1596,48 +1595,46 @@ class API(base.Base):
|
|||
quotas.commit()
|
||||
return
|
||||
except exception.ObjectActionError:
|
||||
# The instance's host likely changed under us as
|
||||
# this instance could be building and has since been
|
||||
# scheduled. Continue with attempts to delete it.
|
||||
instance.refresh()
|
||||
|
||||
if instance.vm_state == vm_states.RESIZED:
|
||||
self._confirm_resize_on_deleting(context, instance)
|
||||
|
||||
is_local_delete = True
|
||||
try:
|
||||
if not shelved_offloaded:
|
||||
service = objects.Service.get_by_compute_host(
|
||||
context.elevated(), instance.host)
|
||||
is_local_delete = not self.servicegroup_api.service_is_up(
|
||||
service)
|
||||
if not is_local_delete:
|
||||
if original_task_state in (task_states.DELETING,
|
||||
task_states.SOFT_DELETING):
|
||||
LOG.info(_LI('Instance is already in deleting state, '
|
||||
'ignoring this request'),
|
||||
instance=instance)
|
||||
quotas.rollback()
|
||||
return
|
||||
self._record_action_start(context, instance,
|
||||
instance_actions.DELETE)
|
||||
if local_delete:
|
||||
try:
|
||||
self._local_delete(context, instance, bdms, delete_type,
|
||||
cb)
|
||||
quotas.commit()
|
||||
return
|
||||
except exception.ObjectActionError:
|
||||
# The instance may have been in SHELVED_OFFLOADED
|
||||
# state and may have been unshelved.
|
||||
# Continue with attempts to delete it.
|
||||
instance.refresh()
|
||||
|
||||
# NOTE(snikitin): If instance's vm_state is 'soft-delete',
|
||||
# we should not count reservations here, because instance
|
||||
# in soft-delete vm_state have already had quotas
|
||||
# decremented. More details:
|
||||
# https://bugs.launchpad.net/nova/+bug/1333145
|
||||
if instance.vm_state == vm_states.SOFT_DELETED:
|
||||
quotas.rollback()
|
||||
if original_task_state in (task_states.DELETING,
|
||||
task_states.SOFT_DELETING):
|
||||
LOG.info(_LI('Instance is already in deleting state, '
|
||||
'ignoring this request'),
|
||||
instance=instance)
|
||||
quotas.rollback()
|
||||
return
|
||||
self._record_action_start(context, instance,
|
||||
instance_actions.DELETE)
|
||||
|
||||
cb(context, instance, bdms,
|
||||
reservations=quotas.reservations)
|
||||
except exception.ComputeHostNotFound:
|
||||
pass
|
||||
# NOTE(snikitin): If instance's vm_state is 'soft-delete',
|
||||
# we should not count reservations here, because instance
|
||||
# in soft-delete vm_state have already had quotas
|
||||
# decremented. More details:
|
||||
# https://bugs.launchpad.net/nova/+bug/1333145
|
||||
if instance.vm_state == vm_states.SOFT_DELETED:
|
||||
quotas.rollback()
|
||||
|
||||
if is_local_delete:
|
||||
# If instance is in shelved_offloaded state or compute node
|
||||
# isn't up, delete instance from db and clean bdms info and
|
||||
# network info
|
||||
self._local_delete(context, instance, bdms, delete_type, cb)
|
||||
quotas.commit()
|
||||
cb(context, instance, bdms,
|
||||
reservations=quotas.reservations)
|
||||
|
||||
except exception.InstanceNotFound:
|
||||
# NOTE(comstud): Race condition. Instance already gone.
|
||||
|
@ -1648,6 +1645,23 @@ class API(base.Base):
|
|||
if quotas:
|
||||
quotas.rollback()
|
||||
|
||||
def _is_local_delete(self, context, instance):
    """Decide whether an instance must be deleted via the local path.

    Local delete (database/BDM/network cleanup without an RPC to a
    compute host) applies when:

    * the instance is shelved offloaded, or
    * the instance has no host and is in the ERROR state, or
    * the instance's compute service is down or no longer registered.

    Hostless instances in any other state (e.g. BUILDING) return False
    so the normal delete flow can handle the scheduling race.
    """
    if instance.vm_state == vm_states.SHELVED_OFFLOADED:
        return True

    if not instance.host:
        # Only ERROR-state hostless instances are safe to delete
        # locally; a BUILDING instance may still get scheduled.
        return instance.vm_state == vm_states.ERROR

    try:
        # The instance is assigned to a host: delete locally only
        # when that host's compute service is not up.
        compute_service = objects.Service.get_by_compute_host(
            context.elevated(), instance.host)
    except exception.ComputeHostNotFound:
        # The service record vanished entirely; fall back to local
        # cleanup.
        return True
    return not self.servicegroup_api.service_is_up(compute_service)
|
||||
|
||||
def _confirm_resize_on_deleting(self, context, instance):
|
||||
# If in the middle of a resize, use confirm_resize to
|
||||
# ensure the original instance is cleaned up too
|
||||
|
@ -1741,6 +1755,22 @@ class API(base.Base):
|
|||
connector)
|
||||
self.volume_api.detach(elevated, bdm.volume_id,
|
||||
instance.uuid)
|
||||
except Exception as exc:
|
||||
err_str = _LW("Ignoring volume cleanup failure due to %s")
|
||||
LOG.warn(err_str % exc, instance=instance)
|
||||
# This block handles the following case:
|
||||
# 1. Instance scheduled to host and fails on the host.
|
||||
# 2. Compute manager's cleanup calls terminate_connection
|
||||
# and detach if the spawn made it that far.
|
||||
# 3. Instance fails to boot on all other reschedule attempts.
|
||||
# 4. Instance is left in error state with no assigned host.
|
||||
# 5. Volumes in the instance's BDMs are left in the available
|
||||
# state.
|
||||
# When this is the case the terminate_connection and detach
|
||||
# calls above will fail. We attempt the volume delete in a
|
||||
# separate try-except to clean up these volume and avoid the
|
||||
# storage leak.
|
||||
try:
|
||||
if bdm.delete_on_termination:
|
||||
self.volume_api.delete(context, bdm.volume_id)
|
||||
except Exception as exc:
|
||||
|
|
|
@ -3711,6 +3711,101 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
|
|||
self.assertRaises(exception.CannotResizeToSameFlavor,
|
||||
self._test_resize, same_flavor=True)
|
||||
|
||||
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
@mock.patch('nova.context.RequestContext.elevated')
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(quota.QUOTAS, 'reserve')
@mock.patch.object(compute_utils, 'notify_about_instance_usage')
@mock.patch.object(objects.BlockDeviceMapping, 'destroy')
@mock.patch.object(objects.Instance, 'destroy')
def _test_delete_volume_backed_instance(
        self, vm_state, mock_instance_destroy, bdm_destroy,
        notify_about_instance_usage, mock_reserve,
        mock_save, mock_elevated, bdm_get_by_instance_uuid):
    """Helper: delete a hostless, volume-backed instance in *vm_state*.

    Verifies that the local-delete path deallocates networking,
    terminates the volume connection, detaches the volume, and
    destroys the block device mapping.
    """
    volume_id = uuidutils.generate_uuid()
    # One volume-backed BDM with delete_on_termination=False, so the
    # volume must be detached but not deleted.
    bdms = [objects.BlockDeviceMapping(
            **fake_block_device.FakeDbBlockDeviceDict(
                {'id': 42, 'volume_id': volume_id,
                 'source_type': 'volume', 'destination_type': 'volume',
                 'delete_on_termination': False}))]
    reservations = ['fake-resv']

    bdm_get_by_instance_uuid.return_value = bdms
    mock_reserve.return_value = reservations
    mock_elevated.return_value = self.context

    # host=None forces the delete to go through the local-delete path.
    params = {'host': None, 'vm_state': vm_state}
    inst = self._create_instance_obj(params=params)
    connector = {'ip': '127.0.0.1', 'initiator': 'iqn.fake'}

    with mock.patch.object(self.compute_api.network_api,
                           'deallocate_for_instance') as mock_deallocate, \
        mock.patch.object(self.compute_api.volume_api,
                          'terminate_connection') as mock_terminate_conn, \
        mock.patch.object(self.compute_api.volume_api,
                          'detach') as mock_detach:
        self.compute_api.delete(self.context, inst)

    mock_deallocate.assert_called_once_with(self.context, inst)
    mock_detach.assert_called_once_with(self.context, volume_id,
                                        inst.uuid)
    mock_terminate_conn.assert_called_once_with(self.context,
                                                volume_id, connector)
    bdm_destroy.assert_called_once_with()
|
||||
|
||||
def test_delete_volume_backed_instance_in_error(self):
    # A hostless ERROR-state instance must take the local-delete path.
    self._test_delete_volume_backed_instance(vm_states.ERROR)
|
||||
|
||||
def test_delete_volume_backed_instance_in_shelved_offloaded(self):
    # A shelved-offloaded instance must take the local-delete path.
    self._test_delete_volume_backed_instance(vm_states.SHELVED_OFFLOADED)
|
||||
|
||||
def test_is_local_delete(self):
    """Exercise _is_local_delete across vm_state / host / service combos."""
    updates = {'host': None}
    fake_inst = fake_instance.fake_instance_obj(self.context, **updates)
    # Test the shelved_offloaded VM state.
    fake_inst.vm_state = vm_states.SHELVED_OFFLOADED
    self.assertTrue(self.compute_api._is_local_delete(self.context,
                                                      fake_inst))

    # Test instance in error state with no host
    fake_inst.vm_state = vm_states.ERROR
    self.assertTrue(self.compute_api._is_local_delete(self.context,
                                                      fake_inst))

    # Test instance in building state with no host
    fake_inst.vm_state = vm_states.BUILDING
    self.assertFalse(self.compute_api._is_local_delete(self.context,
                                                       fake_inst))

    # Set host for tests where instance is on a host
    fake_inst.host = 'fake_host'

    # Test instance with a host that is down
    with mock.patch.object(self.context, 'elevated'), \
        mock.patch('nova.objects.Service.get_by_compute_host'), \
        mock.patch.object(self.compute_api.servicegroup_api,
                          'service_is_up') as mock_service_is_up:
        mock_service_is_up.return_value = False
        self.assertTrue(self.compute_api._is_local_delete(self.context,
                                                          fake_inst))

    # Test instance with a host that is up
    with mock.patch.object(self.context, 'elevated'), \
        mock.patch('nova.objects.Service.get_by_compute_host'), \
        mock.patch.object(self.compute_api.servicegroup_api,
                          'service_is_up') as mock_service_is_up:
        mock_service_is_up.return_value = True
        self.assertFalse(self.compute_api._is_local_delete(self.context,
                                                           fake_inst))

    # Test instance with a host that is not found
    with mock.patch.object(self.context, 'elevated'), \
        mock.patch('nova.objects.Service.get_by_compute_host') \
            as mock_get_host:
        # A missing service record also triggers local delete.
        mock_get_host.side_effect = exception.ComputeHostNotFound('host')
        self.assertTrue(self.compute_api._is_local_delete(self.context,
                                                          fake_inst))
|
||||
|
||||
|
||||
class ComputeAPIAPICellUnitTestCase(_ComputeAPIUnitTestMixIn,
|
||||
test.NoDBTestCase):
|
||||
|
|
Loading…
Reference in New Issue