Delete ERROR+DELETING VMs during compute startup.

We should perhaps do this check during message bus reconnection as
well. Anyhow, if a compute node is offline during a nova API call
to delete an instance and the rabbit message is lost for some
reason (or, alternatively, if the delete method throws an error),
then the task state is not cleared and won't be cleared on compute
restart, leaving the instance wedged forever.

Change-Id: Ie0a47958eb0fb58307902437a95634d5f54f74f3
Fixes-bug: #1281324
Co-Authored-By: Steve Kowalik <steven@wedontsleep.org>
This commit is contained in:
Robert Collins 2014-02-18 16:03:23 +13:00 committed by Steve Kowalik
parent fdf248652a
commit 556ab844c8
2 changed files with 25 additions and 3 deletions

View File

@ -782,11 +782,12 @@ class ComputeManager(manager.Manager):
# Instances that are shut down, or in an error state can not be
# initialized and are not attempted to be recovered. The exception
# to this are instances that are in RESIZE_MIGRATING, which are
# attempted recovery further down.
# to this are instances that are in RESIZE_MIGRATING or DELETING,
# which are dealt with further down.
if (instance.vm_state == vm_states.SOFT_DELETED or
(instance.vm_state == vm_states.ERROR and
instance.task_state != task_states.RESIZE_MIGRATING)):
instance.task_state not in
(task_states.RESIZE_MIGRATING, task_states.DELETING))):
LOG.debug(_("Instance is in %s state."),
instance.vm_state, instance=instance)
return

View File

@ -451,6 +451,27 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
self.compute._init_instance(self.context, instance)
self.mox.VerifyAll()
# Verify that _init_instance() finishes the pending delete for an instance
# whose vm_state is ERROR and whose task_state is DELETING.
def test_init_instance_deletes_error_deleting_instance(self):
# Build an instance in the ERROR + DELETING combination under test.
instance = instance_obj.Instance(self.context)
instance.uuid = 'foo'
instance.vm_state = vm_states.ERROR
instance.task_state = task_states.DELETING
# Stub out the collaborators so that no real BDM lookup, attribute
# loading or instance deletion takes place.
self.mox.StubOutWithMock(block_device_obj.BlockDeviceMappingList,
'get_by_instance_uuid')
self.mox.StubOutWithMock(self.compute, '_delete_instance')
self.mox.StubOutWithMock(instance, 'obj_load_attr')
# Empty block device mapping list handed back by the stubbed lookup.
bdms = []
# Record the calls expected from _init_instance(): both metadata
# attributes are loaded, the BDMs are fetched by instance uuid, and
# _delete_instance() is invoked with those BDMs.
instance.obj_load_attr('metadata')
instance.obj_load_attr('system_metadata')
block_device_obj.BlockDeviceMappingList.get_by_instance_uuid(
self.context, instance.uuid).AndReturn(bdms)
self.compute._delete_instance(self.context, instance, bdms)
# Switch the mocks from recording to replay, run the code under test,
# then check that every recorded expectation was satisfied.
self.mox.ReplayAll()
self.compute._init_instance(self.context, instance)
self.mox.VerifyAll()
def test_get_instances_on_driver(self):
fake_context = context.get_admin_context()