Delete ERROR+DELETING VMs during compute startup.
We should perhaps do this check during message bus reconnection as well. Anyhow, if a compute node is offline during a nova API call to delete an instance and the rabbit message is lost for some reason (or, alternatively, if the delete method throws an error), then the task state is not cleared and won't be cleared on compute restart, leaving the instance wedged forever. Change-Id: Ie0a47958eb0fb58307902437a95634d5f54f74f3 Fixes-bug: #1281324 Co-Authored-By: Steve Kowalik <steven@wedontsleep.org>
This commit is contained in:
parent
fdf248652a
commit
556ab844c8
|
@ -782,11 +782,12 @@ class ComputeManager(manager.Manager):
|
|||
|
||||
# Instances that are shut down, or in an error state can not be
|
||||
# initialized and are not attempted to be recovered. The exception
|
||||
# to this are instances that are in RESIZE_MIGRATING, which are
|
||||
# attempted recovery further down.
|
||||
# to this are instances that are in RESIZE_MIGRATING or DELETING,
|
||||
# which are dealt with further down.
|
||||
if (instance.vm_state == vm_states.SOFT_DELETED or
|
||||
(instance.vm_state == vm_states.ERROR and
|
||||
instance.task_state != task_states.RESIZE_MIGRATING)):
|
||||
instance.task_state not in
|
||||
(task_states.RESIZE_MIGRATING, task_states.DELETING))):
|
||||
LOG.debug(_("Instance is in %s state."),
|
||||
instance.vm_state, instance=instance)
|
||||
return
|
||||
|
|
|
@ -451,6 +451,27 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
self.compute._init_instance(self.context, instance)
|
||||
self.mox.VerifyAll()
|
||||
|
||||
def test_init_instance_deletes_error_deleting_instance(self):
|
||||
instance = instance_obj.Instance(self.context)
|
||||
instance.uuid = 'foo'
|
||||
instance.vm_state = vm_states.ERROR
|
||||
instance.task_state = task_states.DELETING
|
||||
self.mox.StubOutWithMock(block_device_obj.BlockDeviceMappingList,
|
||||
'get_by_instance_uuid')
|
||||
self.mox.StubOutWithMock(self.compute, '_delete_instance')
|
||||
self.mox.StubOutWithMock(instance, 'obj_load_attr')
|
||||
|
||||
bdms = []
|
||||
instance.obj_load_attr('metadata')
|
||||
instance.obj_load_attr('system_metadata')
|
||||
block_device_obj.BlockDeviceMappingList.get_by_instance_uuid(
|
||||
self.context, instance.uuid).AndReturn(bdms)
|
||||
self.compute._delete_instance(self.context, instance, bdms)
|
||||
self.mox.ReplayAll()
|
||||
|
||||
self.compute._init_instance(self.context, instance)
|
||||
self.mox.VerifyAll()
|
||||
|
||||
def test_get_instances_on_driver(self):
|
||||
fake_context = context.get_admin_context()
|
||||
|
||||
|
|
Loading…
Reference in New Issue