Nil out inst.host and inst.node when build fails
This patch addresses a bug that leaves instance.host/node set after a VM fails to build or reschedule (e.g., due to an error in the spawn path). Rather, we should nil these out at the end of the build process so that the instance is not erroneously being associated with stale host/node information. Also included are a few cosmetic updates in comments and eliminating some implied 'None' parameters from "dict.get(key, None)" calls I found during the course of investigating this change. Change-Id: I4d9c90b1d3fcce5237323fffcc6db1fb93d23e37 Closes-Bug: #1427944
This commit is contained in:
parent
afd264ac36
commit
5d8eb7184e
|
@ -741,6 +741,14 @@ class ComputeManager(manager.Manager):
|
|||
self._update_resource_tracker(context, instance_ref)
|
||||
return instance_ref
|
||||
|
||||
def _nil_out_instance_obj_host_and_node(self, instance):
|
||||
# NOTE(jwcroppe): We don't do instance.save() here for performance
|
||||
# reasons; a call to this is expected to be immediately followed by
|
||||
# another call that does instance.save(), thus avoiding two writes
|
||||
# to the database layer.
|
||||
instance.host = None
|
||||
instance.node = None
|
||||
|
||||
def _set_instance_obj_error_state(self, context, instance):
|
||||
try:
|
||||
instance.vm_state = vm_states.ERROR
|
||||
|
@ -1456,7 +1464,7 @@ class ComputeManager(manager.Manager):
|
|||
"""Attempt to re-schedule a compute operation."""
|
||||
|
||||
instance_uuid = instance.uuid
|
||||
retry = filter_properties.get('retry', None)
|
||||
retry = filter_properties.get('retry')
|
||||
if not retry:
|
||||
# no retry information, do not reschedule.
|
||||
LOG.debug("Retry info not present, will not reschedule",
|
||||
|
@ -1893,7 +1901,7 @@ class ComputeManager(manager.Manager):
|
|||
filter_properties)
|
||||
return build_results.ACTIVE
|
||||
except exception.RescheduledException as e:
|
||||
retry = filter_properties.get('retry', None)
|
||||
retry = filter_properties.get('retry')
|
||||
if not retry:
|
||||
# no retry information, do not reschedule.
|
||||
LOG.debug("Retry info not present, will not reschedule",
|
||||
|
@ -1902,6 +1910,7 @@ class ComputeManager(manager.Manager):
|
|||
requested_networks)
|
||||
compute_utils.add_instance_fault_from_exc(context,
|
||||
instance, e, sys.exc_info())
|
||||
self._nil_out_instance_obj_host_and_node(instance)
|
||||
self._set_instance_obj_error_state(context, instance)
|
||||
return build_results.FAILED
|
||||
LOG.debug(e.format_message(), instance=instance)
|
||||
|
@ -1914,11 +1923,12 @@ class ComputeManager(manager.Manager):
|
|||
else:
|
||||
# NOTE(alex_xu): Network already allocated and we don't
|
||||
# want to deallocate them before rescheduling. But we need
|
||||
# cleanup those network resource setup on this host before
|
||||
# to cleanup those network resources setup on this host before
|
||||
# rescheduling.
|
||||
self.network_api.cleanup_instance_network_on_host(
|
||||
context, instance, self.host)
|
||||
|
||||
self._nil_out_instance_obj_host_and_node(instance)
|
||||
instance.task_state = task_states.SCHEDULING
|
||||
instance.save()
|
||||
|
||||
|
@ -1942,6 +1952,7 @@ class ComputeManager(manager.Manager):
|
|||
block_device_mapping, raise_exc=False)
|
||||
compute_utils.add_instance_fault_from_exc(context, instance,
|
||||
e, sys.exc_info())
|
||||
self._nil_out_instance_obj_host_and_node(instance)
|
||||
self._set_instance_obj_error_state(context, instance)
|
||||
return build_results.FAILED
|
||||
except Exception as e:
|
||||
|
@ -1954,6 +1965,7 @@ class ComputeManager(manager.Manager):
|
|||
block_device_mapping, raise_exc=False)
|
||||
compute_utils.add_instance_fault_from_exc(context, instance,
|
||||
e, sys.exc_info())
|
||||
self._nil_out_instance_obj_host_and_node(instance)
|
||||
self._set_instance_obj_error_state(context, instance)
|
||||
return build_results.FAILED
|
||||
|
||||
|
@ -2634,7 +2646,7 @@ class ComputeManager(manager.Manager):
|
|||
" '%s'"), str(image_ref))
|
||||
|
||||
# NOTE(mriedem): On a recreate (evacuate), we need to update
|
||||
# the instance's host and node properties to reflect it's
|
||||
# the instance's host and node properties to reflect its
|
||||
# destination node for the recreate.
|
||||
node_name = None
|
||||
try:
|
||||
|
|
|
@ -384,6 +384,17 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
self.assertIsInstance(compute._build_semaphore,
|
||||
compute_utils.UnlimitedSemaphore)
|
||||
|
||||
def test_nil_out_inst_obj_host_and_node_sets_nil(self):
|
||||
instance = fake_instance.fake_instance_obj(self.context,
|
||||
uuid='foo-uuid',
|
||||
host='foo-host',
|
||||
node='foo-node')
|
||||
self.assertIsNotNone(instance.host)
|
||||
self.assertIsNotNone(instance.node)
|
||||
self.compute._nil_out_instance_obj_host_and_node(instance)
|
||||
self.assertIsNone(instance.host)
|
||||
self.assertIsNone(instance.node)
|
||||
|
||||
def test_init_host(self):
|
||||
our_host = self.compute.host
|
||||
inst = fake_instance.fake_db_instance(
|
||||
|
@ -2804,6 +2815,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
self.mox.StubOutWithMock(self.compute, '_cleanup_allocated_networks')
|
||||
self.mox.StubOutWithMock(self.compute, '_cleanup_volumes')
|
||||
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
'_nil_out_instance_obj_host_and_node')
|
||||
self.mox.StubOutWithMock(self.compute, '_set_instance_obj_error_state')
|
||||
self.mox.StubOutWithMock(self.compute.compute_task_api,
|
||||
'build_instances')
|
||||
|
@ -2821,6 +2834,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
self.block_device_mapping, raise_exc=False)
|
||||
compute_utils.add_instance_fault_from_exc(self.context,
|
||||
self.instance, mox.IgnoreArg(), mox.IgnoreArg())
|
||||
self.compute._nil_out_instance_obj_host_and_node(self.instance)
|
||||
self.compute._set_instance_obj_error_state(self.context, self.instance)
|
||||
self._instance_action_events()
|
||||
self.mox.ReplayAll()
|
||||
|
@ -2845,6 +2859,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
self.mox.StubOutWithMock(self.compute, '_set_instance_obj_error_state')
|
||||
self.mox.StubOutWithMock(self.compute.compute_task_api,
|
||||
'build_instances')
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
'_nil_out_instance_obj_host_and_node')
|
||||
self.mox.StubOutWithMock(self.compute.network_api,
|
||||
'cleanup_instance_network_on_host')
|
||||
self._do_build_instance_update(reschedule_update=True)
|
||||
|
@ -2857,6 +2873,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
instance_uuid=self.instance.uuid))
|
||||
self.compute.network_api.cleanup_instance_network_on_host(self.context,
|
||||
self.instance, self.compute.host)
|
||||
self.compute._nil_out_instance_obj_host_and_node(self.instance)
|
||||
self.compute.compute_task_api.build_instances(self.context,
|
||||
[self.instance], self.image, self.filter_properties,
|
||||
self.admin_pass, self.injected_files, self.requested_networks,
|
||||
|
@ -2964,6 +2981,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
self.mox.StubOutWithMock(self.compute, '_set_instance_obj_error_state')
|
||||
self.mox.StubOutWithMock(self.compute, '_cleanup_allocated_networks')
|
||||
self.mox.StubOutWithMock(self.compute, '_cleanup_volumes')
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
'_nil_out_instance_obj_host_and_node')
|
||||
self._do_build_instance_update()
|
||||
self.compute._build_and_run_instance(self.context, self.instance,
|
||||
self.image, self.injected_files, self.admin_pass,
|
||||
|
@ -2976,6 +2995,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
self.requested_networks)
|
||||
compute_utils.add_instance_fault_from_exc(self.context, self.instance,
|
||||
mox.IgnoreArg(), mox.IgnoreArg())
|
||||
self.compute._nil_out_instance_obj_host_and_node(self.instance)
|
||||
self.compute._set_instance_obj_error_state(self.context,
|
||||
self.instance)
|
||||
self._instance_action_events()
|
||||
|
@ -3002,6 +3022,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
self.mox.StubOutWithMock(self.compute.driver,
|
||||
'deallocate_networks_on_reschedule')
|
||||
self.mox.StubOutWithMock(self.compute, '_cleanup_allocated_networks')
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
'_nil_out_instance_obj_host_and_node')
|
||||
self.mox.StubOutWithMock(self.compute.compute_task_api,
|
||||
'build_instances')
|
||||
self.mox.StubOutWithMock(self.compute.network_api,
|
||||
|
@ -3018,6 +3040,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
self.instance).AndReturn(False)
|
||||
self.compute.network_api.cleanup_instance_network_on_host(
|
||||
self.context, self.instance, self.compute.host)
|
||||
self.compute._nil_out_instance_obj_host_and_node(self.instance)
|
||||
self.compute.compute_task_api.build_instances(self.context,
|
||||
[self.instance], self.image, self.filter_properties,
|
||||
self.admin_pass, self.injected_files, self.requested_networks,
|
||||
|
@ -3046,6 +3069,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
self.mox.StubOutWithMock(self.compute.driver,
|
||||
'deallocate_networks_on_reschedule')
|
||||
self.mox.StubOutWithMock(self.compute, '_cleanup_allocated_networks')
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
'_nil_out_instance_obj_host_and_node')
|
||||
self.mox.StubOutWithMock(self.compute.compute_task_api,
|
||||
'build_instances')
|
||||
self._do_build_instance_update(reschedule_update=True)
|
||||
|
@ -3060,6 +3085,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
self.instance).AndReturn(True)
|
||||
self.compute._cleanup_allocated_networks(self.context, self.instance,
|
||||
self.requested_networks)
|
||||
self.compute._nil_out_instance_obj_host_and_node(self.instance)
|
||||
self.compute.compute_task_api.build_instances(self.context,
|
||||
[self.instance], self.image, self.filter_properties,
|
||||
self.admin_pass, self.injected_files, self.requested_networks,
|
||||
|
@ -3080,7 +3106,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
build_results.RESCHEDULED)
|
||||
|
||||
def _test_build_and_run_exceptions(self, exc, set_error=False,
|
||||
cleanup_volumes=False):
|
||||
cleanup_volumes=False, nil_out_host_and_node=False):
|
||||
self.mox.StubOutWithMock(self.compute, '_build_and_run_instance')
|
||||
self.mox.StubOutWithMock(self.compute, '_cleanup_allocated_networks')
|
||||
self.mox.StubOutWithMock(self.compute, '_cleanup_volumes')
|
||||
|
@ -3097,6 +3123,10 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
if cleanup_volumes:
|
||||
self.compute._cleanup_volumes(self.context, self.instance.uuid,
|
||||
self.block_device_mapping, raise_exc=False)
|
||||
if nil_out_host_and_node:
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
'_nil_out_instance_obj_host_and_node')
|
||||
self.compute._nil_out_instance_obj_host_and_node(self.instance)
|
||||
if set_error:
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
'_set_instance_obj_error_state')
|
||||
|
@ -3139,12 +3169,14 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
instance_uuid='fake_uuid', expected={}, actual={}))
|
||||
|
||||
def test_build_and_run_buildabort_exception(self):
|
||||
self._test_build_and_run_exceptions(exception.BuildAbortException(
|
||||
instance_uuid='', reason=''), set_error=True, cleanup_volumes=True)
|
||||
self._test_build_and_run_exceptions(
|
||||
exception.BuildAbortException(instance_uuid='', reason=''),
|
||||
set_error=True, cleanup_volumes=True, nil_out_host_and_node=True)
|
||||
|
||||
def test_build_and_run_unhandled_exception(self):
|
||||
self._test_build_and_run_exceptions(test.TestingException(),
|
||||
set_error=True, cleanup_volumes=True)
|
||||
set_error=True, cleanup_volumes=True,
|
||||
nil_out_host_and_node=True)
|
||||
|
||||
def test_instance_not_found(self):
|
||||
exc = exception.InstanceNotFound(instance_id=1)
|
||||
|
@ -3310,6 +3342,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
'build_instances')
|
||||
self.mox.StubOutWithMock(self.compute.network_api,
|
||||
'cleanup_instance_network_on_host')
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
'_nil_out_instance_obj_host_and_node')
|
||||
self.compute._get_resource_tracker(self.node).AndReturn(
|
||||
FakeResourceTracker())
|
||||
self._do_build_instance_update(reschedule_update=True)
|
||||
|
@ -3319,6 +3353,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
fault=exc, stub=False)
|
||||
self.compute.network_api.cleanup_instance_network_on_host(
|
||||
self.context, self.instance, self.compute.host)
|
||||
self.compute._nil_out_instance_obj_host_and_node(self.instance)
|
||||
self.compute.compute_task_api.build_instances(self.context,
|
||||
[self.instance], self.image, self.filter_properties,
|
||||
self.admin_pass, self.injected_files, self.requested_networks,
|
||||
|
|
Loading…
Reference in New Issue