From ba406564030d834353a8a1899c8520e522aa39fe Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Mon, 26 Aug 2019 10:28:37 +0300 Subject: [PATCH] Avoid error state for recovered instances after failed migrations Most users expect that if a live migration fails but the instance is fully recovered, it shouldn't enter 'error' state. Setting the migration status to 'error' should be enough. This simplifies debugging, making it clear that the instance dosn't have to be manually recovered. This patch changed this behavior, indirectly affecting the Hyper-V driver: Idfdce9e7dd8106af01db0358ada15737cb846395 We'll stop propagating exceptions when managing to recover the instance, which matches the reference driver (libvirt) behavior. Change-Id: I4b667c06e008b03457d35ce9c920b916d21c255f Closes-Bug: 1841411 (cherry picked from commit a721681e6111d85264e20085d1f184d1c923b69e) --- .../nova/cluster/livemigrationops.py | 18 +++++++------- compute_hyperv/nova/livemigrationops.py | 12 ++++++---- .../unit/cluster/test_livemigrationops.py | 4 +--- .../tests/unit/test_livemigrationops.py | 24 ++++++++----------- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/compute_hyperv/nova/cluster/livemigrationops.py b/compute_hyperv/nova/cluster/livemigrationops.py index 551ca77d..75e4adaf 100644 --- a/compute_hyperv/nova/cluster/livemigrationops.py +++ b/compute_hyperv/nova/cluster/livemigrationops.py @@ -20,7 +20,6 @@ from nova import exception from os_win import constants as os_win_const from os_win import utilsfactory from oslo_log import log as logging -from oslo_utils import excutils from compute_hyperv.i18n import _ import compute_hyperv.nova.conf @@ -72,14 +71,17 @@ class ClusterLiveMigrationOps(livemigrationops.LiveMigrationOps): dest, CONF.hyperv.instance_live_migration_timeout) except Exception: - with excutils.save_and_reraise_exception(): - self._check_failed_instance_migration( - instance_ref, - expected_state=os_win_const.CLUSTER_GROUP_ONLINE) + LOG.exception("Live migration failed. Attempting rollback.", + instance=instance_ref) + # The recover method will update the migration state. + # We won't error out if we manage to recover the instance, + # which would otherwise end up in error state. + self._check_failed_instance_migration( + instance_ref, + expected_state=os_win_const.CLUSTER_GROUP_ONLINE) - LOG.debug("Calling live migration recover_method " - "for instance.", instance=instance_ref) - recover_method(context, instance_ref, dest, migrate_data) + recover_method(context, instance_ref, dest, migrate_data) + return LOG.debug("Calling live migration post_method for instance.", instance=instance_ref) diff --git a/compute_hyperv/nova/livemigrationops.py b/compute_hyperv/nova/livemigrationops.py index ceb07be2..7f74ef55 100644 --- a/compute_hyperv/nova/livemigrationops.py +++ b/compute_hyperv/nova/livemigrationops.py @@ -21,7 +21,6 @@ from nova import exception from nova.objects import migrate_data as migrate_data_obj from os_win import utilsfactory from oslo_log import log as logging -from oslo_utils import excutils from compute_hyperv.i18n import _ from compute_hyperv.nova import block_device_manager @@ -77,10 +76,13 @@ class LiveMigrationOps(object): dest, migrate_disks=not shared_storage) except Exception: - with excutils.save_and_reraise_exception(): - LOG.debug("Calling live migration recover_method " - "for instance: %s", instance_name) - recover_method(context, instance_ref, dest, migrate_data) + # The recover method will update the migration state. + # We won't error out if we manage to recover the instance, + # which would otherwise end up in error state. + LOG.exception("Live migration failed. Attempting rollback.", + instance=instance_ref) + recover_method(context, instance_ref, dest, migrate_data) + return LOG.debug("Calling live migration post_method for instance: %s", instance_name) diff --git a/compute_hyperv/tests/unit/cluster/test_livemigrationops.py b/compute_hyperv/tests/unit/cluster/test_livemigrationops.py index 864650b5..555c4559 100644 --- a/compute_hyperv/tests/unit/cluster/test_livemigrationops.py +++ b/compute_hyperv/tests/unit/cluster/test_livemigrationops.py @@ -86,9 +86,7 @@ class ClusterLiveMigrationOpsTestCase(test_base.HyperVBaseTestCase): clustutils = self.livemigrops._clustutils clustutils.live_migrate_vm.side_effect = nova_test.TestingException - self.assertRaises( - nova_test.TestingException, - self.livemigrops.live_migration, + self.livemigrops.live_migration( self._fake_context, mock_instance, dest, mock.sentinel.post_method, recover_method, block_migration=mock.sentinel.block_migration, diff --git a/compute_hyperv/tests/unit/test_livemigrationops.py b/compute_hyperv/tests/unit/test_livemigrationops.py index e62f0c0c..89699d2e 100644 --- a/compute_hyperv/tests/unit/test_livemigrationops.py +++ b/compute_hyperv/tests/unit/test_livemigrationops.py @@ -73,25 +73,21 @@ class LiveMigrationOpsTestCase(test_base.HyperVBaseTestCase): else: migrate_data = None + self._livemigrops.live_migration(context=self.context, + instance_ref=mock_instance, + dest=fake_dest, + post_method=mock_post, + recover_method=mock_recover, + block_migration=( + mock.sentinel.block_migr), + migrate_data=migrate_data) + if side_effect is os_win_exc.HyperVException: - self.assertRaises(os_win_exc.HyperVException, - self._livemigrops.live_migration, - self.context, mock_instance, fake_dest, - mock_post, mock_recover, - mock.sentinel.block_migr, - migrate_data) mock_recover.assert_called_once_with(self.context, mock_instance, fake_dest, migrate_data) + mock_post.assert_not_called() else: - self._livemigrops.live_migration(context=self.context, - instance_ref=mock_instance, - dest=fake_dest, - post_method=mock_post, - recover_method=mock_recover, - block_migration=( - mock.sentinel.block_migr), - migrate_data=migrate_data) post_call_args = mock_post.call_args_list self.assertEqual(1, len(post_call_args))