Avoid error state for recovered instances after failed migrations

Most users expect that if a live migration fails but the instance
is fully recovered, it shouldn't enter 'error' state. Setting
the migration status to 'error' should be enough. This simplifies
debugging, making it clear that the instance dosn't have to be
manually recovered.

This patch changed this behavior, indirectly affecting the Hyper-V
driver: Idfdce9e7dd8106af01db0358ada15737cb846395

We'll stop propagating exceptions when managing to recover the
instance, which matches the reference driver (libvirt) behavior.

Change-Id: I4b667c06e008b03457d35ce9c920b916d21c255f
Closes-Bug: 1841411
This commit is contained in:
Lucian Petrut 2019-08-26 10:28:37 +03:00
parent d9c1cb6aa1
commit a721681e61
4 changed files with 28 additions and 30 deletions

View File

@ -20,7 +20,6 @@ from nova import exception
from os_win import constants as os_win_const
from os_win import utilsfactory
from oslo_log import log as logging
from oslo_utils import excutils
from compute_hyperv.i18n import _
import compute_hyperv.nova.conf
@ -72,14 +71,17 @@ class ClusterLiveMigrationOps(livemigrationops.LiveMigrationOps):
dest,
CONF.hyperv.instance_live_migration_timeout)
except Exception:
with excutils.save_and_reraise_exception():
self._check_failed_instance_migration(
instance_ref,
expected_state=os_win_const.CLUSTER_GROUP_ONLINE)
LOG.exception("Live migration failed. Attempting rollback.",
instance=instance_ref)
# The recover method will update the migration state.
# We won't error out if we manage to recover the instance,
# which would otherwise end up in error state.
self._check_failed_instance_migration(
instance_ref,
expected_state=os_win_const.CLUSTER_GROUP_ONLINE)
LOG.debug("Calling live migration recover_method "
"for instance.", instance=instance_ref)
recover_method(context, instance_ref, dest, migrate_data)
recover_method(context, instance_ref, dest, migrate_data)
return
LOG.debug("Calling live migration post_method for instance.",
instance=instance_ref)

View File

@ -21,7 +21,6 @@ from nova import exception
from nova.objects import migrate_data as migrate_data_obj
from os_win import utilsfactory
from oslo_log import log as logging
from oslo_utils import excutils
from compute_hyperv.i18n import _
from compute_hyperv.nova import block_device_manager
@ -77,10 +76,13 @@ class LiveMigrationOps(object):
dest,
migrate_disks=not shared_storage)
except Exception:
with excutils.save_and_reraise_exception():
LOG.debug("Calling live migration recover_method "
"for instance: %s", instance_name)
recover_method(context, instance_ref, dest, migrate_data)
# The recover method will update the migration state.
# We won't error out if we manage to recover the instance,
# which would otherwise end up in error state.
LOG.exception("Live migration failed. Attempting rollback.",
instance=instance_ref)
recover_method(context, instance_ref, dest, migrate_data)
return
LOG.debug("Calling live migration post_method for instance: %s",
instance_name)

View File

@ -86,9 +86,7 @@ class ClusterLiveMigrationOpsTestCase(test_base.HyperVBaseTestCase):
clustutils = self.livemigrops._clustutils
clustutils.live_migrate_vm.side_effect = nova_test.TestingException
self.assertRaises(
nova_test.TestingException,
self.livemigrops.live_migration,
self.livemigrops.live_migration(
self._fake_context, mock_instance, dest, mock.sentinel.post_method,
recover_method,
block_migration=mock.sentinel.block_migration,

View File

@ -73,25 +73,21 @@ class LiveMigrationOpsTestCase(test_base.HyperVBaseTestCase):
else:
migrate_data = None
self._livemigrops.live_migration(context=self.context,
instance_ref=mock_instance,
dest=fake_dest,
post_method=mock_post,
recover_method=mock_recover,
block_migration=(
mock.sentinel.block_migr),
migrate_data=migrate_data)
if side_effect is os_win_exc.HyperVException:
self.assertRaises(os_win_exc.HyperVException,
self._livemigrops.live_migration,
self.context, mock_instance, fake_dest,
mock_post, mock_recover,
mock.sentinel.block_migr,
migrate_data)
mock_recover.assert_called_once_with(self.context, mock_instance,
fake_dest,
migrate_data)
mock_post.assert_not_called()
else:
self._livemigrops.live_migration(context=self.context,
instance_ref=mock_instance,
dest=fake_dest,
post_method=mock_post,
recover_method=mock_recover,
block_migration=(
mock.sentinel.block_migr),
migrate_data=migrate_data)
post_call_args = mock_post.call_args_list
self.assertEqual(1, len(post_call_args))