Teardown networking when rolling back live migration even if shared disk

Change I2c86989ab7c6593bf346611cde8c043116d55bc5 way back in Essex
added the "setup_network_on_host" network API calls to the migration
flows, including rollback_live_migration_at_destination. The initial
implementation of that method for Quantum (Neutron) was a no-op.

Change Ib1cc44bf9d01baf4d1f1d26c2a368a5ca7c6ab68 in Newton added the
Neutron implementation for the setup_networks_on_host method in order
to track the destination host being migrated to for instances that
have floating IPs with DVR.

When rolling back from a live migration failure on the destination host,
the "migrating_to" attribute in the port binding profile, added in
pre_live_migration() on the destination compute, is cleared.

However, that only happens in rollback_live_migration_at_destination,
which is only called if the instance is *not* on shared storage, i.e.
not something like libvirt with the rbd image backend or with NFS.
That's controlled via the "do_cleanup" flag returned from
_live_migration_cleanup_flags().

If the live migration is happening over shared storage and fails, then
rollback_live_migration_at_destination isn't called, which means
setup_networks_on_host isn't called, which means the "migrating_to"
attribute in the port binding profile isn't cleaned up.
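
Condensed into a sketch (not nova's literal code), the pre-fix rollback
path therefore looked like this:

    def rollback_live_migration(compute, context, instance, dest,
                                migrate_data, do_cleanup, destroy_disks):
        if do_cleanup:
            # Non-shared storage only: the destination compute runs
            # rollback_live_migration_at_destination(), whose network
            # teardown clears 'migrating_to' from the binding profile.
            compute.compute_rpcapi.rollback_live_migration_at_destination(
                context, instance, dest, destroy_disks=destroy_disks,
                migrate_data=migrate_data)
        # Shared storage: do_cleanup is False, the branch above is
        # skipped, and the stale 'migrating_to' entry survives.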

This change adds the cleanup to _rollback_live_migration for the case
where neutron is being used and the live migration is over shared
storage, meaning rollback_live_migration_at_destination isn't called.

Change-Id: I658e0a749e842163ed74f82c975bcaf19f9f7f07
Closes-Bug: #1757292
Matt Riedemann 2018-03-22 17:38:00 -04:00
parent 167023b507
commit bb8ba2cf56
2 changed files with 74 additions and 4 deletions

--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -6515,6 +6515,18 @@ class ComputeManager(manager.Manager):
             self.compute_rpcapi.rollback_live_migration_at_destination(
                 context, instance, dest, destroy_disks=destroy_disks,
                 migrate_data=migrate_data)
+        elif utils.is_neutron():
+            # The port binding profiles need to be cleaned up.
+            with errors_out_migration_ctxt(migration):
+                try:
+                    self.network_api.setup_networks_on_host(
+                        context, instance, teardown=True)
+                except Exception:
+                    with excutils.save_and_reraise_exception():
+                        LOG.exception(
+                            'An error occurred while cleaning up networking '
+                            'during live migration rollback.',
+                            instance=instance)
 
         self._notify_about_instance_usage(context, instance,
                                           "live_migration._rollback.end")

--- a/nova/tests/unit/compute/test_compute.py
+++ b/nova/tests/unit/compute/test_compute.py
@@ -6597,8 +6597,10 @@ class ComputeTestCase(BaseTestCase,
                 mock.call(c, instance, self.compute.host,
                           action='live_migration_rollback', phase='end',
                           bdms=bdms)])
-            mock_nw_api.setup_networks_on_host.assert_called_once_with(
-                c, instance, self.compute.host)
+            mock_nw_api.setup_networks_on_host.assert_has_calls([
+                mock.call(c, instance, self.compute.host),
+                mock.call(c, instance, teardown=True)
+            ])
             mock_ra.assert_called_once_with(mock.ANY, instance, migration)
             mock_mig_save.assert_called_once_with()
         _test()
@@ -6640,13 +6642,69 @@
                 mock.call(c, instance, self.compute.host,
                           action='live_migration_rollback', phase='end',
                           bdms=bdms)])
-            mock_nw_api.setup_networks_on_host.assert_called_once_with(
-                c, instance, self.compute.host)
+            mock_nw_api.setup_networks_on_host.assert_has_calls([
+                mock.call(c, instance, self.compute.host),
+                mock.call(c, instance, teardown=True)
+            ])
         _test()
 
         self.assertEqual('fake', migration.status)
         migration.save.assert_called_once_with()
 
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
+                       return_value=objects.ComputeNode(
+                           host='dest-host', uuid=uuids.dest_node))
+    @mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid',
+                return_value=objects.BlockDeviceMappingList())
+    def test_rollback_live_migration_network_teardown_fails(
+            self, mock_bdms, mock_get_node):
+        """Tests that _rollback_live_migration calls setup_networks_on_host
+        directly, which raises an exception, and the migration record status
+        is still set to 'error' before re-raising the error.
+        """
+        ctxt = context.get_admin_context()
+        instance = fake_instance.fake_instance_obj(ctxt)
+        migration = objects.Migration(ctxt, uuid=uuids.migration)
+        migrate_data = objects.LibvirtLiveMigrateData(migration=migration)
+
+        @mock.patch.object(self.compute, '_notify_about_instance_usage')
+        @mock.patch('nova.compute.utils.notify_about_instance_action')
+        @mock.patch.object(instance, 'save')
+        @mock.patch.object(migration, 'save')
+        @mock.patch.object(self.compute, '_revert_allocation')
+        @mock.patch.object(self.compute, '_live_migration_cleanup_flags',
+                           return_value=(False, False))
+        @mock.patch.object(self.compute.network_api, 'setup_networks_on_host',
+                           side_effect=(None, test.TestingException))
+        def _test(mock_nw_setup, _mock_lmcf, mock_ra, mock_mig_save,
+                  mock_inst_save, _mock_notify_action, mock_notify_usage):
+            self.assertRaises(test.TestingException,
+                              self.compute._rollback_live_migration,
+                              ctxt, instance, 'dest-host', migrate_data,
+                              migration_status='goofballs')
+            # setup_networks_on_host is called twice:
+            # - once to re-setup networking on the source host, which for
+            #   neutron doesn't actually do anything since the port's host
+            #   binding didn't change since live migration failed
+            # - once to teardown the 'migrating_to' information in the port
+            #   binding profile, where migrating_to points at the destination
+            #   host (that's done in pre_live_migration on the dest host). This
+            #   cleanup would happen in rollback_live_migration_at_destination
+            #   except _live_migration_cleanup_flags returned False for
+            #   'do_cleanup'.
+            mock_nw_setup.assert_has_calls([
+                mock.call(ctxt, instance, self.compute.host),
+                mock.call(ctxt, instance, teardown=True)
+            ])
+            mock_ra.assert_called_once_with(ctxt, instance, migration)
+            mock_mig_save.assert_called_once_with()
+            # Since we failed during rollback, the migration status gets set
+            # to 'error' instead of 'goofballs'.
+            self.assertEqual('error', migration.status)
+
+        _test()
+
     @mock.patch.object(fake.FakeDriver,
                        'rollback_live_migration_at_destination')
     def test_rollback_live_migration_at_destination_correctly(self,