Annotate flows and handle PortBindingDeletionFailed in ComputeManager

This change adds comments to the live migration methods about
some of the network API calls since it can get very confusing
trying to trace what is happening with networking during
live migration since the network API has several similar
sounding methods.

This also handles PortBindingDeletionFailed which can be raised
from the neutronv2 API implementation of setup_networks_on_host
when teardown=True. There are three cases that can hit this:

1. post live migration at the destination cleaning up the source
   host port bindings
2. rollback live migration at the source cleaning up the
   destination host port bindings
3. rollback live migration at the destination cleaning up the
   destination host port bindings

Depends-On: https://review.openstack.org/558990/

Co-Authored-By: Sean Mooney <sean.k.mooney@intel.com>

Part of blueprint neutron-new-port-binding-api

Change-Id: I1ba00850bdd7c015c5f462f1d75a5b833a5e9af9
This commit is contained in:
Matt Riedemann 2018-03-09 16:03:57 -05:00
parent 2b52cde565
commit e53f46672e
4 changed files with 155 additions and 10 deletions

View File

@ -6516,6 +6516,9 @@ class ComputeManager(manager.Manager):
migration = {'source_compute': self.host,
'dest_compute': dest, }
# For neutron, migrate_instance_start will activate the destination
# host port bindings, if there are any created by conductor before live
# migration started.
self.network_api.migrate_instance_start(ctxt,
instance,
migration)
@ -6645,6 +6648,7 @@ class ComputeManager(manager.Manager):
# this is called a second time because
# multi_host does not create the bridge in
# plug_vifs
# NOTE(mriedem): This is a no-op for neutron.
self.network_api.setup_networks_on_host(context, instance,
self.host)
migration = {'source_compute': instance.host,
@ -6691,10 +6695,21 @@ class ComputeManager(manager.Manager):
instance.progress = 0
instance.save(expected_task_state=task_states.MIGRATING)
# NOTE(tr3buchet): tear down networks on source host
self.network_api.setup_networks_on_host(context, instance,
prev_host, teardown=True)
# NOTE(vish): this is necessary to update dhcp
# NOTE(tr3buchet): tear down networks on source host (nova-net)
# NOTE(mriedem): For neutron, this will delete any inactive source
# host port bindings.
try:
self.network_api.setup_networks_on_host(context, instance,
prev_host, teardown=True)
except exception.PortBindingDeletionFailed as e:
# Removing the inactive port bindings from the source host is not
# critical so just log an error but don't fail.
LOG.error('Network cleanup failed for source host %s during post '
'live migration. You may need to manually clean up '
'resources in the network service. Error: %s',
prev_host, six.text_type(e))
# NOTE(vish): this is necessary to update dhcp for nova-network
# NOTE(mriedem): This is a no-op for neutron.
self.network_api.setup_networks_on_host(context, instance, self.host)
self._notify_about_instance_usage(
context, instance, "live_migration.post.dest.end",
@ -6742,7 +6757,9 @@ class ComputeManager(manager.Manager):
instance.progress = 0
instance.save(expected_task_state=[task_states.MIGRATING])
# NOTE(tr3buchet): setup networks on source host (really it's re-setup)
# NOTE(tr3buchet): setup networks on source host (really it's re-setup
# for nova-network)
# NOTE(mriedem): This is a no-op for neutron.
self.network_api.setup_networks_on_host(context, instance, self.host)
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
@ -6785,8 +6802,18 @@ class ComputeManager(manager.Manager):
# The port binding profiles need to be cleaned up.
with errors_out_migration_ctxt(migration):
try:
# This call will delete any inactive destination host
# port bindings.
self.network_api.setup_networks_on_host(
context, instance, teardown=True)
context, instance, host=dest, teardown=True)
except exception.PortBindingDeletionFailed as e:
# Removing the inactive port bindings from the destination
# host is not critical so just log an error but don't fail.
LOG.error(
'Network cleanup failed for destination host %s '
'during live migration rollback. You may need to '
'manually clean up resources in the network service. '
'Error: %s', dest, six.text_type(e))
except Exception:
with excutils.save_and_reraise_exception():
LOG.exception(
@ -6826,9 +6853,25 @@ class ComputeManager(manager.Manager):
action=fields.NotificationAction.LIVE_MIGRATION_ROLLBACK_DEST,
phase=fields.NotificationPhase.START)
try:
# NOTE(tr3buchet): tear down networks on destination host
# NOTE(tr3buchet): tear down networks on dest host (nova-net)
# NOTE(mriedem): For neutron, this call will delete any
# destination host port bindings.
# TODO(mriedem): We should eventually remove this call from
# this method (rollback_live_migration_at_destination) since this
# method is only called conditionally based on whether or not the
# instance is running on shared storage. _rollback_live_migration
# already calls this method for neutron if we are running on
# shared storage.
self.network_api.setup_networks_on_host(context, instance,
self.host, teardown=True)
except exception.PortBindingDeletionFailed as e:
# Removing the inactive port bindings from the destination
# host is not critical so just log an error but don't fail.
LOG.error(
'Network cleanup failed for destination host %s '
'during live migration rollback. You may need to '
'manually clean up resources in the network service. '
'Error: %s', self.host, six.text_type(e))
except Exception:
with excutils.save_and_reraise_exception():
# NOTE(tdurakov): even if teardown networks fails driver

View File

@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 34
SERVICE_VERSION = 35
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -143,6 +143,9 @@ SERVICE_VERSION_HISTORY = (
{'compute_rpc': '5.0'},
# Version 34: Adds support to abort queued/preparing live migrations.
{'compute_rpc': '5.0'},
# Version 35: Indicates that nova-compute supports live migration with
# ports bound early on the destination host using VIFMigrateData.
{'compute_rpc': '5.0'},
)

View File

@ -6658,7 +6658,7 @@ class ComputeTestCase(BaseTestCase,
bdms=bdms)])
mock_nw_api.setup_networks_on_host.assert_has_calls([
mock.call(c, instance, self.compute.host),
mock.call(c, instance, teardown=True)
mock.call(c, instance, host='foo', teardown=True)
])
mock_ra.assert_called_once_with(mock.ANY, instance, migration)
mock_mig_save.assert_called_once_with()
@ -6714,7 +6714,7 @@ class ComputeTestCase(BaseTestCase,
# 'do_cleanup'.
mock_nw_setup.assert_has_calls([
mock.call(ctxt, instance, self.compute.host),
mock.call(ctxt, instance, teardown=True)
mock.call(ctxt, instance, host='dest-host', teardown=True)
])
mock_ra.assert_called_once_with(ctxt, instance, migration)
mock_mig_save.assert_called_once_with()

View File

@ -7497,6 +7497,41 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
_do_test()
@mock.patch('nova.compute.manager.LOG.error')
@mock.patch('nova.compute.utils.notify_about_instance_action')
def test_post_live_migration_at_destination_port_binding_delete_fails(
self, mock_notify, mock_log_error):
"""Tests that neutron fails to delete the source host port bindings
but we handle the error and just log it.
"""
@mock.patch.object(self.compute, '_notify_about_instance_usage')
@mock.patch.object(self.compute, 'network_api')
@mock.patch.object(self.compute, '_get_instance_block_device_info')
@mock.patch.object(self.compute, '_get_power_state',
return_value=power_state.RUNNING)
@mock.patch.object(self.compute, '_get_compute_info',
return_value=objects.ComputeNode(
hypervisor_hostname='fake-dest-host'))
@mock.patch.object(self.instance, 'save')
def _do_test(instance_save, get_compute_node, get_power_state,
get_bdms, network_api, legacy_notify):
# setup_networks_on_host is called three times:
# 1. set the migrating_to port binding profile value (no-op)
# 2. delete the source host port bindings - we make this raise
# 3. once more to update dhcp for nova-network (no-op for neutron)
network_api.setup_networks_on_host.side_effect = [
None,
exception.PortBindingDeletionFailed(
port_id=uuids.port_id, host='fake-source-host'),
None]
self.compute.post_live_migration_at_destination(
self.context, self.instance, block_migration=False)
self.assertEqual(1, mock_log_error.call_count)
self.assertIn('Network cleanup failed for source host',
mock_log_error.call_args[0][0])
_do_test()
@mock.patch('nova.objects.ConsoleAuthToken.'
'clean_console_auths_for_instance')
def _call_post_live_migration(self, mock_clean, *args, **kwargs):
@ -7718,6 +7753,70 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
_test()
@mock.patch('nova.compute.manager.LOG.error')
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid',
return_value=objects.BlockDeviceMappingList())
@mock.patch('nova.compute.utils.notify_about_instance_action')
def test_rollback_live_migration_port_binding_delete_fails(
self, mock_notify, mock_get_bdms, mock_log_error):
"""Tests that neutron fails to delete the destination host port
bindings but we handle the error and just log it.
"""
migrate_data = objects.LibvirtLiveMigrateData(
migration=self.migration, is_shared_instance_path=True,
is_shared_block_storage=True)
@mock.patch.object(self.compute, '_revert_allocation')
@mock.patch.object(self.instance, 'save')
@mock.patch.object(self.compute, 'network_api')
@mock.patch.object(self.compute, '_notify_about_instance_usage')
def _do_test(legacy_notify, network_api, instance_save,
_revert_allocation):
# setup_networks_on_host is called two times:
# 1. set the migrating_to attribute in the port binding profile,
# which is a no-op in this case for neutron
# 2. delete the dest host port bindings - we make this raise
network_api.setup_networks_on_host.side_effect = [
None,
exception.PortBindingDeletionFailed(
port_id=uuids.port_id, host='fake-dest-host')]
self.compute._rollback_live_migration(
self.context, self.instance, 'fake-dest-host', migrate_data)
self.assertEqual(1, mock_log_error.call_count)
self.assertIn('Network cleanup failed for destination host',
mock_log_error.call_args[0][0])
_do_test()
@mock.patch('nova.compute.manager.LOG.error')
def test_rollback_live_migration_at_destination_port_binding_delete_fails(
self, mock_log_error):
"""Tests that neutron fails to delete the destination host port
bindings but we handle the error and just log it.
"""
@mock.patch.object(self.compute, '_notify_about_instance_usage')
@mock.patch.object(self.compute, 'network_api')
@mock.patch.object(self.compute, '_get_instance_block_device_info')
@mock.patch.object(self.compute.driver,
'rollback_live_migration_at_destination')
def _do_test(driver_rollback, get_bdms, network_api, legacy_notify):
self.compute.network_api.setup_networks_on_host.side_effect = (
exception.PortBindingDeletionFailed(
port_id=uuids.port_id, host='fake-dest-host'))
self.compute.rollback_live_migration_at_destination(
self.context, self.instance, destroy_disks=False,
migrate_data=mock.sentinel.migrate_data)
self.assertEqual(1, mock_log_error.call_count)
self.assertIn('Network cleanup failed for destination host',
mock_log_error.call_args[0][0])
driver_rollback.assert_called_once_with(
self.context, self.instance,
network_api.get_instance_nw_info.return_value,
get_bdms.return_value, destroy_disks=False,
migrate_data=mock.sentinel.migrate_data)
_do_test()
def _get_migration(self, migration_id, status, migration_type):
migration = objects.Migration()
migration.id = migration_id