Ensure attachment cleanup on failure in driver.pre_live_migration

Previously, if the call to driver.pre_live_migration failed (which in
libvirt can happen with a DestinationDiskExists exception), the
compute manager wouldn't rollback/cleanup volume attachments, leading
to corrupt volume attachment information, and, depending on the
backend, the instance being unable to access its volume. This patch
moves the driver.pre_live_migration call inside the existing
try/except, allowing the compute manager to properly rollback/cleanup
volume attachments.

The compute manager has its own _rollback_live_migration() cleanup in
case the pre_live_migration() RPC call to the destination fails. There
should be no conflicts between the cleanup in that and the new volume
cleanup in the except block. The remove_volume_connection() ->
driver_detach() -> detach_volume() call catches the InstanceNotFound
exception and warns about the instance disappearing (it was never
really on the destination in the first place). The attachment_delete()
in _rollback_live_migration() is contingent on there being an
old_vol_attachment in migrate_data, which there isn't because
pre_live_migration() raised instead of returning.

Conflicts in nova/compute/manager.py due to absence of setting
migrate_data.wait_for_vif_plug from 5aadff75c3.

Change-Id: I67f66e95d69ae6df22e539550a3eac697ea8f5d8
Closes-bug: 1778206
(cherry picked from commit 1a29248d5e)
(cherry picked from commit 6aecf475db)
This commit is contained in:
Matthew Booth 2018-06-26 14:42:47 +01:00 committed by Artom Lifshitz
parent 6b065dad80
commit 4b53e4edf9
2 changed files with 102 additions and 32 deletions

View File

@ -5938,8 +5938,8 @@ class ComputeManager(manager.Manager):
action=fields.NotificationAction.LIVE_MIGRATION_PRE,
phase=fields.NotificationPhase.START, bdms=bdms)
connector = self.driver.get_volume_connector(instance)
try:
connector = self.driver.get_volume_connector(instance)
for bdm in bdms:
if bdm.is_volume and bdm.attachment_id is not None:
# This bdm uses the new cinder v3.44 API.
@ -5964,6 +5964,31 @@ class ComputeManager(manager.Manager):
# update the bdm with the new attachment_id.
bdm.attachment_id = attach_ref['id']
bdm.save()
block_device_info = self._get_instance_block_device_info(
context, instance, refresh_conn_info=True,
bdms=bdms)
migrate_data = self.driver.pre_live_migration(context,
instance,
block_device_info,
network_info,
disk,
migrate_data)
LOG.debug('driver pre_live_migration data is %s', migrate_data)
# NOTE(tr3buchet): setup networks on destination host
self.network_api.setup_networks_on_host(context, instance,
self.host)
# Creating filters to hypervisors and firewalls.
# An example is that nova-instance-instance-xxx,
# which is written to libvirt.xml(Check "virsh nwfilter-list")
# This nwfilter is necessary on the destination host.
# In addition, this method is creating filtering rule
# onto destination host.
self.driver.ensure_filtering_rules_for_instance(instance,
network_info)
except Exception:
# If we raise, migrate_data with the updated attachment ids
# will not be returned to the source host for rollback.
@ -5978,18 +6003,6 @@ class ComputeManager(manager.Manager):
bdm.attachment_id = old_attachments[bdm.volume_id]
bdm.save()
block_device_info = self._get_instance_block_device_info(
context, instance, refresh_conn_info=True,
bdms=bdms)
migrate_data = self.driver.pre_live_migration(context,
instance,
block_device_info,
network_info,
disk,
migrate_data)
LOG.debug('driver pre_live_migration data is %s', migrate_data)
# Volume connections are complete, tell cinder that all the
# attachments have completed.
for bdm in bdms:
@ -5997,19 +6010,6 @@ class ComputeManager(manager.Manager):
self.volume_api.attachment_complete(context,
bdm.attachment_id)
# NOTE(tr3buchet): setup networks on destination host
self.network_api.setup_networks_on_host(context, instance,
self.host)
# Creating filters to hypervisors and firewalls.
# An example is that nova-instance-instance-xxx,
# which is written to libvirt.xml(Check "virsh nwfilter-list")
# This nwfilter is necessary on the destination host.
# In addition, this method is creating filtering rule
# onto destination host.
self.driver.ensure_filtering_rules_for_instance(instance,
network_info)
self._notify_about_instance_usage(
context, instance, "live_migration.pre.end",
network_info=network_info)

View File

@ -6989,21 +6989,17 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
@mock.patch.object(compute, '_notify_about_instance_usage')
@mock.patch('nova.compute.utils.notify_about_instance_action')
@mock.patch.object(compute, 'network_api')
@mock.patch.object(compute.driver, 'pre_live_migration')
@mock.patch.object(compute, '_get_instance_block_device_info')
@mock.patch.object(compute_utils, 'is_volume_backed_instance')
@mock.patch.object(objects.BlockDeviceMappingList,
'get_by_instance_uuid')
@mock.patch.object(compute.volume_api, 'attachment_delete')
@mock.patch.object(compute.volume_api, 'attachment_create')
def _test(mock_attach_create, mock_attach_delete, mock_get_bdms,
mock_ivbi, mock_gibdi, mock_plm, mock_nwapi, mock_ver_notify,
mock_notify, mock_bdm_save, mock_exception):
mock_nwapi, mock_ver_notify, mock_notify, mock_bdm_save,
mock_exception):
new_attachment_id = uuids.attachment3
mock_attach_create.side_effect = [{'id': new_attachment_id},
test.TestingException]
mock_get_bdms.return_value = [vol1_bdm, vol2_bdm]
mock_plm.return_value = migrate_data
self.assertRaises(test.TestingException,
compute.pre_live_migration,
@ -7014,6 +7010,80 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
self.assertEqual(mock_attach_create.call_count, 2)
mock_attach_delete.assert_called_once_with(self.context,
new_attachment_id)
# Meta: ensure un-asserted mocks are still required
for m in (mock_nwapi, mock_get_bdms, mock_ver_notify, mock_notify,
mock_bdm_save, mock_exception):
# NOTE(artom) This is different from assert_called() because
# mock_calls contains the calls to a mock's method as well
# (which is what we want for network_api.get_instance_nw_info
# for example), whereas assert_called() only asserts
# calls to the mock itself.
self.assertGreater(len(m.mock_calls), 0)
_test()
def test_pre_live_migration_exceptions_delete_attachments(self):
# The instance in this test has 2 attachments. The call to
# driver.pre_live_migration will raise an exception. This will test
# that the attachments are restored after the exception is thrown.
compute = manager.ComputeManager()
instance = fake_instance.fake_instance_obj(self.context,
uuid=uuids.instance)
vol1_bdm = fake_block_device.fake_bdm_object(
self.context,
{'source_type': 'volume', 'destination_type': 'volume',
'volume_id': uuids.vol1, 'device_name': '/dev/vdb',
'instance_uuid': instance.uuid,
'connection_info': '{"test": "test"}'})
vol1_bdm.attachment_id = uuids.vol1_attach_orig
vol2_bdm = fake_block_device.fake_bdm_object(
self.context,
{'source_type': 'volume', 'destination_type': 'volume',
'volume_id': uuids.vol2, 'device_name': '/dev/vdc',
'instance_uuid': instance.uuid,
'connection_info': '{"test": "test"}'})
vol2_bdm.attachment_id = uuids.vol2_attach_orig
migrate_data = migrate_data_obj.LiveMigrateData()
migrate_data.old_vol_attachment_ids = {}
@mock.patch.object(manager, 'compute_utils', autospec=True)
@mock.patch.object(compute, 'network_api', autospec=True)
@mock.patch.object(compute, 'volume_api', autospec=True)
@mock.patch.object(objects.BlockDeviceMapping, 'save')
@mock.patch.object(objects.BlockDeviceMappingList,
'get_by_instance_uuid')
@mock.patch.object(compute.driver, 'pre_live_migration', autospec=True)
def _test(mock_plm, mock_bdms_get, mock_bdm_save, mock_vol_api,
mock_net_api, mock_compute_utils):
mock_vol_api.attachment_create.side_effect = [
{'id': uuids.vol1_attach_new},
{'id': uuids.vol2_attach_new}]
mock_bdms_get.return_value = [vol1_bdm, vol2_bdm]
mock_plm.side_effect = test.TestingException
self.assertRaises(test.TestingException,
compute.pre_live_migration,
self.context, instance, False, {}, migrate_data)
self.assertEqual(2, mock_vol_api.attachment_create.call_count)
# Assert BDMs have original attachments restored
self.assertEqual(uuids.vol1_attach_orig, vol1_bdm.attachment_id)
self.assertEqual(uuids.vol2_attach_orig, vol2_bdm.attachment_id)
# Assert attachment cleanup
self.assertEqual(2, mock_vol_api.attachment_delete.call_count)
mock_vol_api.attachment_delete.assert_has_calls(
[mock.call(self.context, uuids.vol1_attach_new),
mock.call(self.context, uuids.vol2_attach_new)],
any_order=True)
# Meta: ensure un-asserted mocks are still required
for m in (mock_net_api, mock_compute_utils):
self.assertGreater(len(m.mock_calls), 0)
_test()
def test_get_neutron_events_for_live_migration_empty(self):