From 4b53e4edf9f7f8c794316d3a0920d727723d1bab Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 26 Jun 2018 14:42:47 +0100 Subject: [PATCH] Ensure attachment cleanup on failure in driver.pre_live_migration Previously, if the call to driver.pre_live_migration failed (which in libvirt can happen with a DestinationDiskExists exception), the compute manager wouldn't rollback/cleanup volume attachments, leading to corrupt volume attachment information, and, depending on the backend, the instance being unable to access its volume. This patch moves the driver.pre_live_migration call inside the existing try/except, allowing the compute manager to properly rollback/cleanup volume attachments. The compute manager has its own _rollback_live_migration() cleanup in case the pre_live_migration() RPC call to the destination fails. There should be no conflicts between the cleanup in that and the new volume cleanup in the except block. The remove_volume_connection() -> driver_detach() -> detach_volume() call catches the InstanceNotFound exception and warns about the instance disappearing (it was never really on the destination in the first place). The attachment_delete() in _rollback_live_migration() is contingent on there being an old_vol_attachment in migrate_data, which there isn't because pre_live_migration() raised instead of returning. Conflicts in nova/compute/manager.py due to absence of setting migrate_data.wait_for_vif_plug from 5aadff75c3a. Change-Id: I67f66e95d69ae6df22e539550a3eac697ea8f5d8 Closes-bug: 1778206 (cherry picked from commit 1a29248d5e688ba1d4f806895dccd45fcb34b833) (cherry picked from commit 6aecf475dbff9c7d477378c021749b987d47deae) --- nova/compute/manager.py | 52 ++++++------- nova/tests/unit/compute/test_compute_mgr.py | 82 +++++++++++++++++++-- 2 files changed, 102 insertions(+), 32 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index a23343708393..df4667877f72 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -5938,8 +5938,8 @@ class ComputeManager(manager.Manager): action=fields.NotificationAction.LIVE_MIGRATION_PRE, phase=fields.NotificationPhase.START, bdms=bdms) + connector = self.driver.get_volume_connector(instance) try: - connector = self.driver.get_volume_connector(instance) for bdm in bdms: if bdm.is_volume and bdm.attachment_id is not None: # This bdm uses the new cinder v3.44 API. @@ -5964,6 +5964,31 @@ class ComputeManager(manager.Manager): # update the bdm with the new attachment_id. bdm.attachment_id = attach_ref['id'] bdm.save() + + block_device_info = self._get_instance_block_device_info( + context, instance, refresh_conn_info=True, + bdms=bdms) + + migrate_data = self.driver.pre_live_migration(context, + instance, + block_device_info, + network_info, + disk, + migrate_data) + LOG.debug('driver pre_live_migration data is %s', migrate_data) + + # NOTE(tr3buchet): setup networks on destination host + self.network_api.setup_networks_on_host(context, instance, + self.host) + + # Creating filters to hypervisors and firewalls. + # An example is that nova-instance-instance-xxx, + # which is written to libvirt.xml(Check "virsh nwfilter-list") + # This nwfilter is necessary on the destination host. + # In addition, this method is creating filtering rule + # onto destination host. + self.driver.ensure_filtering_rules_for_instance(instance, + network_info) except Exception: # If we raise, migrate_data with the updated attachment ids # will not be returned to the source host for rollback. @@ -5978,18 +6003,6 @@ class ComputeManager(manager.Manager): bdm.attachment_id = old_attachments[bdm.volume_id] bdm.save() - block_device_info = self._get_instance_block_device_info( - context, instance, refresh_conn_info=True, - bdms=bdms) - - migrate_data = self.driver.pre_live_migration(context, - instance, - block_device_info, - network_info, - disk, - migrate_data) - LOG.debug('driver pre_live_migration data is %s', migrate_data) - # Volume connections are complete, tell cinder that all the # attachments have completed. for bdm in bdms: @@ -5997,19 +6010,6 @@ class ComputeManager(manager.Manager): self.volume_api.attachment_complete(context, bdm.attachment_id) - # NOTE(tr3buchet): setup networks on destination host - self.network_api.setup_networks_on_host(context, instance, - self.host) - - # Creating filters to hypervisors and firewalls. - # An example is that nova-instance-instance-xxx, - # which is written to libvirt.xml(Check "virsh nwfilter-list") - # This nwfilter is necessary on the destination host. - # In addition, this method is creating filtering rule - # onto destination host. - self.driver.ensure_filtering_rules_for_instance(instance, - network_info) - self._notify_about_instance_usage( context, instance, "live_migration.pre.end", network_info=network_info) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index a736bcfdc418..4b32b92d9bdd 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -6989,21 +6989,17 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase): @mock.patch.object(compute, '_notify_about_instance_usage') @mock.patch('nova.compute.utils.notify_about_instance_action') @mock.patch.object(compute, 'network_api') - @mock.patch.object(compute.driver, 'pre_live_migration') - @mock.patch.object(compute, '_get_instance_block_device_info') - @mock.patch.object(compute_utils, 'is_volume_backed_instance') @mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid') @mock.patch.object(compute.volume_api, 'attachment_delete') @mock.patch.object(compute.volume_api, 'attachment_create') def _test(mock_attach_create, mock_attach_delete, mock_get_bdms, - mock_ivbi, mock_gibdi, mock_plm, mock_nwapi, mock_ver_notify, - mock_notify, mock_bdm_save, mock_exception): + mock_nwapi, mock_ver_notify, mock_notify, mock_bdm_save, + mock_exception): new_attachment_id = uuids.attachment3 mock_attach_create.side_effect = [{'id': new_attachment_id}, test.TestingException] mock_get_bdms.return_value = [vol1_bdm, vol2_bdm] - mock_plm.return_value = migrate_data self.assertRaises(test.TestingException, compute.pre_live_migration, @@ -7014,6 +7010,80 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase): self.assertEqual(mock_attach_create.call_count, 2) mock_attach_delete.assert_called_once_with(self.context, new_attachment_id) + + # Meta: ensure un-asserted mocks are still required + for m in (mock_nwapi, mock_get_bdms, mock_ver_notify, mock_notify, + mock_bdm_save, mock_exception): + # NOTE(artom) This is different from assert_called() because + # mock_calls contains the calls to a mock's method as well + # (which is what we want for network_api.get_instance_nw_info + # for example), whereas assert_called() only asserts + # calls to the mock itself. + self.assertGreater(len(m.mock_calls), 0) + _test() + + def test_pre_live_migration_exceptions_delete_attachments(self): + # The instance in this test has 2 attachments. The call to + # driver.pre_live_migration will raise an exception. This will test + # that the attachments are restored after the exception is thrown. + compute = manager.ComputeManager() + + instance = fake_instance.fake_instance_obj(self.context, + uuid=uuids.instance) + vol1_bdm = fake_block_device.fake_bdm_object( + self.context, + {'source_type': 'volume', 'destination_type': 'volume', + 'volume_id': uuids.vol1, 'device_name': '/dev/vdb', + 'instance_uuid': instance.uuid, + 'connection_info': '{"test": "test"}'}) + vol1_bdm.attachment_id = uuids.vol1_attach_orig + + vol2_bdm = fake_block_device.fake_bdm_object( + self.context, + {'source_type': 'volume', 'destination_type': 'volume', + 'volume_id': uuids.vol2, 'device_name': '/dev/vdc', + 'instance_uuid': instance.uuid, + 'connection_info': '{"test": "test"}'}) + vol2_bdm.attachment_id = uuids.vol2_attach_orig + + migrate_data = migrate_data_obj.LiveMigrateData() + migrate_data.old_vol_attachment_ids = {} + + @mock.patch.object(manager, 'compute_utils', autospec=True) + @mock.patch.object(compute, 'network_api', autospec=True) + @mock.patch.object(compute, 'volume_api', autospec=True) + @mock.patch.object(objects.BlockDeviceMapping, 'save') + @mock.patch.object(objects.BlockDeviceMappingList, + 'get_by_instance_uuid') + @mock.patch.object(compute.driver, 'pre_live_migration', autospec=True) + def _test(mock_plm, mock_bdms_get, mock_bdm_save, mock_vol_api, + mock_net_api, mock_compute_utils): + mock_vol_api.attachment_create.side_effect = [ + {'id': uuids.vol1_attach_new}, + {'id': uuids.vol2_attach_new}] + mock_bdms_get.return_value = [vol1_bdm, vol2_bdm] + mock_plm.side_effect = test.TestingException + + self.assertRaises(test.TestingException, + compute.pre_live_migration, + self.context, instance, False, {}, migrate_data) + + self.assertEqual(2, mock_vol_api.attachment_create.call_count) + + # Assert BDMs have original attachments restored + self.assertEqual(uuids.vol1_attach_orig, vol1_bdm.attachment_id) + self.assertEqual(uuids.vol2_attach_orig, vol2_bdm.attachment_id) + + # Assert attachment cleanup + self.assertEqual(2, mock_vol_api.attachment_delete.call_count) + mock_vol_api.attachment_delete.assert_has_calls( + [mock.call(self.context, uuids.vol1_attach_new), + mock.call(self.context, uuids.vol2_attach_new)], + any_order=True) + + # Meta: ensure un-asserted mocks are still required + for m in (mock_net_api, mock_compute_utils): + self.assertGreater(len(m.mock_calls), 0) _test() def test_get_neutron_events_for_live_migration_empty(self):