From c61ec9d98b7ca22fb6b68a80a9b382251a537a83 Mon Sep 17 00:00:00 2001
From: Matt Riedemann
Date: Fri, 6 Apr 2018 20:28:53 -0400
Subject: [PATCH] Don't persist RequestSpec.retry

During a resize, the RequestSpec.flavor is updated to the new flavor.
If the resize failed on one host and was rescheduled, the
RequestSpec.retry is updated for that failed host and mistakenly
persisted, which can affect later move operations, like if an admin
targets one of those previously failed hosts for a live migration or
evacuate operation.

This change fixes the problem by never persisting the
RequestSpec.retry field to the database, since retries are
per-request/operation and not something that needs to be persisted.

As an alternative, we could reset the retry field in the
RequestSpec.reset_forced_destinations method, but that would be
slightly overloading the meaning of that method, and the approach
taken in this patch is arguably cleaner since retries should never be
persisted. It should be noted, however, that one advantage to
resetting the 'retry' field in the
RequestSpec.reset_forced_destinations method would be to avoid this
issue for any existing DB entries that have this problem.

The related functional regression test is updated to show the bug is
now fixed.

NOTE(mriedem): This backport also includes change
I61f745667f4c003d7e3ca6f2f9a99194930ac892 squashed into it in order to
not re-introduce that bug. On Ocata it must be adjusted slightly to
pass a string rather than a list to _wait_for_migration_status since
I752617066bb2167b49239ab9d17b0c89754a3e12 is not in Ocata.

NOTE(mriedem): This patch has to pull some changes from two other
patches to make live migration work in the fake virt driver:
ce893e37f and b97c433f7.

Change-Id: Iadbf8ec935565a6d4ccf6f36ef630ab6bf1bea5d
Closes-Bug: #1718512
(cherry picked from commit 6647f11dc1aba89f9b0e2703f236a43f31d88079)
(cherry picked from commit 757dbd17cf37aecea005dfdc954bf50bbddedd95)
(cherry picked from commit 878e99d1f82fbeec840b2dbab8e40c27127d88ba)
---
 nova/objects/request_spec.py                  |  3 +++
 nova/test.py                                  |  3 +++
 .../regressions/test_bug_1718512.py           | 20 +++++++---------
 nova/tests/unit/objects/test_request_spec.py  |  3 ++-
 nova/virt/fake.py                             | 23 ++++++++++++++++---
 5 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/nova/objects/request_spec.py b/nova/objects/request_spec.py
index f4f7320e59d0..f3529f3cd8ac 100644
--- a/nova/objects/request_spec.py
+++ b/nova/objects/request_spec.py
@@ -494,6 +494,9 @@ class RequestSpec(base.NovaObject):
         if 'instance_group' in spec and spec.instance_group:
             spec.instance_group.members = None
             spec.instance_group.hosts = None
+        # NOTE(mriedem): Don't persist retries since those are per-request
+        if 'retry' in spec and spec.retry:
+            spec.retry = None
 
         db_updates = {'spec': jsonutils.dumps(spec.obj_to_primitive())}
         if 'instance_uuid' in updates:
diff --git a/nova/test.py b/nova/test.py
index 8288adb6823d..f757c262bcb0 100644
--- a/nova/test.py
+++ b/nova/test.py
@@ -357,6 +357,9 @@ class TestCase(testtools.TestCase):
                                      cell_mapping=cell)
             hm.create()
             self.host_mappings[hm.host] = hm
+            if host is not None:
+                # Make sure that CONF.host is relevant to the right hostname
+                self.useFixture(nova_fixtures.ConfPatcher(host=host))
         return svc.service
 
diff --git a/nova/tests/functional/regressions/test_bug_1718512.py b/nova/tests/functional/regressions/test_bug_1718512.py
index 70918ad49a24..29f0ceb48dc1 100644
--- a/nova/tests/functional/regressions/test_bug_1718512.py
+++ b/nova/tests/functional/regressions/test_bug_1718512.py
@@ -152,17 +152,13 @@ class TestRequestSpecRetryReschedule(test.TestCase,
         data = {'os-migrateLive': {'host': 'host2', 'block_migration': 'auto'}}
         self.admin_api.post_server_action(server['id'], data)
         server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
-        # FIXME(mriedem): This is bug 1718512 where the failed resize left
-        # host2 in the RequestSpec.retry field and it affects the live migrate
-        # to host2 because the scheduler RetryFilter kicks it out.
-        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
-        migrations = self.admin_api.api_get(
-            'os-migrations?instance_uuid=%s&migration_type=live-migration' %
-            server['id']).body['migrations']
-        self.assertEqual(1, len(migrations))
-        self.assertEqual('error', migrations[0]['status'])
+        self.assertEqual('host2', server['OS-EXT-SRV-ATTR:host'])
+        # NOTE(mriedem): The instance status effectively goes to ACTIVE before
+        # the migration status is changed to "completed" since
+        # post_live_migration_at_destination changes the instance status
+        # and _post_live_migration changes the migration status later. So we
+        # need to poll the migration record until it's complete or we timeout.
+        self._wait_for_migration_status(server, 'completed')
         reqspec = objects.RequestSpec.get_by_instance_uuid(
             nova_context.get_admin_context(), server['id'])
-        self.assertIsNotNone(reqspec.retry)
-        self.assertEqual(1, reqspec.retry.num_attempts)
-        self.assertEqual('host2', reqspec.retry.hosts[0].host)
+        self.assertIsNone(reqspec.retry)
diff --git a/nova/tests/unit/objects/test_request_spec.py b/nova/tests/unit/objects/test_request_spec.py
index 00faeff3ce3c..52734fbd5b6a 100644
--- a/nova/tests/unit/objects/test_request_spec.py
+++ b/nova/tests/unit/objects/test_request_spec.py
@@ -535,13 +535,14 @@ class _TestRequestSpecObject(object):
         # object fields
         for field in ['image', 'numa_topology', 'pci_requests', 'flavor',
-                      'retry', 'limits']:
+                      'limits']:
             self.assertEqual(
                 getattr(req_obj, field).obj_to_primitive(),
                 getattr(serialized_obj, field).obj_to_primitive())
 
         self.assertIsNone(serialized_obj.instance_group.members)
         self.assertIsNone(serialized_obj.instance_group.hosts)
+        self.assertIsNone(serialized_obj.retry)
 
     def test_create(self):
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index 1a4d5bec6852..1bdad2ebb212 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -37,6 +37,7 @@ from nova.console import type as ctype
 from nova import exception
 from nova.i18n import _LW
 from nova.objects import fields as obj_fields
+from nova.objects import migrate_data
 from nova.virt import diagnostics
 from nova.virt import driver
 from nova.virt import hardware
@@ -489,11 +490,27 @@ class FakeDriver(driver.ComputeDriver):
                                            src_compute_info, dst_compute_info,
                                            block_migration=False,
                                            disk_over_commit=False):
-        return {}
+        data = migrate_data.LibvirtLiveMigrateData()
+        data.filename = 'fake'
+        data.image_type = CONF.libvirt.images_type
+        data.graphics_listen_addr_vnc = CONF.vnc.vncserver_listen
+        data.graphics_listen_addr_spice = CONF.spice.server_listen
+        data.serial_listen_addr = None
+        # Notes(eliqiao): block_migration and disk_over_commit are not
+        # nullable, so just don't set them if they are None
+        if block_migration is not None:
+            data.block_migration = block_migration
+        if disk_over_commit is not None:
+            data.disk_over_commit = disk_over_commit
+        data.disk_available_mb = 100000
+        data.is_shared_block_storage = True
+        data.is_shared_instance_path = True
+
+        return data
 
     def check_can_live_migrate_source(self, context, instance,
                                       dest_check_data, block_device_info=None):
-        return
+        return dest_check_data
 
     def finish_migration(self, context, migration, instance, disk_info,
                          network_info, image_meta, resize_instance,
@@ -505,7 +522,7 @@ class FakeDriver(driver.ComputeDriver):
 
     def pre_live_migration(self, context, instance, block_device_info,
                            network_info, disk_info, migrate_data):
-        return
+        return migrate_data
 
     def unfilter_instance(self, instance, network_info):
         return
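
For readers who want to see the persistence rule from the request_spec.py hunk
in isolation, the standalone sketch below mimics the idea of the fix: clear
per-request fields such as retry before serializing the spec for the database.
It is a simplified, hypothetical example; the dict-based spec and the
serialize_spec_for_db helper are illustrative stand-ins, not part of Nova's
API, and the real object-based implementation lives in
nova/objects/request_spec.py.

# Minimal illustrative sketch (not Nova code): strip per-request fields
# before serializing a spec for persistence, mirroring the idea of the
# patch above. The helper name and dict-based "spec" are hypothetical.
import copy
import json

# Per-request data that must never be written to the database.
TRANSIENT_FIELDS = ('retry',)


def serialize_spec_for_db(spec):
    """Return a JSON blob of ``spec`` with transient fields cleared.

    ``spec`` is a plain dict standing in for a RequestSpec-like object.
    The caller's copy of ``spec`` is left untouched.
    """
    spec = copy.deepcopy(spec)
    for field in TRANSIENT_FIELDS:
        if spec.get(field):
            spec[field] = None
    return json.dumps(spec)


if __name__ == '__main__':
    spec = {'flavor': 'm1.small',
            'retry': {'num_attempts': 1, 'hosts': [['host2', 'node2']]}}
    blob = serialize_spec_for_db(spec)
    # Retries never reach the persisted representation.
    assert json.loads(blob)['retry'] is None
    print(blob)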