Don't persist RequestSpec.retry

During a resize, the RequestSpec.flavor is updated
to the new flavor and the spec is saved. If the
resize failed on one host and was rescheduled, the
RequestSpec.retry is updated for that failed host
and mistakenly persisted along with the rest of the
spec, which can affect later move operations, for
example if an admin targets one of those previously
failed hosts for a live migration or evacuate
operation.
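
To make the failure mode concrete, here is a rough
illustration of what ends up in the persisted spec after a
resize that rescheduled off of host2 (mirroring the
scenario in the regression test below; ctxt and server are
placeholders):

    reqspec = objects.RequestSpec.get_by_instance_uuid(
        ctxt, server['id'])
    # The stale retry from the failed resize was serialized
    # along with the rest of the spec:
    reqspec.retry.num_attempts    # 1
    reqspec.retry.hosts[0].host   # 'host2', where the resize failed
    # A later admin-targeted live migration to host2 reuses
    # this RequestSpec, so the scheduler's RetryFilter kicks
    # host2 out as an already-tried host and the operation
    # fails even though host2 is otherwise a valid target.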

This change fixes the problem by never persisting
the RequestSpec.retry field to the database, since
retries are per-request/operation and are not
something that needs to be persisted.

As an alternative, we could reset the retry field
in the RequestSpec.reset_forced_destinations method,
but that would slightly overload the meaning of that
method, and the approach taken in this patch is
arguably cleaner since retries should never be
persisted. It should be noted, however, that
resetting the 'retry' field in
RequestSpec.reset_forced_destinations would have the
advantage of also cleaning up any existing DB
entries that already have this problem.
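
For reference, a minimal sketch of that alternative (not
what this patch does), assuming reset_forced_destinations
keeps its existing body and only gains an extra reset at
the end:

    def reset_forced_destinations(self):
        # ... existing code clearing force_hosts/force_nodes ...
        # Hypothetical addition: also drop any stale retry so
        # previously persisted specs would be healed the next
        # time a move operation resets forced destinations.
        if 'retry' in self and self.retry:
            self.retry = None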

The related functional regression test is updated
to show the bug is now fixed.

NOTE(mriedem): This backport also includes change
I61f745667f4c003d7e3ca6f2f9a99194930ac892 squashed
into it in order to not re-introduce that bug.

Change-Id: Iadbf8ec935565a6d4ccf6f36ef630ab6bf1bea5d
Closes-Bug: #1718512
(cherry picked from commit 6647f11dc1)

Author: Matt Riedemann
Date:   2018-04-06 20:28:53 -04:00
Parent: c2dc902e39
Commit: 757dbd17cf

3 changed files with 13 additions and 13 deletions

@@ -510,6 +510,9 @@ class RequestSpec(base.NovaObject):
             if 'instance_group' in spec and spec.instance_group:
                 spec.instance_group.members = None
                 spec.instance_group.hosts = None
+            # NOTE(mriedem): Don't persist retries since those are per-request
+            if 'retry' in spec and spec.retry:
+                spec.retry = None
             db_updates = {'spec': jsonutils.dumps(spec.obj_to_primitive())}
             if 'instance_uuid' in updates:
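
Since the whole spec is serialized into a single JSON blob
(the db_updates line above), whatever is still set on the
object at serialization time is exactly what later
operations will reload, so nulling retry right before the
jsonutils.dumps call is what keeps the failed-host list out
of the database. Roughly, the stored document now looks
like this (abridged sketch; the keys follow the
versioned-object primitive format, values are
placeholders):

    persisted_spec = {
        'nova_object.name': 'RequestSpec',
        'nova_object.data': {
            'flavor': '...the new flavor from the resize...',
            'retry': None,  # stale reschedule info no longer stored
            # instance_group is still stored, but with its
            # members and hosts nulled out by the lines just
            # above the new ones.
        },
    }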

@@ -142,17 +142,13 @@ class TestRequestSpecRetryReschedule(test.TestCase,
         data = {'os-migrateLive': {'host': 'host2', 'block_migration': 'auto'}}
         self.admin_api.post_server_action(server['id'], data)
         server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
-        # FIXME(mriedem): This is bug 1718512 where the failed resize left
-        # host2 in the RequestSpec.retry field and it affects the live migrate
-        # to host2 because the scheduler RetryFilter kicks it out.
-        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
-        migrations = self.admin_api.api_get(
-            'os-migrations?instance_uuid=%s&migration_type=live-migration' %
-            server['id']).body['migrations']
-        self.assertEqual(1, len(migrations))
-        self.assertEqual('error', migrations[0]['status'])
+        self.assertEqual('host2', server['OS-EXT-SRV-ATTR:host'])
+        # NOTE(mriedem): The instance status effectively goes to ACTIVE before
+        # the migration status is changed to "completed" since
+        # post_live_migration_at_destination changes the instance status
+        # and _post_live_migration changes the migration status later. So we
+        # need to poll the migration record until it's complete or we timeout.
+        self._wait_for_migration_status(server, ['completed'])
         reqspec = objects.RequestSpec.get_by_instance_uuid(
             nova_context.get_admin_context(), server['id'])
-        self.assertIsNotNone(reqspec.retry)
-        self.assertEqual(1, reqspec.retry.num_attempts)
-        self.assertEqual('host2', reqspec.retry.hosts[0].host)
+        self.assertIsNone(reqspec.retry)

@@ -535,13 +535,14 @@ class _TestRequestSpecObject(object):
         # object fields
         for field in ['image', 'numa_topology', 'pci_requests', 'flavor',
-                      'retry', 'limits']:
+                      'limits']:
             self.assertEqual(
                 getattr(req_obj, field).obj_to_primitive(),
                 getattr(serialized_obj, field).obj_to_primitive())
         self.assertIsNone(serialized_obj.instance_group.members)
         self.assertIsNone(serialized_obj.instance_group.hosts)
+        self.assertIsNone(serialized_obj.retry)
 
     def test_create(self):
         req_obj = fake_request_spec.fake_spec_obj(remove_id=True)