From ac85b76178017b4bda30502a6ebb9c990435ec72 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Tue, 1 Oct 2019 17:30:54 -0400 Subject: [PATCH] Set Instance AZ from Selection AZ during migrate reschedule This builds on change Ia50c5f4dd2204f1cafa669097d1e744479c4d8c8 to use the Selection.availability_zone value when rescheduling during a resize or cold migrate so that the cell conductor does not have to make an up-call to the aggregates table in the API DB which will fail if the cell conductor is not configured to use the API DB. The functional test added in Ic6926eecda1f9dd7183d66c67f04f308f6a1799d is updated to show the failure is gone and we get the AZ from the Selection object during the reschedule. For the case that the availability_zone field is not in the Selection object, there are existing unit tests in nova.tests.unit.conductor.tasks.test_migrate which will make sure we are not unconditionally trying to access the Selection.availability_zone field. Change-Id: I103d5023d3a3a7c367c7eea7fb103cb8ec52accf Closes-Bug: #1781286 --- nova/conductor/tasks/migrate.py | 11 ++++-- .../regressions/test_bug_1781286.py | 39 ++++++++----------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/nova/conductor/tasks/migrate.py b/nova/conductor/tasks/migrate.py index e14828282db1..6036a5aa55c1 100644 --- a/nova/conductor/tasks/migrate.py +++ b/nova/conductor/tasks/migrate.py @@ -352,9 +352,14 @@ class MigrationTask(base.TaskBase): (host, node) = (selection.service_host, selection.nodename) - self.instance.availability_zone = ( - availability_zones.get_host_availability_zone( - self.context, host)) + # The availability_zone field was added in v1.1 of the Selection + # object so make sure to handle the case where it is missing. + if 'availability_zone' in selection: + self.instance.availability_zone = selection.availability_zone + else: + self.instance.availability_zone = ( + availability_zones.get_host_availability_zone( + self.context, host)) LOG.debug("Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, diff --git a/nova/tests/functional/regressions/test_bug_1781286.py b/nova/tests/functional/regressions/test_bug_1781286.py index 063f0a12caa1..857fa81d6e92 100644 --- a/nova/tests/functional/regressions/test_bug_1781286.py +++ b/nova/tests/functional/regressions/test_bug_1781286.py @@ -124,6 +124,7 @@ class RescheduleMigrateAvailabilityZoneUpCall( self.addCleanup(fake_notifier.reset) def test_migrate_reschedule_blocked_az_up_call(self): + self.flags(default_availability_zone='us-central') # We need to stub out the call to get_host_availability_zone to blow # up once we have gone to the compute service. original_prep_resize = compute_manager.ComputeManager._prep_resize @@ -154,26 +155,20 @@ class RescheduleMigrateAvailabilityZoneUpCall( # Now cold migrate the server to the other host. self.api.post_server_action(server['id'], {'migrate': None}) - - # FIXME(mriedem): This is bug 1781286 where we reschedule from the - # first selected host to conductor which will try to get the AZ for the - # alternate host selection which will fail since it cannot access the - # API DB. + # Because we poisoned AggregateList.get_by_host after hitting the + # compute service we have to wait for the notification that the resize + # is complete and then stop the mock so we can use the API again. fake_notifier.wait_for_versioned_notifications( - 'compute_task.migrate_server.error') - server = self._wait_for_server_parameter( - self.api, server, {'status': 'ERROR', - 'OS-EXT-SRV-ATTR:host': original_host, - 'OS-EXT-STS:task_state': None}) - # Assert there is a fault injected on the server. This is a bit - # annoying in that we would expect to see CantStartEngineError but - # because of how ComputeManager._reschedule_resize_or_reraise works. - # the reschedule call to conductor is an RPC call so that exception - # comes back to compute which injects a fault but then re-raises the - # ComputeResourcesUnavailable exception which gets recorded as the most - # recent fault which is what shows up in the API. So instead we assert - # that the reschedule happened and assert the mocked method was called. - self.assertIn('Insufficient compute resources', - server['fault']['message']) - self.assertIsNotNone(self.rescheduled) - self.agg_mock.assert_called_once() + 'instance.resize_finish.end') + # Note that we use stopall here because we actually called _prep_resize + # twice so we have more than one instance of the mock that needs to be + # stopped. + mock.patch.stopall() + server = self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE') + final_host = server['OS-EXT-SRV-ATTR:host'] + self.assertNotIn(final_host, [original_host, self.rescheduled]) + # We should have rescheduled and the instance AZ should be set from the + # Selection object. Since neither compute host is in an AZ, the server + # is in the default AZ from config. + self.assertEqual('us-central', server['OS-EXT-AZ:availability_zone']) + self.agg_mock.assert_not_called()