From 004e9acf99964ac78f85d3efbd0a04404bd9a3ef Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Wed, 20 Sep 2017 14:24:44 -0400 Subject: [PATCH] Add regression test for persisted RequestSpec.retry from failed resize Commit 74ab427d4796d8a386f84a15cc49188c2a60f8f1 in Newton added code to persist changes to the RequestSpec during a resize since the flavor changes. That change inadvertently also persisted any failed hosts during the resize that are stored in the RequestSpec.retry field during a reschedule. The problem is that later those persisted failed hosts are rejected by the RetryFilter, which can be confusing if an admin is trying to live migrate or evacuate the instance to one of those specific hosts. This adds a functional regression test to show the failure, which will be fixed in a separate change that then modifies the assertions. NOTE(mriedem): The confirmResize API post call in this version needed the check_response_status=[204] kwarg because commit 8ec0b4390401ce62cab0ea9b3786dc487e26c9f7 isn't in Pike. Change-Id: Ib8a23db838b0bbf2cfb8123cf6aaa39d00ff0640 Related-Bug: #1718512 (cherry picked from commit 89448bea577b30c40ce39185d14fe14f9c61a0c2) (cherry picked from commit c2dc902e39eb345ebf674ad47422f1e72ec170e6) --- .../regressions/test_bug_1718512.py | 159 ++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 nova/tests/functional/regressions/test_bug_1718512.py diff --git a/nova/tests/functional/regressions/test_bug_1718512.py b/nova/tests/functional/regressions/test_bug_1718512.py new file mode 100644 index 000000000000..74596ea55100 --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_1718512.py @@ -0,0 +1,159 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova.compute import manager as compute_manager
+from nova import context as nova_context
+from nova import objects
+from nova.scheduler import weights
+from nova import test
+from nova.tests import fixtures as nova_fixtures
+from nova.tests.functional import integrated_helpers
+from nova.tests.unit.image import fake as image_fake
+from nova.tests.unit import policy_fixture
+from nova.virt import fake
+
+
+class HostNameWeigher(weights.BaseHostWeigher):
+    """Weigher that gives each host a fixed weight based on its name.
+
+    The test below registers this class as the only weigher so that the
+    order in which host1, host2 and host3 are picked is predictable.
+    """
+    def _weigh_object(self, host_state, weight_properties):
+        """Return a fixed weight, preferring host1 over host2 over host3.
+
+        :param host_state: object whose ``host`` attribute is the host name
+            looked up in the weight table
+        :param weight_properties: not used by this weigher
+        :returns: 100 for host1, 50 for host2, 1 for host3 and 0 for any
+            other host name
+        """
+        # NOTE: this local name shadows the imported ``weights`` module, but
+        # only inside this method.
+        weights = {'host1': 100, 'host2': 50, 'host3': 1}
+        return weights.get(host_state.host, 0)
+
+
+class TestRequestSpecRetryReschedule(test.TestCase,
+                                     integrated_helpers.InstanceHelperMixin):
+    """Regression test for bug 1718512 introduced in Newton.
+
+    Contains a test for a regression where an instance builds on one host,
+    then is resized. During the resize, the first attempted host fails and
+    the resize is rescheduled to another host which passes. The failed host
+    is persisted in the RequestSpec.retry field by mistake. Then later when
+    trying to live migrate the instance to the same host that failed during
+    resize, it is rejected by the RetryFilter because it's already in the
+    RequestSpec.retry field.
+    """
+    def setUp(self):
+        """Set up the fixtures, API clients and services used by the test.
+
+        Starts the conductor, consoleauth and scheduler services plus three
+        compute services named host1, host2 and host3, and stores the API
+        clients and the ID of the first listed image on the test case.
+        """
+        super(TestRequestSpecRetryReschedule, self).setUp()
+        self.useFixture(policy_fixture.RealPolicyFixture())
+
+        # The NeutronFixture is needed to stub out validate_networks in API.
+        self.useFixture(nova_fixtures.NeutronFixture(self))
+
+        # We need the computes reporting into placement for the filter
+        # scheduler to pick a host.
+        self.useFixture(nova_fixtures.PlacementFixture())
+
+        api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
+            api_version='v2.1'))
+        # The admin API is used to get the server details to verify the
+        # host on which the server was built.
+        self.admin_api = api_fixture.admin_api
+        self.api = api_fixture.api
+
+        # The fake image backend is needed for image discovery.
+        image_fake.stub_out_image_service(self)
+        self.addCleanup(image_fake.FakeImageService_reset)
+
+        self.start_service('conductor')
+
+        # We have to get the image before we use 2.latest otherwise we'll get
+        # a 404 on the /images proxy API because of 2.36.
+        self.image_id = self.api.get_images()[0]['id']
+
+        # Use the latest microversion available to make sure something does
+        # not regress in new microversions; cap as necessary.
+        self.admin_api.microversion = 'latest'
+        self.api.microversion = 'latest'
+
+        # The consoleauth service is needed for deleting console tokens when
+        # the server is deleted.
+        self.start_service('consoleauth')
+
+        # Use our custom weigher defined above to make sure that we have
+        # a predictable scheduling sort order.
+        self.flags(weight_classes=[__name__ + '.HostNameWeigher'],
+                   group='filter_scheduler')
+        self.start_service('scheduler')
+
+        # Start three compute services, one for each host name that the
+        # HostNameWeigher knows about.
+        for host in ['host1', 'host2', 'host3']:
+            fake.set_nodes([host])
+            self.addCleanup(fake.restore_nodes)
+            self.start_service('compute', host=host)
+
+    def _stub_resize_failure(self, failed_host):
+        """Stub ComputeManager._prep_resize so it fails on one host.
+
+        :param failed_host: name of the compute host on which the stubbed
+            _prep_resize raises an Exception; on any other host the real
+            _prep_resize is called with the same arguments
+        """
+        # Keep a reference to the real method before it is stubbed out.
+        actual_prep_resize = compute_manager.ComputeManager._prep_resize
+
+        # ``_self`` is the object the stubbed method is called on, which is
+        # distinct from the test case ``self`` of the enclosing method.
+        def fake_prep_resize(_self, *args, **kwargs):
+            if _self.host == failed_host:
+                raise Exception('%s:fake_prep_resize' % failed_host)
+            # The result of the real method is not returned to the caller.
+            actual_prep_resize(_self, *args, **kwargs)
+        self.stub_out('nova.compute.manager.ComputeManager._prep_resize',
+                      fake_prep_resize)
+
+    def test_resize_with_reschedule_then_live_migrate(self):
+        """Tests the following scenario:
+
+        - Server is created on host1 successfully.
+        - Server is resized; host2 is tried and fails, and rescheduled to
+          host3.
+        - Then try to live migrate the instance to host2 which should work.
+
+        NOTE: the final assertions currently pin the buggy behavior (see
+        the FIXME below): the live migration ends in 'error', the server
+        stays on host3 and host2 is still in RequestSpec.retry.
+        """
+        flavors = self.api.get_flavors()
+        flavor1 = flavors[0]
+        flavor2 = flavors[1]
+        if flavor1["disk"] > flavor2["disk"]:
+            # Make sure that flavor1 is smaller
+            flavor1, flavor2 = flavor2, flavor1
+
+        # create the instance which should go to host1
+        server = self.admin_api.post_server(
+            dict(server=self._build_minimal_create_server_request(
+                self.api, 'test_resize_with_reschedule_then_live_migrate',
+                self.image_id, flavor_id=flavor1['id'], networks='none')))
+        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
+        self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])
+
+        # Stub out the resize to fail on host2, which will trigger a reschedule
+        # to host3.
+        self._stub_resize_failure('host2')
+
+        # Resize the server to flavor2, which should make it ultimately end up
+        # on host3.
+        data = {'resize': {'flavorRef': flavor2['id']}}
+        self.api.post_server_action(server['id'], data)
+        server = self._wait_for_state_change(self.admin_api, server,
+                                             'VERIFY_RESIZE')
+        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
+        # Explicitly expect a 204 response from the confirmResize action.
+        self.api.post_server_action(server['id'], {'confirmResize': None},
+                                    check_response_status=[204])
+        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
+
+        # Now live migrate the server to host2 specifically, which previously
+        # failed the resize attempt but here it should pass.
+        data = {'os-migrateLive': {'host': 'host2', 'block_migration': 'auto'}}
+        self.admin_api.post_server_action(server['id'], data)
+        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
+        # FIXME(mriedem): This is bug 1718512 where the failed resize left
+        # host2 in the RequestSpec.retry field and it affects the live migrate
+        # to host2 because the scheduler RetryFilter kicks it out.
+        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
+        # Exactly one live-migration record is expected for the server, and
+        # it should be in 'error' status.
+        migrations = self.admin_api.api_get(
+            'os-migrations?instance_uuid=%s&migration_type=live-migration' %
+            server['id']).body['migrations']
+        self.assertEqual(1, len(migrations))
+        self.assertEqual('error', migrations[0]['status'])
+        # The persisted RequestSpec still records host2 as the single failed
+        # attempt from the resize reschedule.
+        reqspec = objects.RequestSpec.get_by_instance_uuid(
+            nova_context.get_admin_context(), server['id'])
+        self.assertIsNotNone(reqspec.retry)
+        self.assertEqual(1, reqspec.retry.num_attempts)
+        self.assertEqual('host2', reqspec.retry.hosts[0].host)