# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from nova.compute import manager as compute_manager
import nova.conf
from nova import context as nova_context
from nova import objects
from nova.scheduler import weights
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.unit.image import fake as image_fake
from nova.tests.unit import policy_fixture
from nova.virt import fake

CONF = nova.conf.CONF


class HostNameWeigher(weights.BaseHostWeigher):
    """Weigher that ranks hosts by a fixed, name-based weight.

    Used by the test below (via the ``weight_classes`` option) so that the
    scheduler's host ordering is predictable.
    """

    def _weigh_object(self, host_state, weight_properties):
        """Arbitrary preferring host1 over host2 over host3.

        :param host_state: object whose ``host`` attribute is the host name
        :param weight_properties: unused
        :returns: the fixed weight for the host, or 0 for any other host name
        """
        # Deliberately not named "weights": that would shadow the
        # nova.scheduler.weights module imported at the top of this file.
        host_weights = {'host1': 100, 'host2': 50, 'host3': 1}
        return host_weights.get(host_state.host, 0)


class TestRequestSpecRetryReschedule(test.TestCase,
                                     integrated_helpers.InstanceHelperMixin):
    """Regression test for bug 1718512 introduced in Newton.

    Contains a test for a regression where an instance builds on one host,
    then is resized. During the resize, the first attempted host fails and
    the resize is rescheduled to another host which passes. The failed host
    is persisted in the RequestSpec.retry field by mistake. Then later when
    trying to live migrate the instance to the same host that failed during
    resize, it is rejected by the RetryFilter because it's already in the
    RequestSpec.retry field.
    """

    def setUp(self):
        """Start the API, conductor, scheduler and three fake computes."""
        super(TestRequestSpecRetryReschedule, self).setUp()
        self.useFixture(policy_fixture.RealPolicyFixture())

        # The NeutronFixture is needed to stub out validate_networks in API.
        self.useFixture(nova_fixtures.NeutronFixture(self))

        # We need the computes reporting into placement for the filter
        # scheduler to pick a host.
        self.useFixture(nova_fixtures.PlacementFixture())

        api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
            api_version='v2.1'))
        # The admin API is used to get the server details to verify the
        # host on which the server was built.
        self.admin_api = api_fixture.admin_api
        self.api = api_fixture.api

        # the image fake backend needed for image discovery
        image_fake.stub_out_image_service(self)
        self.addCleanup(image_fake.FakeImageService_reset)

        self.start_service('conductor')

        # We have to get the image before we use 2.latest otherwise we'll get
        # a 404 on the /images proxy API because of 2.36.
        self.image_id = self.api.get_images()[0]['id']

        # Use the latest microversion available to make sure something does
        # not regress in new microversions; cap as necessary.
        self.admin_api.microversion = 'latest'
        self.api.microversion = 'latest'

        # The consoleauth service is needed for deleting console tokens when
        # the server is deleted.
        self.start_service('consoleauth')

        # Work on a copy: removing an entry from the list object returned by
        # the global CONF would mutate shared configuration state in place
        # instead of going through the self.flags() override below.
        enabled_filters = list(CONF.filter_scheduler.enabled_filters)
        # Remove the DiskFilter since we're using Placement for filtering on
        # DISK_GB.
        if 'DiskFilter' in enabled_filters:
            enabled_filters.remove('DiskFilter')
        # Use our custom weigher defined above to make sure that we have
        # a predictable scheduling sort order.
        self.flags(weight_classes=[__name__ + '.HostNameWeigher'],
                   enabled_filters=enabled_filters,
                   group='filter_scheduler')
        self.start_service('scheduler')

        # Start three compute services, one for each host name that
        # HostNameWeigher assigns a weight to. set_nodes() is called before
        # each service start so that (presumably) every fake compute reports
        # a node named after its own host.
        for host in ['host1', 'host2', 'host3']:
            fake.set_nodes([host])
            self.addCleanup(fake.restore_nodes)
            self.start_service('compute', host=host)

    def _stub_resize_failure(self, failed_host):
        """Make ComputeManager._prep_resize fail on one specific host.

        :param failed_host: name of the compute host on which _prep_resize
            should raise; every other host runs the real implementation.
        """
        actual_prep_resize = compute_manager.ComputeManager._prep_resize

        def fake_prep_resize(_self, *args, **kwargs):
            if _self.host == failed_host:
                raise Exception('%s:fake_prep_resize' % failed_host)
            # Pass the real result through so the stub stays a transparent
            # wrapper on the hosts that are not meant to fail.
            return actual_prep_resize(_self, *args, **kwargs)

        self.stub_out('nova.compute.manager.ComputeManager._prep_resize',
                      fake_prep_resize)

    def test_resize_with_reschedule_then_live_migrate(self):
        """Tests the following scenario:

        - Server is created on host1 successfully.
        - Server is resized; host2 is tried and fails, and rescheduled to
          host3.
        - Then try to live migrate the instance to host2 which should work.

        NOTE: until bug 1718512 is fixed the final assertions pin the
        current (buggy) behaviour, see the FIXME near the end of the test.
        """
        flavors = self.api.get_flavors()
        flavor1 = flavors[0]
        flavor2 = flavors[1]
        if flavor1["disk"] > flavor2["disk"]:
            # Make sure that flavor1 is smaller
            flavor1, flavor2 = flavor2, flavor1

        # create the instance which should go to host1
        server = self.admin_api.post_server(
            {'server': self._build_minimal_create_server_request(
                self.api, 'test_resize_with_reschedule_then_live_migrate',
                self.image_id, flavor_id=flavor1['id'], networks='none')})
        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
        self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])

        # Stub out the resize to fail on host2, which will trigger a reschedule
        # to host3.
        self._stub_resize_failure('host2')

        # Resize the server to flavor2, which should make it ultimately end up
        # on host3.
        data = {'resize': {'flavorRef': flavor2['id']}}
        self.api.post_server_action(server['id'], data)
        server = self._wait_for_state_change(self.admin_api, server,
                                             'VERIFY_RESIZE')
        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
        self.api.api_post('/servers/%s/action' % server['id'],
                          {'confirmResize': None}, check_response_status=[204])
        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')

        # Now live migrate the server to host2 specifically, which previously
        # failed the resize attempt but here it should pass.
        data = {'os-migrateLive': {'host': 'host2', 'block_migration': 'auto'}}
        self.admin_api.post_server_action(server['id'], data)
        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
        # FIXME(mriedem): This is bug 1718512 where the failed resize left
        # host2 in the RequestSpec.retry field and it affects the live migrate
        # to host2 because the scheduler RetryFilter kicks it out.
        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
        migrations = self.admin_api.api_get(
            'os-migrations?instance_uuid=%s&migration_type=live-migration' %
            server['id']).body['migrations']
        self.assertEqual(1, len(migrations))
        self.assertEqual('error', migrations[0]['status'])
        reqspec = objects.RequestSpec.get_by_instance_uuid(
            nova_context.get_admin_context(), server['id'])
        self.assertIsNotNone(reqspec.retry)
        self.assertEqual(1, reqspec.retry.num_attempts)
        self.assertEqual('host2', reqspec.retry.hosts[0].host)