Add functional recreate test for regression bug 1825537

Change I2d9ab06b485f76550dbbff46f79f40ff4c97d12f in Rocky
(and backported through to Pike) added error handling to
the resize_instance and finish_resize methods to revert
allocations in placement when a failure occurs.

This is OK for resize_instance, which runs on the source
compute, as long as the instance.host/node values have not
yet been changed to the dest host/node before RPC casting
to the finish_resize method on the dest compute. It's OK
because the instance is still on the source compute and the
DB says so, so any attempt to recover the instance via hard
reboot or rebuild will be on the source host.

This is not OK for finish_resize, which runs on the dest
compute. By the time it fails, the instance host/node values
are already pointing at the dest compute. Reverting the
allocations at that point leaves placement incorrectly
tracking the instance usage with the old flavor against the
source node resource provider, rather than with the new
flavor against the dest node resource provider - which is
where the instance is actually running and where the nova DB
says the instance lives.

This change adds a simple functional regression test to
recreate the bug with a multi-host resize. An existing
same-host resize functional test is also marked with a FIXME
here because it will need to be fixed as well.

Conflicts:
      nova/tests/functional/test_servers.py
      nova/virt/fake.py

NOTE(mriedem): The test_servers conflict is due to not having
change If6aa37d9b6b48791e070799ab026c816fda4441c in Rocky. As
a result, the new regression test also had to be modified for
the call to assertFlavorMatchesAllocation. The fake module
conflict is due to not having change
Iefff121640e04abdbb6a4ae546c447f168dc8af9 in Rocky.

Change-Id: Ie9e294db7e24d0e3cbe83eee847f0fbfb7478900
Related-Bug: #1825537
(cherry picked from commit f4bb672106)
(cherry picked from commit eaa1fc6159)
This commit is contained in:
Matt Riedemann 2019-04-19 11:54:07 -04:00
parent 74046899a2
commit 9a977cb28c
3 changed files with 90 additions and 0 deletions

View File

@ -0,0 +1,82 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from nova.tests.functional import integrated_helpers
class FinishResizeErrorAllocationCleanupTestCase(
        integrated_helpers.ProviderUsageBaseTestCase):
    """Functional recreate test for regression bug 1825537.

    The bug was introduced in Rocky and backported down to Pike. The
    scenario is a cross-host resize in which finish_resize fails on the
    dest compute; the test then inspects the resource provider usages
    reported by placement for both the source and dest nodes.
    """

    # Virt driver whose finish_migration always raises, forcing the
    # finish_resize failure this test depends on.
    compute_driver = 'fake.FakeFinishMigrationFailDriver'

    def setUp(self):
        """Pick the two flavors used as the resize source and target."""
        super(FinishResizeErrorAllocationCleanupTestCase, self).setUp()
        available = self.api.get_flavors()
        # flavor1 is what the server boots with, flavor2 is the resize
        # target.
        self.flavor1, self.flavor2 = available[0], available[1]

    def _resize_and_assert_error(self, server, dest_host):
        """Resize ``server`` to flavor2 and verify it lands in ERROR.

        :param server: API dict of the server to resize
        :param dest_host: name of the host the server is expected to be
            pointing at once the resize has failed
        """
        # Kick off the resize; finish_migration in the dest host's virt
        # driver is expected to fail.
        resize_body = {'resize': {'flavorRef': self.flavor2['id']}}
        self.api.post_server_action(server['id'], resize_body)

        # ERROR status is set before the fault is recorded, so waiting on
        # status alone would be racy. The task_state only goes back to
        # None after the fault has been recorded, hence we wait for both.
        expected_state = {'status': 'ERROR', 'OS-EXT-STS:task_state': None}
        errored = self._wait_for_server_parameter(
            self.admin_api, server, expected_state)

        # resize_instance updates the instance host/node before casting to
        # finish_resize on the dest compute, so the server should already
        # be pointing at $dest_host.
        self.assertEqual(dest_host, errored['OS-EXT-SRV-ATTR:host'])

        # FakeFinishMigrationFailDriver.finish_migration raises
        # VirtualInterfaceCreateException, which is what the fault should
        # report.
        fault_message = errored['fault']['message']
        self.assertIn('Virtual Interface creation failed', fault_message)

    def test_finish_resize_fails_allocation_cleanup(self):
        """Recreate bug 1825537 with a resize from host1 to host2."""
        # Two computes are needed for a cross-host resize.
        for hostname in ('host1', 'host2'):
            self._start_compute(hostname)

        # Boot on host1, then attempt the (failing) resize to host2.
        server = self._boot_and_check_allocations(self.flavor1, 'host1')
        self._resize_and_assert_error(server, 'host2')

        # The DB now points the server at the dest host, so the correct
        # outcome would be for the dest node resource provider to still
        # hold allocations for the new flavor.
        source_rp_uuid = self._get_provider_uuid_by_host('host1')
        dest_rp_uuid = self._get_provider_uuid_by_host('host2')

        # FIXME(mriedem): This is bug 1825537 where the allocations are
        # reverted when finish_resize fails so the dest node resource
        # provider does not have any allocations and the instance
        # allocations are for the old flavor on the source node resource
        # provider even though the instance is not running on the source
        # host nor pointed at the source host in the DB.
        # self.assertFlavorMatchesAllocation(
        #     self.flavor2, server['id'], dest_rp_uuid)
        dest_rp_usages = self._get_provider_usages(dest_rp_uuid)
        self.assertEqual(
            {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, dest_rp_usages)

        source_usages = self._get_provider_usages(source_rp_uuid)
        self.assertFlavorMatchesAllocation(self.flavor1, source_usages)

View File

@ -3088,6 +3088,7 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
# Ensure the allocation records still exist on the host.
source_rp_uuid = self._get_provider_uuid_by_host(hostname)
source_usages = self._get_provider_usages(source_rp_uuid)
# FIXME(mriedem): This is wrong for the _finish_resize case.
# The new_flavor should have been subtracted from the doubled
# allocation which just leaves us with the original flavor.
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)

View File

@ -674,6 +674,13 @@ class MediumFakeDriver(FakeDriver):
local_gb = 1028
class FakeFinishMigrationFailDriver(FakeDriver):
    """FakeDriver variant whose finish_migration always fails."""

    def finish_migration(self, *args, **kwargs):
        """Unconditionally raise VirtualInterfaceCreateException.

        All positional and keyword arguments are accepted and ignored.
        """
        failure = exception.VirtualInterfaceCreateException()
        raise failure
class FakeRescheduleDriver(FakeDriver):
"""FakeDriver derivative that triggers a reschedule on the first spawn
attempt. This is expected to only be used in tests that have more than