Add functional recreate test for bug 1852610

It is possible to delete a source compute service which has
pending migration-based allocations and servers in VERIFY_RESIZE
status. Doing so deletes the compute service and compute node
but orphans the source node resource provider along with its
resource allocations held by the migration record while there
is a pending resized server.

This adds a simple cold migrate test which deletes the source
compute service while the server is in VERIFY_RESIZE status and
then tries to confirm the resize which fails.

NOTE(mriedem): A couple of methods are lifted from ServerMovingTests
since change Ie991d4b53e9bb5e7ec26da99219178ab7695abf6 is not in Rocky.

Change-Id: I644608b4e197ddea31c5f264adb492f2c8931f04
Related-Bug: #1852610
(cherry picked from commit 94d3743b18)
(cherry picked from commit 28d76cc7ae)
(cherry picked from commit 7d67387246)
This commit is contained in:
Matt Riedemann 2019-11-14 11:38:07 -05:00
parent 6eda7409ff
commit 1563a15c8b
3 changed files with 83 additions and 47 deletions

View File

@ -27,6 +27,7 @@ import nova.conf
from nova import context
from nova.db import api as db
import nova.image.glance
from nova import objects
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional.api import client as api_client
@ -647,3 +648,50 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin):
compute.manager.host)
compute.manager.update_available_resource(ctx)
LOG.info('Finished with periodics')
def _move_and_check_allocations(self, server, request, old_flavor,
new_flavor, source_rp_uuid, dest_rp_uuid):
self.api.post_server_action(server['id'], request)
self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE')
def _check_allocation():
source_usages = self._get_provider_usages(source_rp_uuid)
self.assertFlavorMatchesAllocation(old_flavor, source_usages)
dest_usages = self._get_provider_usages(dest_rp_uuid)
self.assertFlavorMatchesAllocation(new_flavor, dest_usages)
# The instance should own the new_flavor allocation against the
# destination host created by the scheduler
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
dest_alloc = allocations[dest_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(new_flavor, dest_alloc)
# The migration should own the old_flavor allocation against the
# source host created by conductor
migration_uuid = self.get_migration_uuid_for_instance(server['id'])
allocations = self._get_allocations_by_server_uuid(migration_uuid)
source_alloc = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(old_flavor, source_alloc)
# OK, so the move operation has run, but we have not yet confirmed or
# reverted the move operation. Before we run periodics, make sure
# that we have allocations/usages on BOTH the source and the
# destination hosts.
_check_allocation()
self._run_periodics()
_check_allocation()
# Make sure the RequestSpec.flavor matches the new_flavor.
ctxt = context.get_admin_context()
reqspec = objects.RequestSpec.get_by_instance_uuid(ctxt, server['id'])
self.assertEqual(new_flavor['id'], reqspec.flavor.flavorid)
def _migrate_and_check_allocations(self, server, flavor, source_rp_uuid,
dest_rp_uuid):
request = {
'migrate': None
}
self._move_and_check_allocations(
server, request=request, old_flavor=flavor, new_flavor=flavor,
source_rp_uuid=source_rp_uuid, dest_rp_uuid=dest_rp_uuid)

View File

@ -1766,53 +1766,6 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
self.compute2.manager.update_available_resource(ctx)
LOG.info('Finished with periodics')
def _migrate_and_check_allocations(self, server, flavor, source_rp_uuid,
dest_rp_uuid):
request = {
'migrate': None
}
self._move_and_check_allocations(
server, request=request, old_flavor=flavor, new_flavor=flavor,
source_rp_uuid=source_rp_uuid, dest_rp_uuid=dest_rp_uuid)
def _move_and_check_allocations(self, server, request, old_flavor,
new_flavor, source_rp_uuid, dest_rp_uuid):
self.api.post_server_action(server['id'], request)
self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE')
def _check_allocation():
source_usages = self._get_provider_usages(source_rp_uuid)
self.assertFlavorMatchesAllocation(old_flavor, source_usages)
dest_usages = self._get_provider_usages(dest_rp_uuid)
self.assertFlavorMatchesAllocation(new_flavor, dest_usages)
# The instance should own the new_flavor allocation against the
# destination host created by the scheduler
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
dest_alloc = allocations[dest_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(new_flavor, dest_alloc)
# The migration should own the old_flavor allocation against the
# source host created by conductor
migration_uuid = self.get_migration_uuid_for_instance(server['id'])
allocations = self._get_allocations_by_server_uuid(migration_uuid)
source_alloc = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(old_flavor, source_alloc)
# OK, so the move operation has run, but we have not yet confirmed or
# reverted the move operation. Before we run periodics, make sure
# that we have allocations/usages on BOTH the source and the
# destination hosts.
_check_allocation()
self._run_periodics()
_check_allocation()
# Make sure the RequestSpec.flavor matches the new_flavor.
ctxt = context.get_admin_context()
reqspec = objects.RequestSpec.get_by_instance_uuid(ctxt, server['id'])
self.assertEqual(new_flavor['id'], reqspec.flavor.flavorid)
def test_resize_revert(self):
self._test_resize_revert(dest_hostname='host1')

View File

@ -173,3 +173,38 @@ class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
log_output = self.stdlog.logger.output
self.assertIn('Error updating resources for node host1.', log_output)
self.assertIn('Failed to create resource provider host1', log_output)
def test_migrate_confirm_after_deleted_source_compute(self):
"""Tests a scenario where a server is cold migrated and while in
VERIFY_RESIZE status the admin attempts to delete the source compute
and then the user tries to confirm the resize.
"""
# Start a compute service and create a server there.
self._start_compute('host1')
host1_rp_uuid = self._get_provider_uuid_by_host('host1')
flavor = self.api.get_flavors()[0]
server = self._boot_and_check_allocations(flavor, 'host1')
# Start a second compute service so we can cold migrate there.
self._start_compute('host2')
host2_rp_uuid = self._get_provider_uuid_by_host('host2')
# Cold migrate the server to host2.
self._migrate_and_check_allocations(
server, flavor, host1_rp_uuid, host2_rp_uuid)
# Delete the source compute service.
service = self.admin_api.get_services(
binary='nova-compute', host='host1')[0]
self.admin_api.api_delete('/os-services/%s' % service['id'])
# FIXME(mriedem): This is bug 1852610 where the compute service is
# deleted but the resource provider is not because there are still
# migration-based allocations against the source node provider.
resp = self.placement_api.get('/resource_providers/%s' % host1_rp_uuid)
self.assertEqual(200, resp.status)
self.assertFlavorMatchesUsage(host1_rp_uuid, flavor)
# Now try to confirm the migration.
# FIXME(mriedem): This will fail until bug 1852610 is fixed and the
# source compute service delete is blocked while there is an
# in-progress migration involving the node.
self.assertNotIn('ComputeHostNotFound', self.stdlog.logger.output)
self.api.post_server_action(server['id'], {'confirmResize': None})
self._wait_for_state_change(self.api, server, 'ERROR')
self.assertIn('ComputeHostNotFound', self.stdlog.logger.output)