Merge "Move revert resize under semaphore" into stable/ussuri

Zuul 2020-09-25 15:41:19 +00:00 committed by Gerrit Code Review
commit f20188995a
7 changed files with 69 additions and 62 deletions

View File

@@ -4672,10 +4672,7 @@ class ComputeManager(manager.Manager):
         self._delete_volume_attachments(ctxt, bdms)

         # Free up the new_flavor usage from the resource tracker for this
         # host.
-        instance.revert_migration_context()
-        instance.save(expected_task_state=task_states.RESIZE_REVERTING)
-        self.rt.drop_move_claim(ctxt, instance, instance.node,
-                                instance_type=instance.new_flavor)
+        self.rt.drop_move_claim_at_dest(ctxt, instance, migration)

     def _revert_instance_flavor_host_node(self, instance, migration):
         """Revert host, node and flavor fields after a resize-revert."""
@@ -4873,20 +4870,9 @@ class ComputeManager(manager.Manager):
         self._terminate_volume_connections(context, instance, bdms)

-        migration.status = 'reverted'
-        migration.save()
-
-        # NOTE(ndipanov): We need to do this here because dropping the
-        # claim means we lose the migration_context data. We really should
-        # fix this by moving the drop_move_claim call to the
-        # finish_revert_resize method as this is racy (revert is dropped,
-        # but instance resources will be tracked with the new flavor until
-        # it gets rolled back in finish_revert_resize, which is
-        # potentially wrong for a period of time).
-        instance.revert_migration_context()
-        instance.save()
-
-        self.rt.drop_move_claim(context, instance, instance.node)
+        # Free up the new_flavor usage from the resource tracker for this
+        # host.
+        self.rt.drop_move_claim_at_dest(context, instance, migration)

         # RPC cast back to the source host to finish the revert there.
         self.compute_rpcapi.finish_revert_resize(context, instance,
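Reviewer note: both hunks above collapse a multi-step sequence (migration status flip, migration-context revert, instance save, move-claim drop) into a single ResourceTracker call, so the whole revert runs under COMPUTE_RESOURCE_SEMAPHORE. Previously the update_available_resource periodic could run between migration.save() and drop_move_claim and account usage from half-reverted state. A minimal sketch of that race, with made-up names rather than nova code:

import threading

lock = threading.Lock()  # stands in for COMPUTE_RESOURCE_SEMAPHORE
state = {'status': 'reverting', 'claim_dropped': False}

def old_revert_resize():
    state['status'] = 'reverted'  # migration.save() ran outside the lock
    with lock:
        state['claim_dropped'] = True  # drop_move_claim()

def periodic_task():
    with lock:
        # could observe status == 'reverted' while the claim is still
        # held, i.e. usage computed from inconsistent state
        return dict(state)

def new_revert_resize():
    # the fix: status flip and claim drop happen in one critical
    # section, as drop_move_claim_at_dest now does
    with lock:
        state['status'] = 'reverted'
        state['claim_dropped'] = True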

View File

@@ -553,6 +553,24 @@ class ResourceTracker(object):
         # though.
         instance.drop_migration_context()

+    @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
+    def drop_move_claim_at_dest(self, context, instance, migration):
+        """Drop a move claim after reverting a resize or cold migration."""
+        # NOTE(stephenfin): This runs on the destination, before we return to
+        # the source and resume the instance there. As such, the migration
+        # isn't really reverted yet, but this status is what we use to
+        # indicate that we no longer need to account for usage on this host.
+        migration.status = 'reverted'
+        migration.save()
+
+        self._drop_move_claim(
+            context, instance, migration.dest_node, instance.new_flavor,
+            prefix='new_')
+
+        instance.revert_migration_context()
+        instance.save(expected_task_state=[task_states.RESIZE_REVERTING])
+
     @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
     def drop_move_claim(self, context, instance, nodename,
                         instance_type=None, prefix='new_'):
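Reviewer note: the new method sits under the same decorator as every other reader/writer of tracked usage. Nova's utils.synchronized delegates to oslo.concurrency's lockutils, where fair=True gives FIFO hand-off between waiters. A rough approximation of what the decorator provides (a plain threading.Lock used here for brevity makes no fairness guarantee; the name 'compute_resources' is illustrative):

import functools
import threading

_locks = {}

def synchronized(name, fair=False):
    lock = _locks.setdefault(name, threading.Lock())

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with lock:
                return func(*args, **kwargs)
        return wrapper
    return decorator

@synchronized('compute_resources', fair=True)
def drop_move_claim_at_dest(context, instance, migration):
    # everything decorated with the same lock name is excluded while
    # the status flip, claim drop and context revert run
    pass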

View File

@ -177,15 +177,22 @@ class InstanceHelperMixin(object):
api = getattr(self, 'admin_api', self.api)
statuses = [status.lower() for status in expected_statuses]
actual_status = None
for attempt in range(10):
migrations = api.api_get('/os-migrations').body['migrations']
for migration in migrations:
if (migration['instance_uuid'] == server['id'] and
migration['status'].lower() in statuses):
return migration
if migration['instance_uuid'] == server['id']:
actual_status = migration['status']
if migration['status'].lower() in statuses:
return migration
time.sleep(0.5)
self.fail('Timed out waiting for migration with status "%s" for '
'instance: %s' % (expected_statuses, server['id']))
self.fail(
'Timed out waiting for migration with status for instance %s '
'(expected "%s", got "%s")' % (
server['id'], expected_statuses, actual_status,
))
def _wait_for_log(self, log_line):
for i in range(10):
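Reviewer note: the helper now records the last status it saw for the server's migration, so a timeout reports what was actually observed instead of only what was expected. The same polling idea in isolation (a hypothetical helper, not part of nova):

import time

def wait_for(fetch, predicate, attempts=10, delay=0.5):
    last = None
    for _ in range(attempts):
        last = fetch()
        if predicate(last):
            return last
        time.sleep(delay)
    raise AssertionError('timed out; last observed value: %r' % (last,))

Called as, say, wait_for(get_status, lambda s: s == 'reverted'), the failure message then carries the final observed value.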

View File

@@ -118,7 +118,7 @@ class TestColdMigrationUsage(integrated_helpers._IntegratedTestBase):
         self.assertUsage(src_host, 1)
         self.assertUsage(dst_host, 0)

-        orig_drop_claim = rt.ResourceTracker.drop_move_claim
+        orig_drop_claim = rt.ResourceTracker.drop_move_claim_at_dest

         def fake_drop_move_claim(*args, **kwargs):
             # run periodics after marking the migration reverted, simulating a
@@ -131,15 +131,14 @@ class TestColdMigrationUsage(integrated_helpers._IntegratedTestBase):
             if drop_race:
                 self._run_periodics()

-                # FIXME(stephenfin): the periodic should not have dropped the
-                # records for the src
-                self.assertUsage(src_host, 0)
-                self.assertUsage(dst_host, 1)
+            self.assertUsage(src_host, 1)
+            self.assertUsage(dst_host, 1)

             return orig_drop_claim(*args, **kwargs)

         self.stub_out(
-            'nova.compute.resource_tracker.ResourceTracker.drop_move_claim',
+            'nova.compute.resource_tracker.ResourceTracker.'
+            'drop_move_claim_at_dest',
             fake_drop_move_claim,
         )
@@ -155,11 +154,8 @@ class TestColdMigrationUsage(integrated_helpers._IntegratedTestBase):
         # migration is now reverted so we should once again only have usage on
         # one host
-        # FIXME(stephenfin): Our usage here should always be 1 and 0 for source
-        # and dest respectively when reverting, but that won't happen until we
-        # run the periodic and rebuild our inventory from scratch
-        self.assertUsage(src_host, 0 if drop_race else 1)
-        self.assertUsage(dst_host, 1 if drop_race else 0)
+        self.assertUsage(src_host, 1)
+        self.assertUsage(dst_host, 0)

         # running periodics shouldn't change things
         self._run_periodics()
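Reviewer note: the functional test wraps the real drop_move_claim_at_dest with a stub that fires the update_available_resource periodic mid-revert; with the whole revert under the semaphore, usage stays at 1/1 until the claim is dropped, so both FIXMEs disappear. The wrap-and-delegate stubbing pattern in isolation (illustrative names, not the test's actual fixtures):

class Tracker:
    def drop_move_claim_at_dest(self, context, instance, migration):
        return 'dropped'

orig = Tracker.drop_move_claim_at_dest

def run_periodics():
    pass  # stands in for self._run_periodics()

def fake_drop_move_claim(self, *args, **kwargs):
    # provoke the race window, then call through to the real method
    run_periodics()
    return orig(self, *args, **kwargs)

Tracker.drop_move_claim_at_dest = fake_drop_move_claim
assert Tracker().drop_move_claim_at_dest(None, None, None) == 'dropped'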

View File

@@ -5840,6 +5840,7 @@ class ComputeTestCase(BaseTestCase,
         migration.status = 'finished'
         migration.migration_type = 'migration'
         migration.source_node = NODENAME
+        migration.dest_node = NODENAME
         migration.create()

         migration_context = objects.MigrationContext()
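Reviewer note: the fixture now has to set dest_node because the claim is dropped against migration.dest_node (see the _drop_move_claim call in the resource tracker hunk above) rather than against instance.node.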

View File

@@ -8394,7 +8394,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
         request_spec = objects.RequestSpec()

         @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
-                    'drop_move_claim')
+                    'drop_move_claim_at_dest')
         @mock.patch('nova.compute.rpcapi.ComputeAPI.finish_revert_resize')
         @mock.patch.object(self.instance, 'revert_migration_context')
         @mock.patch.object(self.compute.network_api, 'get_instance_nw_info')
@@ -8432,9 +8432,9 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
                 migration=self.migration,
                 instance=self.instance,
                 request_spec=request_spec)
-            mock_drop_move_claim.assert_called_once_with(self.context,
-                self.instance, self.instance.node)
-            self.assertIsNotNone(self.instance.migration_context)
+            mock_drop_move_claim.assert_called_once_with(
+                self.context, self.instance, self.migration)

         # Three fake BDMs:
         # 1. volume BDM with an attachment_id which will be updated/completed
@@ -11571,11 +11571,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
         # Assert _error_out_instance_on_exception wasn't tripped somehow.
         self.assertNotEqual(vm_states.ERROR, self.instance.vm_state)

-    @mock.patch('nova.objects.Instance.save')
-    @mock.patch('nova.objects.Instance.revert_migration_context')
     @mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
-    def test_revert_snapshot_based_resize_at_dest(
-            self, mock_get_bdms, mock_revert_mig_ctxt, mock_inst_save):
+    def test_revert_snapshot_based_resize_at_dest(self, mock_get_bdms):
         """Happy path test for _revert_snapshot_based_resize_at_dest"""
         # Setup more mocks.
         def stub_migrate_instance_start(ctxt, instance, migration):
@@ -11588,7 +11585,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
             mock.patch.object(self.compute, '_get_instance_block_device_info'),
             mock.patch.object(self.compute.driver, 'destroy'),
             mock.patch.object(self.compute, '_delete_volume_attachments'),
-            mock.patch.object(self.compute.rt, 'drop_move_claim')
+            mock.patch.object(self.compute.rt, 'drop_move_claim_at_dest')
         ) as (
             mock_network_api, mock_get_bdi, mock_destroy,
             mock_delete_attachments, mock_drop_claim
@@ -11603,6 +11600,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
         # Run the code.
         self.compute._revert_snapshot_based_resize_at_dest(
             self.context, self.instance, self.migration)
+
         # Assert the calls.
         mock_network_api.get_instance_nw_info.assert_called_once_with(
             self.context, self.instance)
@@ -11623,12 +11621,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
                       self.stdlog.logger.output)
         mock_delete_attachments.assert_called_once_with(
             self.context, mock_get_bdms.return_value)
-        mock_revert_mig_ctxt.assert_called_once_with()
-        mock_inst_save.assert_called_once_with(
-            expected_task_state=task_states.RESIZE_REVERTING)
         mock_drop_claim.assert_called_once_with(
-            self.context, self.instance, self.instance.node,
-            instance_type=self.instance.new_flavor)
+            self.context, self.instance, self.migration)

     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_finish_revert_snapshot_based_resize_at_source')
@@ -11727,9 +11721,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
         # Run the code.
         self.compute.finish_revert_snapshot_based_resize_at_source(
             self.context, self.instance, self.migration)
+
-        # Assert the migration status was updated.
-        self.migration.save.assert_called_once_with()
-        self.assertEqual('reverted', self.migration.status)
         # Assert the instance host/node and flavor info was updated.
         self.assertEqual(self.migration.source_compute, self.instance.host)
         self.assertEqual(self.migration.source_node, self.instance.node)
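Reviewer note: the manager-level tests shrink because the migration-status and instance-save behaviour moved behind the tracker call; the caller's test now only pins the delegation point, a pattern that reduces to this sketch (stand-in values, not the test's fixtures):

from unittest import mock

rt = mock.Mock()
# code under test delegates the whole revert to the tracker...
rt.drop_move_claim_at_dest('ctx', 'instance', 'migration')
# ...so the caller's test asserts only the new entry point
rt.drop_move_claim_at_dest.assert_called_once_with(
    'ctx', 'instance', 'migration')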

View File

@@ -2586,12 +2586,11 @@ class TestResize(BaseTestCase):
         with test.nested(
             mock.patch('nova.objects.Migration.save'),
             mock.patch('nova.objects.Instance.drop_migration_context'),
+            mock.patch('nova.objects.Instance.save'),
         ):
             if revert:
-                flavor = new_flavor
-                prefix = 'new_'
-                self.rt.drop_move_claim(
-                    ctx, instance, _NODENAME, flavor, prefix=prefix)
+                self.rt.drop_move_claim_at_dest(ctx, instance, migration)
             else:  # confirm
-                flavor = old_flavor
                 self.rt.drop_move_claim_at_source(ctx, instance, migration)
@@ -2758,32 +2757,40 @@ class TestResize(BaseTestCase):
         instance.migration_context.new_pci_devices = objects.PciDeviceList(
             objects=pci_devs)

-        # When reverting a resize and dropping the move claim, the
-        # destination compute calls drop_move_claim to drop the new_flavor
+        # When reverting a resize and dropping the move claim, the destination
+        # compute calls drop_move_claim_at_dest to drop the new_flavor
         # usage and the instance should be in tracked_migrations from when
         # the resize_claim was made on the dest during prep_resize.
-        self.rt.tracked_migrations = {
-            instance.uuid: objects.Migration(migration_type='resize')}
+        migration = objects.Migration(
+            dest_node=cn.hypervisor_hostname,
+            migration_type='resize',
+        )
+        self.rt.tracked_migrations = {instance.uuid: migration}

-        # not using mock.sentinel.ctx because drop_move_claim calls elevated
+        # not using mock.sentinel.ctx because _drop_move_claim calls elevated
         ctx = mock.MagicMock()

         with test.nested(
             mock.patch.object(self.rt, '_update'),
             mock.patch.object(self.rt.pci_tracker, 'free_device'),
             mock.patch.object(self.rt, '_get_usage_dict'),
-            mock.patch.object(self.rt, '_update_usage')
+            mock.patch.object(self.rt, '_update_usage'),
+            mock.patch.object(migration, 'save'),
+            mock.patch.object(instance, 'save'),
         ) as (
             update_mock, mock_pci_free_device, mock_get_usage,
-            mock_update_usage,
+            mock_update_usage, mock_migrate_save, mock_instance_save,
         ):
-            self.rt.drop_move_claim(ctx, instance, _NODENAME)
+            self.rt.drop_move_claim_at_dest(ctx, instance, migration)

         mock_pci_free_device.assert_called_once_with(
             pci_dev, mock.ANY)
         mock_get_usage.assert_called_once_with(
             instance.new_flavor, instance, numa_topology=None)
         mock_update_usage.assert_called_once_with(
             mock_get_usage.return_value, _NODENAME, sign=-1)
+        mock_migrate_save.assert_called_once()
+        mock_instance_save.assert_called_once()

     @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                 '_sync_compute_service_disabled_trait', new=mock.Mock())
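Reviewer note: the two extra patches (migration.save, instance.save) exist because drop_move_claim_at_dest now persists both objects itself, under the lock. Its observable contract, reduced to a runnable sketch with mock stand-ins (the claim-drop internals are elided; this is not nova's code):

from unittest import mock

migration = mock.Mock(status='reverting')
instance = mock.Mock()

def drop_move_claim_at_dest(context, instance, migration):
    migration.status = 'reverted'
    migration.save()
    # claim drop elided
    instance.revert_migration_context()
    instance.save()

drop_move_claim_at_dest(mock.Mock(), instance, migration)
migration.save.assert_called_once_with()
instance.save.assert_called_once_with()
assert migration.status == 'reverted'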