Update usage in RT.drop_move_claim during confirm resize

The confirm resize flow in the compute manager runs on the source
host. It calls RT.drop_move_claim to drop resource usage for the old
flavor from the source host. The problem is that drop_move_claim only
decrements the old flavor from the reported usage if the instance is
in RT.tracked_migrations, and on the source host that is only the
case if the update_available_resource periodic task runs before the
resize is confirmed; otherwise the instance is still only tracked in
RT.tracked_instances on the source host and the old flavor usage is
never dropped. This leaves the source compute incorrectly reporting
resource usage for the old flavor until the next periodic run, which
can be a large window if resizes are configured to automatically
confirm, e.g. resize_confirm_window=1, and the periodic interval is
long, e.g. update_resources_interval=600.
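
For illustration, a nova.conf along these lines (hypothetical values
matching the example above; both options live in the [DEFAULT] group)
auto-confirms a resize after one second but only corrects the
reported usage every ten minutes:

    [DEFAULT]
    # Automatically confirm resizes one second after the instance
    # goes to VERIFY_RESIZE.
    resize_confirm_window = 1
    # Run the update_available_resource periodic task every 600
    # seconds; until it runs, the source host keeps reporting
    # old_flavor usage.
    update_resources_interval = 600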

This fixes the issue by also updating usage in drop_move_claim
when the instance is not in tracked_migrations but is in
tracked_instances.
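
Schematically, the fixed drop_move_claim looks like the following
simplified sketch (a condensed view of the resource_tracker diff
below, not the verbatim code; PCI and NUMA handling elided):

    def drop_move_claim(self, context, instance, nodename,
                        instance_type=None, prefix='new_'):
        if instance['uuid'] in self.tracked_migrations:
            # e.g. the dest node during revert resize
            migration = self.tracked_migrations.pop(instance['uuid'])
            if not instance_type:
                instance_type = self._get_instance_type(
                    instance, prefix, migration)
        elif instance['uuid'] in self.tracked_instances:
            # e.g. the source node during confirm resize, before the
            # periodic task has moved it to tracked_migrations
            self.tracked_instances.remove(instance['uuid'])
        # Either way, drop the usage now rather than waiting for the
        # next update_available_resource periodic run.
        if instance_type is not None:
            usage = self._get_usage_dict(
                instance_type, instance, numa_topology=None)
            self._update_usage(usage, nodename, sign=-1)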

Because drop_move_claim is tightly coupled to
instance.migration_context, we need to ensure the migration_context
still exists when drop_move_claim is called during confirm_resize, so
a test wrinkle is added to enforce that ordering.
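
Schematically, the wrinkle hangs an assertion off the mocked
RT.drop_move_claim (a sketch; mock_drop and mock_rt come from the
unit test diff below):

    def fake_drop_move_claim(*args, **kwargs):
        # Fails if instance.drop_migration_context (mock_drop) has
        # already been called when RT.drop_move_claim runs.
        mock_drop.assert_not_called()

    mock_rt.drop_move_claim.side_effect = fake_drop_move_claim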

test_drop_move_claim_on_revert also needed updating to reflect how
drop_move_claim is actually called during revert_resize.

And finally, the functional recreate test is updated to show the
bug is fixed.

Change-Id: Ia6d8a7909081b0b856bd7e290e234af7e42a2b38
Closes-Bug: #1818914
Related-Bug: #1641750
Related-Bug: #1498126
Author: Matt Riedemann
Date: 2019-03-07 16:07:18 -05:00
Commit: ad9f37350a (parent 0b92c7e741)
5 changed files with 43 additions and 26 deletions


@@ -4053,6 +4053,10 @@ class ComputeManager(manager.Manager):
         migration.status = 'confirmed'
         migration.save()
+        # NOTE(mriedem): drop_move_claim relies on
+        # instance.migration_context so make sure to not call
+        # instance.drop_migration_context() until after drop_move_claim
+        # is called.
         self.rt.drop_move_claim(context, instance, migration.source_node,
                                 old_instance_type, prefix='old_')
         instance.drop_migration_context()


@@ -459,31 +459,27 @@ class ResourceTracker(object):
                        attributes. 'old_' or 'new_', with 'new_' being the
                        default.
         """
+        # Remove usage for an instance that is tracked in migrations, such as
+        # on the dest node during revert resize.
         if instance['uuid'] in self.tracked_migrations:
             migration = self.tracked_migrations.pop(instance['uuid'])
-
             if not instance_type:
                 instance_type = self._get_instance_type(instance, prefix,
                                                         migration)
-
-            if instance_type is not None:
-                numa_topology = self._get_migration_context_resource(
-                    'numa_topology', instance, prefix=prefix)
-                usage = self._get_usage_dict(
-                    instance_type, instance, numa_topology=numa_topology)
-                self._drop_pci_devices(instance, nodename, prefix)
-                self._update_usage(usage, nodename, sign=-1)
-
-                ctxt = context.elevated()
-                self._update(ctxt, self.compute_nodes[nodename])
         # Remove usage for an instance that is not tracked in migrations (such
         # as on the source node after a migration).
         # NOTE(lbeliveau): On resize on the same node, the instance is
         # included in both tracked_migrations and tracked_instances.
-        elif (instance['uuid'] in self.tracked_instances):
+        elif instance['uuid'] in self.tracked_instances:
             self.tracked_instances.remove(instance['uuid'])
-            self._drop_pci_devices(instance, nodename, prefix)
-            # TODO(lbeliveau): Validate if numa needs the same treatment.
-
-            ctxt = context.elevated()
-            self._update(ctxt, self.compute_nodes[nodename])
+        if instance_type is not None:
+            numa_topology = self._get_migration_context_resource(
+                'numa_topology', instance, prefix=prefix)
+            usage = self._get_usage_dict(
+                instance_type, instance, numa_topology=numa_topology)
+            self._drop_pci_devices(instance, nodename, prefix)
+            # TODO(lbeliveau): Validate if numa needs the same treatment.
+            self._update_usage(usage, nodename, sign=-1)
+
+            ctxt = context.elevated()
+            self._update(ctxt, self.compute_nodes[nodename])


@@ -2122,13 +2122,8 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
         self._wait_for_state_change(self.api, server, 'ACTIVE')
         # There should no resource usage for flavor1 on the source host.
-        # FIXME(mriedem): This is bug 1818914 where the source host continues
-        # to report old_flavor usage until the update_available_resource
-        # periodic task runs. Uncomment this once fixed.
-        # self.assert_hypervisor_usage(
-        #     source_rp_uuid, no_usage, volume_backed=False)
         self.assert_hypervisor_usage(
-            source_rp_uuid, self.flavor1, volume_backed=False)
+            source_rp_uuid, no_usage, volume_backed=False)
         # And resource usage for flavor2 should still be on the target host.
         self.assert_hypervisor_usage(
             dest_rp_uuid, self.flavor2, volume_backed=False)


@@ -7175,7 +7175,15 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
         def do_confirm_resize(mock_save, mock_drop, mock_delete,
                               mock_confirm, mock_nwapi, mock_notify,
                               mock_mig_save, mock_mig_get, mock_inst_get):
-            self._mock_rt()
+
+            def fake_drop_move_claim(*args, **kwargs):
+                # RT.drop_move_claim must be called before
+                # instance.drop_migration_context.
+                mock_drop.assert_not_called()
+
+            mock_rt = self._mock_rt()
+            # Enforce order of drop_move_claim/drop_migration_context calls.
+            mock_rt.drop_move_claim.side_effect = fake_drop_move_claim
             self.instance.migration_context = objects.MigrationContext(
                 new_pci_devices=None,
                 old_pci_devices=None)


@@ -2490,23 +2490,37 @@ class TestResize(BaseTestCase):
         instance = _INSTANCE_FIXTURES[0].obj_clone()
         instance.task_state = task_states.RESIZE_MIGRATING
         instance.flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2]
+        instance.new_flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2]
         instance.migration_context = objects.MigrationContext()
         instance.migration_context.new_pci_devices = objects.PciDeviceList(
             objects=pci_devs)
-        self.rt.tracked_instances = set([instance.uuid])
+        # When reverting a resize and dropping the move claim, the
+        # destination compute calls drop_move_claim to drop the new_flavor
+        # usage and the instance should be in tracked_migrations from when
+        # the resize_claim was made on the dest during prep_resize.
+        self.rt.tracked_migrations = {
+            instance.uuid: objects.Migration(migration_type='resize')}
         # not using mock.sentinel.ctx because drop_move_claim calls elevated
         ctx = mock.MagicMock()
         with test.nested(
-            mock.patch.object(self.rt, '_update'),
-            mock.patch.object(self.rt.pci_tracker, 'free_device')
-        ) as (update_mock, mock_pci_free_device):
+            mock.patch.object(self.rt, '_update'),
+            mock.patch.object(self.rt.pci_tracker, 'free_device'),
+            mock.patch.object(self.rt, '_get_usage_dict'),
+            mock.patch.object(self.rt, '_update_usage')
+        ) as (
+            update_mock, mock_pci_free_device, mock_get_usage,
+            mock_update_usage,
+        ):
             self.rt.drop_move_claim(ctx, instance, _NODENAME)
             mock_pci_free_device.assert_called_once_with(
                 pci_dev, mock.ANY)
+            mock_get_usage.assert_called_once_with(
+                instance.new_flavor, instance, numa_topology=None)
+            mock_update_usage.assert_called_once_with(
+                mock_get_usage.return_value, _NODENAME, sign=-1)

     @mock.patch('nova.compute.utils.is_volume_backed_instance',
                 return_value=False)