Update usage in RT.drop_move_claim during confirm resize
The confirm resize flow in the compute manager runs on the source host. It calls RT.drop_move_claim to drop resource usage from the source host for the old flavor. The problem with drop_move_claim is it only decrements the old flavor from the reported usage if the instance is in RT.tracked_migrations, which will only be there on the source host if the update_available_resource periodic task runs before the resize is confirmed, otherwise the instance is still just tracked in RT.tracked_instances on the source host. This leaves the source compute incorrectly reporting resource usage for the old flavor until the next periodic runs, which could be a large window if resizes are configured to automatically confirm, e.g. resize_confirm_window=1, and the periodic interval is big, e.g. update_resources_interval=600. This fixes the issue by also updating usage in drop_move_claim when the instance is not in tracked_migrations but is in tracked_instances. Because of the tight coupling with the instance.migration_context we need to ensure the migration_context still exists before drop_move_claim is called during confirm_resize, so a test wrinkle is added to enforce that. test_drop_move_claim_on_revert also needed some updating for reality because of how drop_move_claim is called during revert_resize. And finally, the functional recreate test is updated to show the bug is fixed. Change-Id: Ia6d8a7909081b0b856bd7e290e234af7e42a2b38 Closes-Bug: #1818914 Related-Bug: #1641750 Related-Bug: #1498126
This commit is contained in:
parent
0b92c7e741
commit
ad9f37350a
|
@ -4053,6 +4053,10 @@ class ComputeManager(manager.Manager):
|
|||
migration.status = 'confirmed'
|
||||
migration.save()
|
||||
|
||||
# NOTE(mriedem): drop_move_claim relies on
|
||||
# instance.migration_context so make sure to not call
|
||||
# instance.drop_migration_context() until after drop_move_claim
|
||||
# is called.
|
||||
self.rt.drop_move_claim(context, instance, migration.source_node,
|
||||
old_instance_type, prefix='old_')
|
||||
instance.drop_migration_context()
|
||||
|
|
|
@ -459,31 +459,27 @@ class ResourceTracker(object):
|
|||
attributes. 'old_' or 'new_', with 'new_' being the
|
||||
default.
|
||||
"""
|
||||
# Remove usage for an instance that is tracked in migrations, such as
|
||||
# on the dest node during revert resize.
|
||||
if instance['uuid'] in self.tracked_migrations:
|
||||
migration = self.tracked_migrations.pop(instance['uuid'])
|
||||
|
||||
if not instance_type:
|
||||
instance_type = self._get_instance_type(instance, prefix,
|
||||
migration)
|
||||
|
||||
if instance_type is not None:
|
||||
numa_topology = self._get_migration_context_resource(
|
||||
'numa_topology', instance, prefix=prefix)
|
||||
usage = self._get_usage_dict(
|
||||
instance_type, instance, numa_topology=numa_topology)
|
||||
self._drop_pci_devices(instance, nodename, prefix)
|
||||
self._update_usage(usage, nodename, sign=-1)
|
||||
|
||||
ctxt = context.elevated()
|
||||
self._update(ctxt, self.compute_nodes[nodename])
|
||||
# Remove usage for an instance that is not tracked in migrations (such
|
||||
# as on the source node after a migration).
|
||||
# NOTE(lbeliveau): On resize on the same node, the instance is
|
||||
# included in both tracked_migrations and tracked_instances.
|
||||
elif (instance['uuid'] in self.tracked_instances):
|
||||
elif instance['uuid'] in self.tracked_instances:
|
||||
self.tracked_instances.remove(instance['uuid'])
|
||||
|
||||
if instance_type is not None:
|
||||
numa_topology = self._get_migration_context_resource(
|
||||
'numa_topology', instance, prefix=prefix)
|
||||
usage = self._get_usage_dict(
|
||||
instance_type, instance, numa_topology=numa_topology)
|
||||
self._drop_pci_devices(instance, nodename, prefix)
|
||||
# TODO(lbeliveau): Validate if numa needs the same treatment.
|
||||
self._update_usage(usage, nodename, sign=-1)
|
||||
|
||||
ctxt = context.elevated()
|
||||
self._update(ctxt, self.compute_nodes[nodename])
|
||||
|
|
|
@ -2122,13 +2122,8 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
|
|||
self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||
|
||||
# There should be no resource usage for flavor1 on the source host.
|
||||
# FIXME(mriedem): This is bug 1818914 where the source host continues
|
||||
# to report old_flavor usage until the update_available_resource
|
||||
# periodic task runs. Uncomment this once fixed.
|
||||
# self.assert_hypervisor_usage(
|
||||
# source_rp_uuid, no_usage, volume_backed=False)
|
||||
self.assert_hypervisor_usage(
|
||||
source_rp_uuid, self.flavor1, volume_backed=False)
|
||||
source_rp_uuid, no_usage, volume_backed=False)
|
||||
# And resource usage for flavor2 should still be on the target host.
|
||||
self.assert_hypervisor_usage(
|
||||
dest_rp_uuid, self.flavor2, volume_backed=False)
|
||||
|
|
|
@ -7175,7 +7175,15 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
|
|||
def do_confirm_resize(mock_save, mock_drop, mock_delete,
|
||||
mock_confirm, mock_nwapi, mock_notify,
|
||||
mock_mig_save, mock_mig_get, mock_inst_get):
|
||||
self._mock_rt()
|
||||
|
||||
def fake_drop_move_claim(*args, **kwargs):
|
||||
# RT.drop_move_claim must be called before
|
||||
# instance.drop_migration_context.
|
||||
mock_drop.assert_not_called()
|
||||
|
||||
mock_rt = self._mock_rt()
|
||||
# Enforce order of drop_move_claim/drop_migration_context calls.
|
||||
mock_rt.drop_move_claim.side_effect = fake_drop_move_claim
|
||||
self.instance.migration_context = objects.MigrationContext(
|
||||
new_pci_devices=None,
|
||||
old_pci_devices=None)
|
||||
|
|
|
@ -2490,23 +2490,37 @@ class TestResize(BaseTestCase):
|
|||
|
||||
instance = _INSTANCE_FIXTURES[0].obj_clone()
|
||||
instance.task_state = task_states.RESIZE_MIGRATING
|
||||
instance.flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2]
|
||||
instance.new_flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2]
|
||||
instance.migration_context = objects.MigrationContext()
|
||||
instance.migration_context.new_pci_devices = objects.PciDeviceList(
|
||||
objects=pci_devs)
|
||||
|
||||
self.rt.tracked_instances = set([instance.uuid])
|
||||
# When reverting a resize and dropping the move claim, the
|
||||
# destination compute calls drop_move_claim to drop the new_flavor
|
||||
# usage and the instance should be in tracked_migrations from when
|
||||
# the resize_claim was made on the dest during prep_resize.
|
||||
self.rt.tracked_migrations = {
|
||||
instance.uuid: objects.Migration(migration_type='resize')}
|
||||
|
||||
# not using mock.sentinel.ctx because drop_move_claim calls elevated
|
||||
ctx = mock.MagicMock()
|
||||
|
||||
with test.nested(
|
||||
mock.patch.object(self.rt, '_update'),
|
||||
mock.patch.object(self.rt.pci_tracker, 'free_device')
|
||||
) as (update_mock, mock_pci_free_device):
|
||||
mock.patch.object(self.rt, '_update'),
|
||||
mock.patch.object(self.rt.pci_tracker, 'free_device'),
|
||||
mock.patch.object(self.rt, '_get_usage_dict'),
|
||||
mock.patch.object(self.rt, '_update_usage')
|
||||
) as (
|
||||
update_mock, mock_pci_free_device, mock_get_usage,
|
||||
mock_update_usage,
|
||||
):
|
||||
self.rt.drop_move_claim(ctx, instance, _NODENAME)
|
||||
mock_pci_free_device.assert_called_once_with(
|
||||
pci_dev, mock.ANY)
|
||||
mock_get_usage.assert_called_once_with(
|
||||
instance.new_flavor, instance, numa_topology=None)
|
||||
mock_update_usage.assert_called_once_with(
|
||||
mock_get_usage.return_value, _NODENAME, sign=-1)
|
||||
|
||||
@mock.patch('nova.compute.utils.is_volume_backed_instance',
|
||||
return_value=False)
|
||||
|
|
Loading…
Reference in New Issue