Add functional recreate test for bug 1818914

The confirm resize flow in the compute manager
runs on the source host. It calls RT.drop_move_claim
to drop resource usage from the source host for the
old flavor. The problem with drop_move_claim is it
only decrements the old flavor from the reported usage
if the instance is in RT.tracked_migrations, which will
only be there on the source host if the update_available_resource
periodic task runs before the resize is confirmed, otherwise
the instance is still just tracked in RT.tracked_instances on
the source host. This leaves the source compute incorrectly
reporting resource usage for the old flavor until the next
periodic runs, which could be a large window if resizes are
configured to automatically confirm, e.g. resize_confirm_window=1,
and the periodic interval is big, e.g. update_resources_interval=600.

This change adds a functional recreate test for the bug which will
be updated in the change that fixes the bug.

Change-Id: I4aac187283c2f341b5c2712be85f722156e14f63
Related-Bug: #1818914
Related-Bug: #1498126
This commit is contained in:
Matt Riedemann 2019-03-06 18:46:22 -05:00
parent 2e5d0eda84
commit 54877e06f1
2 changed files with 94 additions and 0 deletions

View File

@ -27,6 +27,7 @@ import os_traits
from oslo_log import log as logging
from oslo_utils.fixture import uuidsentinel as uuids
from nova.compute import utils as compute_utils
import nova.conf
from nova import context
from nova.db import api as db
@ -868,3 +869,32 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin):
self.assertEqual(2, len(allocations))
self.assertFlavorMatchesUsage(source_root_rp_uuid, flavor)
self.assertFlavorMatchesUsage(dest_root_rp_uuid, flavor)
def assert_hypervisor_usage(self, compute_node_uuid, flavor,
                            volume_backed):
    """Verify a hypervisor's reported usage against a single flavor.

    The usage is read from the os-hypervisors API and compared with the
    flavor's resources plus the configured host reservations. It is
    assumed that only one instance is running on the hypervisor.

    :param compute_node_uuid: UUID of the ComputeNode to check.
    :param flavor: "flavor" entry dict from GET /flavors/{flavor_id}
    :param volume_backed: True if the flavor is used with a volume-backed
        server, False otherwise.
    """
    # Looking up a hypervisor by UUID needs microversion 2.53 or later,
    # so bump the admin API client only for the duration of this call.
    with utils.temporary_mutation(self.admin_api, microversion='2.53'):
        response = self.admin_api.api_get(
            '/os-hypervisors/%s' % compute_node_uuid)
        reported = response.body['hypervisor']

    # Disk: no flavor disk usage is expected for a volume-backed server;
    # the reserved_host_disk_mb reservation is added on top, rounded up
    # to whole GB.
    flavor_disk_gb = 0 if volume_backed else flavor['disk']
    reserved_disk_gb = compute_utils.convert_mb_to_ceil_gb(
        CONF.reserved_host_disk_mb)
    self.assertEqual(
        flavor_disk_gb + reserved_disk_gb, reported['local_gb_used'])

    # Memory: flavor RAM on top of reserved_host_memory_mb.
    ram_mb = CONF.reserved_host_memory_mb + flavor['ram']
    self.assertEqual(ram_mb, reported['memory_mb_used'])

    # VCPU: flavor vcpus on top of reserved_host_cpus.
    vcpus = CONF.reserved_host_cpus + flavor['vcpus']
    self.assertEqual(vcpus, reported['vcpus_used'])

View File

@ -2712,6 +2712,70 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
self._delete_and_check_allocations(server)
def test_resize_confirm_assert_hypervisor_usage_no_periodics(self):
    """Recreate bug 1818914 for a confirmed resize without periodics.

    Checks the resource usage reported by the os-hypervisors API (not
    placement) at each step of a resize from host1 to host2 that is
    then confirmed. _test_resize_confirm is deliberately not used here
    because the periodics must not run before the usage is checked.
    """
    src_uuid = self._get_provider_uuid_by_host('host1')
    dst_uuid = self._get_provider_uuid_by_host('host2')
    no_usage = {'vcpus': 0, 'disk': 0, 'ram': 0}

    def check_usage(source_flavor, dest_flavor):
        # The source hypervisor is always checked before the dest one.
        self.assert_hypervisor_usage(
            src_uuid, source_flavor, volume_backed=False)
        self.assert_hypervisor_usage(
            dst_uuid, dest_flavor, volume_backed=False)

    # Before any server exists neither hypervisor reports usage.
    check_usage(no_usage, no_usage)

    # Boot on host1 and wait for ACTIVE: only the source host should
    # report flavor1 usage, the dest host is still empty.
    server = self._boot_and_check_allocations(self.flavor1, 'host1')
    check_usage(self.flavor1, no_usage)

    # Resize to flavor2, forcing the move to the other host, and wait
    # for VERIFY_RESIZE.
    self.flags(allow_resize_to_same_host=False)
    self.api.post_server_action(
        server['id'], {'resize': {'flavorRef': self.flavor2['id']}})
    self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE')
    # While unconfirmed, the source host holds flavor1 and the target
    # host holds flavor2.
    check_usage(self.flavor1, self.flavor2)

    # Confirm the resize and wait for the server to be ACTIVE again.
    self.api.post_server_action(server['id'], {'confirmResize': None})
    self._wait_for_state_change(self.api, server, 'ACTIVE')
    # FIXME(mriedem): This is bug 1818914 where the source host continues
    # to report old_flavor usage until the update_available_resource
    # periodic task runs. There should be no resource usage for flavor1
    # on the source host at this point; once the bug is fixed, expect
    # no_usage for the source host here instead of flavor1.
    # Usage for flavor2 should still be on the target host.
    check_usage(self.flavor1, self.flavor2)

    # After the periodics run, the source host no longer reports any
    # usage and the target host still reports flavor2.
    self._run_periodics()
    check_usage(no_usage, self.flavor2)
def _wait_for_notification_event_type(self, event_type, max_retries=50):
retry_counter = 0
while True: