Add recreate test for RT.stats bug 1784705
With change I6827137f35c0cb4f9fc4c6f753d9a035326ed01b in
Ocata, we changed the ComputeManager to manage a single
ResourceTracker and that single ResourceTracker will
manage multiple compute nodes. The only time a single
nova-compute service hosts multiple compute nodes is for
ironic where there is a compute node per instance. The
problem is the ResourceTracker.stats variable, unlike the
ResourceTracker.compute_nodes variable, is not node-specific
so it's possible for node stats to leak across nodes based
on how the stats are used (and copied).
This change adds a functional recreate test to show the issue
before it's fixed. The fixture setup had to be tweaked a
bit to avoid modifying class variables by reference between
test cases.
Conflicts:
nova/tests/functional/compute/test_resource_tracker.py
NOTE(mriedem): Conflicts are due to not having change
I433700e833f97c0fec946dafc2cdda9d49e1100b or change
I075785abcd4f4a8e180959daeadf215b9cd175c8 in Queens.
Change-Id: Icc5f615baa1042347ec1699eb84ba0670445b995
Related-Bug: #1784705
(cherry picked from commit fc05626d43
)
This commit is contained in:
parent
f07dd4436a
commit
cc8167a191
|
@ -23,6 +23,7 @@ from nova import context
|
|||
from nova import objects
|
||||
from nova.objects import fields
|
||||
from nova import test
|
||||
from nova.tests import fixtures as nova_fixtures
|
||||
from nova.tests.functional.api.openstack.placement import test_report_client
|
||||
from nova.tests import uuidsentinel as uuids
|
||||
|
||||
|
@ -150,10 +151,13 @@ class IronicResourceTrackerTest(test.TestCase):
|
|||
self.rt.scheduler_client.reportclient = self.report_client
|
||||
self.rt.reportclient = self.report_client
|
||||
self.url = 'http://localhost/placement'
|
||||
self.create_fixtures()
|
||||
self.instances = self.create_fixtures()
|
||||
|
||||
def create_fixtures(self):
|
||||
for flavor in self.FLAVOR_FIXTURES.values():
|
||||
# Clone the object so the class variable isn't
|
||||
# modified by reference.
|
||||
flavor = flavor.obj_clone()
|
||||
flavor._context = self.ctx
|
||||
flavor.obj_set_defaults()
|
||||
flavor.create()
|
||||
|
@ -162,14 +166,23 @@ class IronicResourceTrackerTest(test.TestCase):
|
|||
# data before adding integration for Ironic baremetal nodes with the
|
||||
# placement API...
|
||||
for cn in self.COMPUTE_NODE_FIXTURES.values():
|
||||
# Clone the object so the class variable isn't
|
||||
# modified by reference.
|
||||
cn = cn.obj_clone()
|
||||
cn._context = self.ctx
|
||||
cn.obj_set_defaults()
|
||||
cn.create()
|
||||
|
||||
instances = {}
|
||||
for instance in self.INSTANCE_FIXTURES.values():
|
||||
# Clone the object so the class variable isn't
|
||||
# modified by reference.
|
||||
instance = instance.obj_clone()
|
||||
instance._context = self.ctx
|
||||
instance.obj_set_defaults()
|
||||
instance.create()
|
||||
instances[instance.uuid] = instance
|
||||
return instances
|
||||
|
||||
def placement_get_inventory(self, rp_uuid):
|
||||
url = '/resource_providers/%s/inventories' % rp_uuid
|
||||
|
@ -293,7 +306,7 @@ class IronicResourceTrackerTest(test.TestCase):
|
|||
# RT's instance_claim().
|
||||
cn1_obj = self.COMPUTE_NODE_FIXTURES[uuids.cn1]
|
||||
cn1_nodename = cn1_obj.hypervisor_hostname
|
||||
inst = self.INSTANCE_FIXTURES[uuids.instance1]
|
||||
inst = self.instances[uuids.instance1]
|
||||
# Since we're pike, the scheduler would have created our
|
||||
# allocation for us. So, we can use our old update routine
|
||||
# here to mimic that before we go do the compute RT claim,
|
||||
|
@ -391,3 +404,87 @@ class IronicResourceTrackerTest(test.TestCase):
|
|||
# request a single amount of that custom resource class, we will
|
||||
# modify the allocation/claim to consume only the custom resource
|
||||
# class and not the VCPU, MEMORY_MB and DISK_GB.
|
||||
|
||||
@mock.patch('nova.compute.utils.is_volume_backed_instance',
|
||||
new=mock.Mock(return_value=False))
|
||||
@mock.patch('nova.objects.compute_node.ComputeNode.save', new=mock.Mock())
|
||||
def test_node_stats_isolation(self):
|
||||
"""Regression test for bug 1784705 introduced in Ocata.
|
||||
|
||||
The ResourceTracker.stats field is meant to track per-node stats
|
||||
so this test registers three compute nodes with a single RT where
|
||||
each node has unique stats, and then makes sure that after updating
|
||||
usage for an instance, the nodes still have their unique stats and
|
||||
nothing is leaked from node to node.
|
||||
"""
|
||||
self.useFixture(nova_fixtures.PlacementFixture())
|
||||
# Before the resource tracker is "initialized", we shouldn't have
|
||||
# any compute nodes or stats in the RT's cache...
|
||||
self.assertEqual(0, len(self.rt.compute_nodes))
|
||||
self.assertEqual(0, len(self.rt.stats))
|
||||
|
||||
# Now "initialize" the resource tracker. This is what
|
||||
# nova.compute.manager.ComputeManager does when "initializing" the
|
||||
# nova-compute service. Do this in a predictable order so cn1 is
|
||||
# first and cn3 is last.
|
||||
for cn in sorted(self.COMPUTE_NODE_FIXTURES.values(),
|
||||
key=lambda _cn: _cn.hypervisor_hostname):
|
||||
nodename = cn.hypervisor_hostname
|
||||
# Fake that each compute node has unique extra specs stats and
|
||||
# the RT makes sure those are unique per node.
|
||||
stats = {'node:%s' % nodename: nodename}
|
||||
self.driver_mock.get_available_resource.return_value = {
|
||||
'hypervisor_hostname': nodename,
|
||||
'hypervisor_type': 'ironic',
|
||||
'hypervisor_version': 0,
|
||||
'vcpus': cn.vcpus,
|
||||
'vcpus_used': cn.vcpus_used,
|
||||
'memory_mb': cn.memory_mb,
|
||||
'memory_mb_used': cn.memory_mb_used,
|
||||
'local_gb': cn.local_gb,
|
||||
'local_gb_used': cn.local_gb_used,
|
||||
'numa_topology': None,
|
||||
'resource_class': None, # Act like admin hasn't set yet...
|
||||
'stats': stats,
|
||||
}
|
||||
self.driver_mock.get_inventory.return_value = {
|
||||
'CUSTOM_SMALL_IRON': {
|
||||
'total': 1,
|
||||
'reserved': 0,
|
||||
'min_unit': 1,
|
||||
'max_unit': 1,
|
||||
'step_size': 1,
|
||||
'allocation_ratio': 1.0,
|
||||
},
|
||||
}
|
||||
self.rt.update_available_resource(self.ctx, nodename)
|
||||
|
||||
self.assertEqual(3, len(self.rt.compute_nodes))
|
||||
|
||||
def _assert_stats(bad_node=None):
|
||||
# Make sure each compute node has a unique set of stats and
|
||||
# they don't accumulate across nodes.
|
||||
for _cn in self.rt.compute_nodes.values():
|
||||
node_stats_key = 'node:%s' % _cn.hypervisor_hostname
|
||||
if bad_node == _cn.hypervisor_hostname:
|
||||
# FIXME(mriedem): This is bug 1784705 where the
|
||||
# compute node has lost its stats and is getting
|
||||
# stats for a different node.
|
||||
self.assertNotIn(node_stats_key, _cn.stats)
|
||||
else:
|
||||
self.assertIn(node_stats_key, _cn.stats)
|
||||
node_stat_count = 0
|
||||
for stat in _cn.stats:
|
||||
if stat.startswith('node:'):
|
||||
node_stat_count += 1
|
||||
self.assertEqual(1, node_stat_count, _cn.stats)
|
||||
_assert_stats()
|
||||
|
||||
# Now "spawn" an instance to the first compute node by calling the
|
||||
# RT's instance_claim().
|
||||
cn1_obj = self.COMPUTE_NODE_FIXTURES[uuids.cn1]
|
||||
cn1_nodename = cn1_obj.hypervisor_hostname
|
||||
inst = self.instances[uuids.instance1]
|
||||
with self.rt.instance_claim(self.ctx, inst, cn1_nodename):
|
||||
# FIXME(mriedem): Remove bad_node once bug 1784705 is fixed.
|
||||
_assert_stats(bad_node=cn1_nodename)
|
||||
|
|
Loading…
Reference in New Issue