From d4179e3e96b3f3820b1055624ca81c441b94332e Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Tue, 31 Jul 2018 16:00:28 -0400 Subject: [PATCH] Add recreate test for RT.stats bug 1784705 With change I6827137f35c0cb4f9fc4c6f753d9a035326ed01b in Ocata, we changed the ComputeManager to manage a single ResourceTracker and that single ResourceTracker will manage multiple compute nodes. The only time a single nova-compute service hosts multiple compute nodes is for ironic where there is a compute node per instance. The problem is the ResourceTracker.stats variable, unlike the ResourceTracker.compute_nodes variable, is not node-specific so it's possible for node stats to leak across nodes based on how the stats are used (and copied). This change adds a functional recreate test to show the issue before it's fixed. The fixture setup had to be tweaked a bit to avoid modifying class variables by reference between test cases. Conflicts: nova/tests/functional/compute/test_resource_tracker.py NOTE(mriedem): The conflict is due to this test module not existing in Ocata. It was introduced in Pike with change I59be1cbedc99dcbb0ccde089a9f4737305176324 and changes were made to it over time. In this backport, the basics needed for the one test case specific to this patch are added and everything else, like the placement stuff, is removed. Change-Id: Icc5f615baa1042347ec1699eb84ba0670445b995 Related-Bug: #1784705 (cherry picked from commit fc05626d43571733da0803df0fd9a7c69766b8fd) (cherry picked from commit cc8167a1914a70ea7b336856285df2571a754ca0) (cherry picked from commit dbce613a9aede3fc5463a2d383b355ffb8cda73a) --- nova/tests/functional/compute/__init__.py | 0 .../compute/test_resource_tracker.py | 253 ++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 nova/tests/functional/compute/__init__.py create mode 100644 nova/tests/functional/compute/test_resource_tracker.py diff --git a/nova/tests/functional/compute/__init__.py b/nova/tests/functional/compute/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/nova/tests/functional/compute/test_resource_tracker.py b/nova/tests/functional/compute/test_resource_tracker.py new file mode 100644 index 000000000000..31e1a011c73e --- /dev/null +++ b/nova/tests/functional/compute/test_resource_tracker.py @@ -0,0 +1,253 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import mock + +from nova.compute import power_state +from nova.compute import resource_tracker +from nova.compute import task_states +from nova.compute import vm_states +from nova import conf +from nova import context +from nova import objects +from nova.objects import fields +from nova import test +from nova.tests import fixtures as nova_fixtures +from nova.tests.functional.api.openstack.placement import test_report_client +from nova.tests import uuidsentinel as uuids + +CONF = conf.CONF +VCPU = fields.ResourceClass.VCPU +MEMORY_MB = fields.ResourceClass.MEMORY_MB +DISK_GB = fields.ResourceClass.DISK_GB +COMPUTE_HOST = 'compute-host' + + +class IronicResourceTrackerTest(test.TestCase): + """Functional tests for the resource tracker managing multiple nodes like + in the case of an ironic deployment. + """ + + FLAVOR_FIXTURES = { + 'CUSTOM_SMALL_IRON': objects.Flavor( + name='CUSTOM_SMALL_IRON', + flavorid=42, + vcpus=4, + memory_mb=4096, + root_gb=1024, + swap=0, + ephemeral_gb=0, + extra_specs={}, + ), + 'CUSTOM_BIG_IRON': objects.Flavor( + name='CUSTOM_BIG_IRON', + flavorid=43, + vcpus=16, + memory_mb=65536, + root_gb=1024, + swap=0, + ephemeral_gb=0, + extra_specs={}, + ), + } + + COMPUTE_NODE_FIXTURES = { + uuids.cn1: objects.ComputeNode( + uuid=uuids.cn1, + hypervisor_hostname='cn1', + hypervisor_type='ironic', + hypervisor_version=0, + cpu_info="", + host=COMPUTE_HOST, + vcpus=4, + vcpus_used=0, + cpu_allocation_ratio=1.0, + memory_mb=4096, + memory_mb_used=0, + ram_allocation_ratio=1.0, + local_gb=1024, + local_gb_used=0, + disk_allocation_ratio=1.0, + ), + uuids.cn2: objects.ComputeNode( + uuid=uuids.cn2, + hypervisor_hostname='cn2', + hypervisor_type='ironic', + hypervisor_version=0, + cpu_info="", + host=COMPUTE_HOST, + vcpus=4, + vcpus_used=0, + cpu_allocation_ratio=1.0, + memory_mb=4096, + memory_mb_used=0, + ram_allocation_ratio=1.0, + local_gb=1024, + local_gb_used=0, + disk_allocation_ratio=1.0, + ), + uuids.cn3: objects.ComputeNode( + uuid=uuids.cn3, + hypervisor_hostname='cn3', + hypervisor_type='ironic', + hypervisor_version=0, + cpu_info="", + host=COMPUTE_HOST, + vcpus=16, + vcpus_used=0, + cpu_allocation_ratio=1.0, + memory_mb=65536, + memory_mb_used=0, + ram_allocation_ratio=1.0, + local_gb=2048, + local_gb_used=0, + disk_allocation_ratio=1.0, + ), + } + + INSTANCE_FIXTURES = { + uuids.instance1: objects.Instance( + uuid=uuids.instance1, + flavor=FLAVOR_FIXTURES['CUSTOM_SMALL_IRON'], + vm_state=vm_states.BUILDING, + task_state=task_states.SPAWNING, + power_state=power_state.RUNNING, + project_id='project', + user_id=uuids.user, + ), + } + + def setUp(self): + super(IronicResourceTrackerTest, self).setUp() + self.ctx = context.RequestContext('user', 'project') + self.report_client = test_report_client.NoAuthReportClient() + + driver = mock.MagicMock(autospec='nova.virt.driver.ComputeDriver') + driver.node_is_available.return_value = True + self.driver_mock = driver + self.rt = resource_tracker.ResourceTracker(COMPUTE_HOST, driver) + self.rt.scheduler_client.reportclient = self.report_client + self.rt.reportclient = self.report_client + self.instances = self.create_fixtures() + + def create_fixtures(self): + for flavor in self.FLAVOR_FIXTURES.values(): + # Clone the object so the class variable isn't + # modified by reference. + flavor = flavor.obj_clone() + flavor._context = self.ctx + flavor.obj_set_defaults() + flavor.create() + + # We create some compute node records in the Nova cell DB to simulate + # data before adding integration for Ironic baremetal nodes with the + # placement API... + for cn in self.COMPUTE_NODE_FIXTURES.values(): + # Clone the object so the class variable isn't + # modified by reference. + cn = cn.obj_clone() + cn._context = self.ctx + cn.obj_set_defaults() + cn.create() + + instances = {} + for instance in self.INSTANCE_FIXTURES.values(): + # Clone the object so the class variable isn't + # modified by reference. + instance = instance.obj_clone() + instance._context = self.ctx + instance.obj_set_defaults() + instance.create() + instances[instance.uuid] = instance + return instances + + @mock.patch('nova.objects.compute_node.ComputeNode.save', new=mock.Mock()) + def test_node_stats_isolation(self): + """Regression test for bug 1784705 introduced in Ocata. + + The ResourceTracker.stats field is meant to track per-node stats + so this test registers three compute nodes with a single RT where + each node has unique stats, and then makes sure that after updating + usage for an instance, the nodes still have their unique stats and + nothing is leaked from node to node. + """ + self.useFixture(nova_fixtures.PlacementFixture()) + # Before the resource tracker is "initialized", we shouldn't have + # any compute nodes or stats in the RT's cache... + self.assertEqual(0, len(self.rt.compute_nodes)) + self.assertEqual(0, len(self.rt.stats)) + + # Now "initialize" the resource tracker. This is what + # nova.compute.manager.ComputeManager does when "initializing" the + # nova-compute service. Do this in a predictable order so cn1 is + # first and cn3 is last. + for cn in sorted(self.COMPUTE_NODE_FIXTURES.values(), + key=lambda _cn: _cn.hypervisor_hostname): + nodename = cn.hypervisor_hostname + # Fake that each compute node has unique extra specs stats and + # the RT makes sure those are unique per node. + stats = {'node:%s' % nodename: nodename} + self.driver_mock.get_available_resource.return_value = { + 'hypervisor_hostname': nodename, + 'hypervisor_type': 'ironic', + 'hypervisor_version': 0, + 'vcpus': cn.vcpus, + 'vcpus_used': cn.vcpus_used, + 'memory_mb': cn.memory_mb, + 'memory_mb_used': cn.memory_mb_used, + 'local_gb': cn.local_gb, + 'local_gb_used': cn.local_gb_used, + 'numa_topology': None, + 'resource_class': None, # Act like admin hasn't set yet... + 'stats': stats, + } + self.driver_mock.get_inventory.return_value = { + 'CUSTOM_SMALL_IRON': { + 'total': 1, + 'reserved': 0, + 'min_unit': 1, + 'max_unit': 1, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + } + self.rt.update_available_resource(self.ctx, nodename) + + self.assertEqual(3, len(self.rt.compute_nodes)) + + def _assert_stats(bad_node=None): + # Make sure each compute node has a unique set of stats and + # they don't accumulate across nodes. + for _cn in self.rt.compute_nodes.values(): + node_stats_key = 'node:%s' % _cn.hypervisor_hostname + if bad_node == _cn.hypervisor_hostname: + # FIXME(mriedem): This is bug 1784705 where the + # compute node has lost its stats and is getting + # stats for a different node. + self.assertNotIn(node_stats_key, _cn.stats) + else: + self.assertIn(node_stats_key, _cn.stats) + node_stat_count = 0 + for stat in _cn.stats: + if stat.startswith('node:'): + node_stat_count += 1 + self.assertEqual(1, node_stat_count, _cn.stats) + _assert_stats() + + # Now "spawn" an instance to the first compute node by calling the + # RT's instance_claim(). + cn1_obj = self.COMPUTE_NODE_FIXTURES[uuids.cn1] + cn1_nodename = cn1_obj.hypervisor_hostname + inst = self.instances[uuids.instance1] + with self.rt.instance_claim(self.ctx, inst, cn1_nodename): + # FIXME(mriedem): Remove bad_node once bug 1784705 is fixed. + _assert_stats(bad_node=cn1_nodename)