From e279ac404673784f33610cad55288e2e60649f88 Mon Sep 17 00:00:00 2001
From: Matt Riedemann
Date: Tue, 31 Jul 2018 17:26:47 -0400
Subject: [PATCH] Make ResourceTracker.stats node-specific

As of change I6827137f35c0cb4f9fc4c6f753d9a035326ed01b in Ocata, the
ResourceTracker manages multiple compute nodes via its "compute_nodes"
variable, but the "stats" variable was still being shared across all
nodes, which leads to leaking stats across nodes in an ironic
deployment where a single nova-compute service host is managing
multiple ironic instances (nodes).

This change makes ResourceTracker.stats node-specific which fixes
the ironic leak but also allows us to remove the stats deepcopy
while iterating over instances which should improve performance
for single-node deployments with potentially a large number of
instances, i.e. vCenter.

Conflicts:
    nova/compute/manager.py

NOTE(mriedem): The conflict was due to not having change
Iae904afb6cb4fcea8bb27741d774ffbe986a5fb4 in Pike.

NOTE(mriedem): The check for node is None was moved from
_do_build_and_run_instance to build_and_run_instance because the
functional tests are using the ChanceScheduler since we don't have
change I12de2e195022593ea2a3e2894f2c3b5226930d4f and the
ChanceScheduler does not return the nodename during
select_destinations so the compute has to get it from the driver.

Change-Id: I0b9e5b711878fa47ba90e43c0b41437b57cf8ef6
Closes-Bug: #1784705
Closes-Bug: #1777422
(cherry picked from commit b5b7d86bb04f92d21cf954cd6b3463c9fcc637e6)
(cherry picked from commit 7d99f5753f992bd4b58b0aaa7f83f71fb044776f)
---
 nova/compute/manager.py              | 22 ++++++------
 nova/compute/resource_tracker.py     | 35 +++++++++++++------
 .../compute/test_resource_tracker.py | 14 +++-----
 3 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index cec2874a5709..88e0f6bc9bad 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1708,16 +1708,16 @@ class ComputeManager(manager.Manager):
 
         return block_device_info
 
-    def _build_failed(self):
+    def _build_failed(self, node):
         if CONF.compute.consecutive_build_service_disable_threshold:
             rt = self._get_resource_tracker()
             # NOTE(danms): Update our counter, but wait for the next
             # update_available_resource() periodic to flush it to the DB
-            rt.stats.build_failed()
+            rt.build_failed(node)
 
-    def _build_succeeded(self):
+    def _build_succeeded(self, node):
         rt = self._get_resource_tracker()
-        rt.stats.build_succeeded()
+        rt.build_succeeded(node)
 
     @wrap_exception()
     @reverts_task_state
@@ -1728,6 +1728,11 @@ class ComputeManager(manager.Manager):
                                security_groups=None, block_device_mapping=None,
                                node=None, limits=None):
 
+        if node is None:
+            node = self.driver.get_available_nodes(refresh=True)[0]
+            LOG.debug('No node specified, defaulting to %s', node,
+                      instance=instance)
+
         @utils.synchronized(instance.uuid)
         def _locked_do_build_and_run_instance(*args, **kwargs):
             # NOTE(danms): We grab the semaphore with the instance uuid
@@ -1766,9 +1771,9 @@ class ComputeManager(manager.Manager):
                     rt.reportclient.delete_allocation_for_instance(
                         instance.uuid)
 
-                self._build_failed()
+                self._build_failed(node)
             else:
-                self._build_succeeded()
+                self._build_succeeded(node)
 
         # NOTE(danms): We spawn here to return the RPC worker thread back to
         # the pool. Since what follows could take a really long time, we don't
@@ -1827,11 +1832,6 @@ class ComputeManager(manager.Manager):
         if limits is None:
             limits = {}
 
-        if node is None:
-            node = self.driver.get_available_nodes(refresh=True)[0]
-            LOG.debug('No node specified, defaulting to %s', node,
-                      instance=instance)
-
         try:
             with timeutils.StopWatch() as timer:
                 self._build_and_run_instance(context, instance, image,
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index f5c5f3ba6421..5d946333a412 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -26,7 +26,7 @@ from oslo_serialization import jsonutils
 
 from nova.compute import claims
 from nova.compute import monitors
-from nova.compute import stats
+from nova.compute import stats as compute_stats
 from nova.compute import task_states
 from nova.compute import utils as compute_utils
 from nova.compute import vm_states
@@ -136,7 +136,8 @@ class ResourceTracker(object):
         self.pci_tracker = None
         # Dict of objects.ComputeNode objects, keyed by nodename
         self.compute_nodes = {}
-        self.stats = stats.Stats()
+        # Dict of Stats objects, keyed by nodename
+        self.stats = collections.defaultdict(compute_stats.Stats)
         self.tracked_instances = {}
         self.tracked_migrations = {}
         monitor_handler = monitors.MonitorHandler(self)
@@ -627,16 +628,18 @@ class ResourceTracker(object):
 
     def _copy_resources(self, compute_node, resources):
         """Copy resource values to supplied compute_node."""
+        nodename = resources['hypervisor_hostname']
+        stats = self.stats[nodename]
         # purge old stats and init with anything passed in by the driver
         # NOTE(danms): Preserve 'failed_builds' across the stats clearing,
         # as that is not part of resources
         # TODO(danms): Stop doing this when we get a column to store this
         # directly
-        prev_failed_builds = self.stats.get('failed_builds', 0)
-        self.stats.clear()
-        self.stats['failed_builds'] = prev_failed_builds
-        self.stats.digest_stats(resources.get('stats'))
-        compute_node.stats = copy.deepcopy(self.stats)
+        prev_failed_builds = stats.get('failed_builds', 0)
+        stats.clear()
+        stats['failed_builds'] = prev_failed_builds
+        stats.digest_stats(resources.get('stats'))
+        compute_node.stats = stats
 
         # update the allocation ratios for the related ComputeNode object
         compute_node.ram_allocation_ratio = self.ram_allocation_ratio
@@ -920,7 +923,8 @@ class ResourceTracker(object):
 
         cn.free_ram_mb = cn.memory_mb - cn.memory_mb_used
         cn.free_disk_gb = cn.local_gb - cn.local_gb_used
-        cn.running_vms = self.stats.num_instances
+        stats = self.stats[nodename]
+        cn.running_vms = stats.num_instances
 
         # Calculate the numa usage
         free = sign == -1
@@ -1082,8 +1086,9 @@ class ResourceTracker(object):
             sign = -1
 
         cn = self.compute_nodes[nodename]
-        self.stats.update_stats_for_instance(instance, is_removed_instance)
-        cn.stats = copy.deepcopy(self.stats)
+        stats = self.stats[nodename]
+        stats.update_stats_for_instance(instance, is_removed_instance)
+        cn.stats = stats
 
         # if it's a new or deleted instance:
         if is_new_instance or is_removed_instance:
@@ -1099,7 +1104,7 @@ class ResourceTracker(object):
             self._update_usage(self._get_usage_dict(instance), nodename,
                                sign=sign)
 
-        cn.current_workload = self.stats.calculate_workload()
+        cn.current_workload = stats.calculate_workload()
         if self.pci_tracker:
             obj = self.pci_tracker.stats.to_device_pools_obj()
             cn.pci_device_pools = obj
@@ -1428,3 +1433,11 @@ class ResourceTracker(object):
             if key in updates:
                 usage[key] = updates[key]
         return usage
+
+    def build_failed(self, nodename):
+        """Increments the failed_builds stats for the given node."""
+        self.stats[nodename].build_failed()
+
+    def build_succeeded(self, nodename):
+        """Resets the failed_builds stats for the given node."""
+        self.stats[nodename].build_succeeded()
diff --git a/nova/tests/functional/compute/test_resource_tracker.py b/nova/tests/functional/compute/test_resource_tracker.py
index e205bb7b4ee8..d156feb26af3 100644
--- a/nova/tests/functional/compute/test_resource_tracker.py
+++ b/nova/tests/functional/compute/test_resource_tracker.py
@@ -459,19 +459,14 @@ class IronicResourceTrackerTest(test.TestCase):
             self.rt.update_available_resource(self.ctx, nodename)
 
         self.assertEqual(3, len(self.rt.compute_nodes))
+        self.assertEqual(3, len(self.rt.stats))
 
-        def _assert_stats(bad_node=None):
+        def _assert_stats():
             # Make sure each compute node has a unique set of stats and
             # they don't accumulate across nodes.
             for _cn in self.rt.compute_nodes.values():
                 node_stats_key = 'node:%s' % _cn.hypervisor_hostname
-                if bad_node == _cn.hypervisor_hostname:
-                    # FIXME(mriedem): This is bug 1784705 where the
-                    # compute node has lost its stats and is getting
-                    # stats for a different node.
-                    self.assertNotIn(node_stats_key, _cn.stats)
-                else:
-                    self.assertIn(node_stats_key, _cn.stats)
+                self.assertIn(node_stats_key, _cn.stats)
                 node_stat_count = 0
                 for stat in _cn.stats:
                     if stat.startswith('node:'):
@@ -485,5 +480,4 @@ class IronicResourceTrackerTest(test.TestCase):
         cn1_nodename = cn1_obj.hypervisor_hostname
         inst = self.instances[uuids.instance1]
         with self.rt.instance_claim(self.ctx, inst, cn1_nodename):
-            # FIXME(mriedem): Remove bad_node once bug 1784705 is fixed.
-            _assert_stats(bad_node=cn1_nodename)
+            _assert_stats()