SIGHUP n-cpu to clear provider tree cache

An earlier change [1] allowed
[compute]resource_provider_association_refresh to be set to zero to
disable the resource tracker's periodic refresh of its local copy of
provider traits and aggregates. To allow out-of-band changes to
placement (e.g. via the CLI) to be picked up by the resource tracker in
this configuration (or in a configuration where the timer is set to a
high value), this change clears the provider tree cache when SIGHUP is
sent to the compute service. The next periodic task then repopulates it
afresh from placement.

[1] Iec33e656491848b26686fbf6fb5db4a4c94b9ea8
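
For illustration only, a minimal Python sketch of the behaviour being wired up
here; the real service installs its SIGHUP handling via oslo.service, which
ends up calling ComputeManager.reset(), and FakeReportClient below is a
hypothetical stand-in for the report client's cached state:

    # Illustrative sketch only -- nova's real wiring goes through oslo.service
    # and ComputeManager.reset(); FakeReportClient is a hypothetical stand-in.
    import signal

    class FakeReportClient(object):
        """Stand-in for the report client's provider cache state."""
        def __init__(self):
            self._provider_tree = {}            # stand-in for a ProviderTree
            self._association_refresh_time = {}

        def clear_provider_cache(self):
            # Same idea as the new method below: drop the cached tree and the
            # refresh timestamps so the next access repopulates from placement.
            self._provider_tree = {}
            self._association_refresh_time = {}

    reportclient = FakeReportClient()

    def _on_sighup(signum, frame):
        reportclient.clear_provider_cache()

    signal.signal(signal.SIGHUP, _on_sighup)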

Change-Id: I65a7ee565ca5b3ec6c33a2fd9e39d461f7d90ed2
Eric Fried 2018-11-05 14:31:35 -06:00
parent 8c318d0fb2
commit bbc2fcb8fb
5 changed files with 29 additions and 0 deletions

@@ -548,6 +548,7 @@ class ComputeManager(manager.Manager):
         LOG.info('Reloading compute RPC API')
         compute_rpcapi.LAST_VERSION = None
         self.compute_rpcapi = compute_rpcapi.ComputeAPI()
+        self._get_resource_tracker().reportclient.clear_provider_cache()
 
     def _get_resource_tracker(self):
         if not self._resource_tracker:

@@ -652,6 +652,8 @@ Related options:
         default=300,
         min=0,
         mutable=True,
+        # TODO(efried): Provide more/better explanation of what this option is
+        # all about. Reference bug(s). Unless we're just going to remove it.
         help="""
 Interval for updating nova-compute-side cache of the compute node resource
 provider's aggregates and traits info.
@@ -662,6 +664,9 @@ node.
 
 A value of zero disables cache refresh completely.
 
+The cache can be cleared manually at any time by sending SIGHUP to the compute
+process, causing it to be repopulated the next time the data is accessed.
+
 Possible values:
 
 * Any positive integer in seconds, or zero to disable refresh.
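
As a usage sketch under stated assumptions (refresh disabled via
[compute]resource_provider_association_refresh = 0, and a hypothetical
nova-compute PID), an out-of-band cache clear amounts to sending SIGHUP to the
process:

    # Usage sketch: the PID below is hypothetical; look up the real
    # nova-compute PID on the host. SIGHUP clears the provider cache and the
    # next access repopulates it from placement.
    import os
    import signal

    NOVA_COMPUTE_PID = 12345  # hypothetical PID

    os.kill(NOVA_COMPUTE_PID, signal.SIGHUP)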

@@ -274,6 +274,11 @@ class SchedulerReportClient(object):
         # NOTE(danms): Keep track of how naggy we've been
         self._warn_count = 0
 
+    def clear_provider_cache(self):
+        LOG.info("Clearing the report client's provider cache.")
+        self._provider_tree = provider_tree.ProviderTree()
+        self._association_refresh_time = {}
+
     @utils.synchronized(PLACEMENT_CLIENT_SEMAPHORE)
     def _create_client(self):
         """Create the HTTP session accessing the placement service."""

@@ -4744,6 +4744,20 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
         mock_rpc.assert_called_once_with()
         self.assertIsNot(orig_rpc, self.compute.compute_rpcapi)
 
+    def test_reset_clears_provider_cache(self):
+        # Seed the cache so we can tell we've cleared it
+        reportclient = self.compute._get_resource_tracker().reportclient
+        ptree = reportclient._provider_tree
+        ptree.new_root('foo', uuids.foo)
+        self.assertEqual([uuids.foo], ptree.get_provider_uuids())
+        times = reportclient._association_refresh_time
+        times[uuids.foo] = time.time()
+        self.compute.reset()
+        ptree = reportclient._provider_tree
+        self.assertEqual([], ptree.get_provider_uuids())
+        times = reportclient._association_refresh_time
+        self.assertEqual({}, times)
+
     @mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
     @mock.patch('nova.compute.manager.ComputeManager._delete_instance')
     def test_terminate_instance_no_bdm_volume_id(self, mock_delete_instance,

@@ -7,4 +7,8 @@ features:
     1767309`_ allowing more aggressive reduction in the amount of traffic to
     the placement service.
 
+    The cache can be cleared manually at any time by sending SIGHUP to the
+    compute process. This will cause the cache to be repopulated the next time
+    the data is accessed.
+
     .. _`bug 1767309`: https://bugs.launchpad.net/nova/+bug/1767309