From afad847e4d52243f4899591aa0907e347b37b6cf Mon Sep 17 00:00:00 2001
From: Dan Smith
Date: Fri, 7 Apr 2023 07:44:34 -0700
Subject: [PATCH] Populate ComputeNode.service_id

The ComputeNode object already has a service_id field that we stopped
using a while ago. This moves us back to the point where we set it
when creating new ComputeNode records, and also migrates existing
records when they are loaded.

The resource tracker is created before we may have created the service
record, but is updated afterwards in the pre_start_hook(). So this
adds a way for us to pass the service_ref to the resource tracker
during that hook so that it is present before the first time we update
all of our ComputeNode records. It also makes sure to pass the Service
through from the actual Service manager instead of looking it up
again, to make sure we maintain the tight relationship and avoid any
name-based ambiguity.

Related to blueprint compute-object-ids

Change-Id: I5e060d674b6145c9797c2251a2822106fc6d4a71
---
 nova/compute/manager.py                       |  5 +-
 nova/compute/resource_tracker.py              | 23 ++++
 nova/manager.py                               |  4 +-
 nova/service.py                               |  5 +-
 nova/tests/unit/compute/test_compute.py       |  6 +-
 nova/tests/unit/compute/test_compute_mgr.py   |  4 +-
 .../unit/compute/test_resource_tracker.py     | 57 +++++++++++++++++++
 nova/tests/unit/test_service.py               |  5 +-
 8 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 5c42aa4d89a1..2e6f4db94837 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -675,6 +675,7 @@ class ComputeManager(manager.Manager):
         # NOTE(russellb) Load the driver last. It may call back into the
         # compute manager via the virtapi, so we want it to be fully
         # initialized before that happens.
+        self.service_ref = None
         self.driver = driver.load_compute_driver(self.virtapi, compute_driver)
         self.rt = resource_tracker.ResourceTracker(
             self.host, self.driver, reportclient=self.reportclient)
@@ -1749,11 +1750,13 @@ class ComputeManager(manager.Manager):
                     instance_uuid=migration.instance_uuid)
         self._waiting_live_migrations.clear()

-    def pre_start_hook(self):
+    def pre_start_hook(self, service_ref):
         """After the service is initialized, but before we fully bring
         the service up by listening on RPC queues, make sure to update
         our available resources (and indirectly our available nodes).
         """
+        self.service_ref = service_ref
+        self.rt.set_service_ref(service_ref)
         self.update_available_resource(nova.context.get_admin_context(),
                                        startup=True)

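As a reading aid for the two manager.py hunks above, here is a minimal,
self-contained sketch of the ordering they rely on. This is not Nova
code: the Toy* names are invented for illustration, and only the
attribute and method names mirror the patch.

    # Toy model: the tracker is constructed during __init__, before any
    # service record exists, and pre_start_hook() is what finally hands
    # it the service_ref.
    class ToyTracker:
        def __init__(self):
            self.service_ref = None

        def set_service_ref(self, service_ref):
            self.service_ref = service_ref


    class ToyComputeManager:
        def __init__(self):
            self.service_ref = None    # as in the __init__ hunk
            self.rt = ToyTracker()     # built with no service record yet

        def pre_start_hook(self, service_ref):
            # as in the pre_start_hook() hunk: stash the ref and push it
            # into the tracker before the first resource update pass
            self.service_ref = service_ref
            self.rt.set_service_ref(service_ref)


    mgr = ToyComputeManager()
    assert mgr.rt.service_ref is None        # nothing to track yet
    mgr.pre_start_hook('fake-service-record')
    assert mgr.rt.service_ref == 'fake-service-record'
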
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index 9ee6670c176c..c266df97bd31 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -91,6 +91,7 @@ class ResourceTracker(object):
     """

     def __init__(self, host, driver, reportclient=None):
+        self.service_ref = None
         self.host = host
         self.driver = driver
         self.pci_tracker = None
@@ -125,6 +126,18 @@ class ResourceTracker(object):
         # smarter logging.
         self.absent_providers = set()

+    def set_service_ref(self, service_ref):
+        # NOTE(danms): Neither of these should ever happen, but sanity check
+        # just in case
+        if self.service_ref and self.service_ref.id != service_ref.id:
+            raise exception.ComputeServiceInUse(
+                'Resource tracker re-initialized with a different service')
+        elif service_ref.host != self.host:
+            raise exception.ServiceNotUnique(
+                'Resource tracker initialized with service that does '
+                'not match host')
+        self.service_ref = service_ref
+
     @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
     def instance_claim(self, context, instance, nodename, allocations,
                        limits=None):
@@ -704,6 +717,14 @@ class ResourceTracker(object):
         # to initialize
         if nodename in self.compute_nodes:
             cn = self.compute_nodes[nodename]
+            if 'service_id' not in cn or cn.service_id is None:
+                LOG.debug('Setting ComputeNode %s service_id to %i',
+                          cn.uuid, self.service_ref.id)
+                cn.service_id = self.service_ref.id
+            elif cn.service_id != self.service_ref.id:
+                LOG.warning('Moving ComputeNode %s from service %i to %i',
+                            cn.uuid, cn.service_id, self.service_ref.id)
+                cn.service_id = self.service_ref.id
             self._copy_resources(cn, resources)
             self._setup_pci_tracker(context, cn, resources)
             return False
@@ -727,6 +748,7 @@ class ResourceTracker(object):
             LOG.info("ComputeNode %(name)s moving from %(old)s to %(new)s",
                      {"name": nodename, "old": cn.host, "new": self.host})
             cn.host = self.host
+            cn.service_id = self.service_ref.id
             self._update(context, cn)

             self.compute_nodes[nodename] = cn
@@ -739,6 +761,7 @@ class ResourceTracker(object):
         # to be initialized with resource values.
         cn = objects.ComputeNode(context)
         cn.host = self.host
+        cn.service_id = self.service_ref.id
         self._copy_resources(cn, resources, initial=True)
         try:
             cn.create()
diff --git a/nova/manager.py b/nova/manager.py
index df0330536701..ab2414b63614 100644
--- a/nova/manager.py
+++ b/nova/manager.py
@@ -122,13 +122,15 @@ class Manager(PeriodicTasks, metaclass=ManagerMeta):
         """
         pass

-    def pre_start_hook(self):
+    def pre_start_hook(self, service_ref):
         """Hook to provide the manager the ability to do additional
         start-up work before any RPC queues/consumers are created.
         This is called after other initialization has succeeded and
         a service record is created.

         Child classes should override this method.
+
+        :param service_ref: The nova.objects.Service for this host
         """
         pass
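The two behavioral pieces in resource_tracker.py above, the sanity
checks in set_service_ref() and the service_id migration in
_init_compute_node(), condense to the following standalone sketch.
ComputeServiceInUse and ServiceNotUnique are stand-ins for their
nova.exception namesakes; Service, Tracker, and adopt_node() are
invented for illustration, and adopt_node() collapses the
LOG.debug/LOG.warning branches into one.

    class ComputeServiceInUse(Exception):
        pass


    class ServiceNotUnique(Exception):
        pass


    class Service:
        def __init__(self, id, host):
            self.id = id
            self.host = host


    class Tracker:
        def __init__(self, host):
            self.host = host
            self.service_ref = None

        def set_service_ref(self, service_ref):
            # a different service id after one is set is an error, as is
            # a service record that belongs to some other host
            if self.service_ref and self.service_ref.id != service_ref.id:
                raise ComputeServiceInUse('different service')
            elif service_ref.host != self.host:
                raise ServiceNotUnique('service does not match host')
            self.service_ref = service_ref

        def adopt_node(self, node):
            # condensed _init_compute_node() migration: an unset, None,
            # or stale service_id all end up pointing at our service
            if getattr(node, 'service_id', None) != self.service_ref.id:
                node.service_id = self.service_ref.id


    rt = Tracker(host='host1')
    rt.set_service_ref(Service(id=123, host='host1'))   # first set: ok
    rt.set_service_ref(Service(id=123, host='host1'))   # same id: ok
    try:
        rt.set_service_ref(Service(id=456, host='host1'))
    except ComputeServiceInUse:
        pass                                # different id is rejected
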
diff --git a/nova/service.py b/nova/service.py
index bd3b49ae669a..ddc6ec156615 100644
--- a/nova/service.py
+++ b/nova/service.py
@@ -174,7 +174,7 @@ class Service(service.Service):
             self.service_ref = objects.Service.get_by_host_and_binary(
                 ctxt, self.host, self.binary)

-        self.manager.pre_start_hook()
+        self.manager.pre_start_hook(self.service_ref)

         if self.backdoor_port is not None:
             self.manager.backdoor_port = self.backdoor_port
@@ -443,9 +443,10 @@ class WSGIService(service.Service):
             service_ref = objects.Service.get_by_host_and_binary(
                 ctxt, self.host, self.binary)
+            self.service_ref = service_ref

         if self.manager:
             self.manager.init_host()
-            self.manager.pre_start_hook()
+            self.manager.pre_start_hook(self.service_ref)
             if self.backdoor_port is not None:
                 self.manager.backdoor_port = self.backdoor_port
         self.server.start()
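The service.py hunks are only about call order: both Service.start()
and WSGIService.start() now store the service record on self and pass
it to the hook, rather than letting the manager look it up again by
name. A sketch of that contract, with invented names standing in for
the real record lookup and RPC setup:

    class ToyManager:
        def pre_start_hook(self, service_ref):
            # the manager never looks the record up itself
            self.service_ref = service_ref


    class ToyService:
        def __init__(self, host, binary, manager):
            self.host = host
            self.binary = binary
            self.manager = manager
            self.service_ref = None

        def start(self):
            # stands in for objects.Service.get_by_host_and_binary()
            # and record creation in the real start() methods
            self.service_ref = (self.host, self.binary)
            # only then run the hook, so every manager sees the real
            # record before any RPC consumers come up
            self.manager.pre_start_hook(self.service_ref)


    svc = ToyService('host1', 'nova-compute', ToyManager())
    svc.start()
    assert svc.manager.service_ref == ('host1', 'nova-compute')
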
""" + fake_service = objects.Service(id=123, host=self.compute.host) with mock.patch.object( self.compute, 'update_available_resource') as update_res: - self.compute.pre_start_hook() + self.compute.pre_start_hook(fake_service) update_res.assert_called_once_with( get_admin_context.return_value, startup=True) + self.assertEqual(fake_service, self.compute.rt.service_ref) @mock.patch.object(objects.ComputeNodeList, 'get_all_by_host', side_effect=exception.NotFound) diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 919dcb8334ed..47a0be592a6b 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -50,6 +50,7 @@ from nova.virt import driver _HOSTNAME = 'fake-host' _NODENAME = 'fake-node' +_SERVICE_ID = 123 CONF = cfg.CONF _VIRT_DRIVER_AVAIL_RESOURCES = { @@ -82,6 +83,7 @@ _COMPUTE_NODE_FIXTURES = [ hypervisor_type='fake', hypervisor_version=0, hypervisor_hostname=_NODENAME, + service_id=_SERVICE_ID, free_ram_mb=(_VIRT_DRIVER_AVAIL_RESOURCES['memory_mb'] - _VIRT_DRIVER_AVAIL_RESOURCES['memory_mb_used']), free_disk_gb=(_VIRT_DRIVER_AVAIL_RESOURCES['local_gb'] - @@ -505,6 +507,10 @@ def setup_rt(hostname, virt_resources=_VIRT_DRIVER_AVAIL_RESOURCES): return_value=report_client_mock), mock.patch('nova.rpc.get_notifier', return_value=notifier_mock)): rt = resource_tracker.ResourceTracker(hostname, vd) + + fake_service = objects.Service(id=_SERVICE_ID, host=hostname) + rt.set_service_ref(fake_service) + return (rt, query_client_mock, report_client_mock, vd) @@ -1424,6 +1430,7 @@ class TestInitComputeNode(BaseTestCase): hypervisor_hostname=resources['hypervisor_hostname'], # NOTE(sbauza): ResourceTracker adds host field host=_HOSTNAME, + service_id=_SERVICE_ID, # NOTE(sbauza): ResourceTracker adds CONF allocation ratios ram_allocation_ratio=CONF.initial_ram_allocation_ratio, cpu_allocation_ratio=CONF.initial_cpu_allocation_ratio, @@ -1441,6 +1448,8 @@ class TestInitComputeNode(BaseTestCase): uuids.compute_node_uuid) create_mock.assert_called_once_with() + self.assertEqual(obj_base.obj_to_primitive(expected_compute), + obj_base.obj_to_primitive(cn)) self.assertTrue(obj_base.obj_equal_prims(expected_compute, cn)) setup_pci.assert_called_once_with(mock.sentinel.ctx, cn, resources) self.assertFalse(update_mock.called) @@ -1568,6 +1577,23 @@ class TestInitComputeNode(BaseTestCase): mock.MagicMock, resources) + def test_init_compute_node_updates_service_id(self): + unset = object() + # Make sure that we can go from unset, none, or some other service_id + # to the one in our RT's service_ref + for iv in [unset, None, 121]: + compute = objects.ComputeNode(uuid=uuids.fakenode, id=7) + if iv is not unset: + compute.service_id = iv + self._setup_rt() + resources = {'hypervisor_hostname': 'fake-node', + 'uuid': uuids.fakenode} + self.rt.compute_nodes = {'fake-node': compute} + with mock.patch.object(self.rt, '_setup_pci_tracker'): + self.rt._init_compute_node(mock.MagicMock(), resources) + self.assertIn('service_id', compute) + self.assertEqual(self.rt.service_ref.id, compute.service_id) + @ddt.ddt class TestUpdateComputeNode(BaseTestCase): @@ -4284,6 +4310,37 @@ class ResourceTrackerTestCase(test.NoDBTestCase): self.assertRaises(AssertionError, _test_explict_unfair) self.assertRaises(AssertionError, _test_implicit_unfair) + def test_set_service_ref(self): + rt = resource_tracker.ResourceTracker( + _HOSTNAME, mock.sentinel.driver, mock.sentinel.reportclient) + + # The service ref 
diff --git a/nova/tests/unit/test_service.py b/nova/tests/unit/test_service.py
index acc1aeca7ffb..4d3104f79bec 100644
--- a/nova/tests/unit/test_service.py
+++ b/nova/tests/unit/test_service.py
@@ -134,7 +134,8 @@ class ServiceTestCase(test.NoDBTestCase):
         mock_create.assert_called_once_with()
         # pre_start_hook is called after service record is created,
         # but before RPC consumer is created
-        serv.manager.pre_start_hook.assert_called_once_with()
+        serv.manager.pre_start_hook.assert_called_once_with(
+            serv.service_ref)
         # post_start_hook is called after RPC consumer is created.
         serv.manager.post_start_hook.assert_called_once_with()

@@ -221,7 +222,7 @@ class ServiceTestCase(test.NoDBTestCase):
                                                     self.host, self.binary)
         mock_create.assert_called_once_with()
-        serv.manager.pre_start_hook.assert_called_once_with()
+        serv.manager.pre_start_hook.assert_called_once_with(serv.service_ref)
         serv.manager.post_start_hook.assert_called_once_with()
         serv.stop()
         mock_stop.assert_called_once_with()