diff --git a/doc/source/cli/nova-manage.rst b/doc/source/cli/nova-manage.rst
index 25d988ef1d7a..f3b57e655d77 100644
--- a/doc/source/cli/nova-manage.rst
+++ b/doc/source/cli/nova-manage.rst
@@ -327,6 +327,10 @@ Nova Cells v2
     found, 3 if a host with that name is not in a cell with that uuid, 4 if
     a host with that name has instances (host not empty).
 
+  .. note::
+
+     The scheduler caches host-to-cell mapping information, so when deleting
+     a host the scheduler may need to be restarted or sent the SIGHUP signal.
 
 Placement
 ~~~~~~~~~
diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py
index dc28cf45efa7..4c72a234d254 100644
--- a/nova/cmd/manage.py
+++ b/nova/cmd/manage.py
@@ -1765,6 +1765,10 @@ class CellV2Commands(object):
         * The host has instances.
 
         Returns 0 if the host is deleted successfully.
+
+        NOTE: The scheduler caches host-to-cell mapping information, so when
+        deleting a host the scheduler may need to be restarted or sent the
+        SIGHUP signal.
         """
         ctxt = context.get_admin_context()
         # Find the CellMapping given the uuid.
diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py
index 6a8b7fc0b9fd..01a661881dd1 100644
--- a/nova/scheduler/host_manager.py
+++ b/nova/scheduler/host_manager.py
@@ -675,6 +675,10 @@ class HostManager(object):
                      'cells': ', '.join(
                          [c.identity for c in disabled_cells])})
 
+        # Dict, keyed by host name, of cell UUIDs; used to look up the cell
+        # a particular host is in (used with self.cells).
+        self.host_to_cell_uuid = {}
+
     def get_host_states_by_uuids(self, context, compute_uuids, spec_obj):
 
         if not self.cells:
@@ -738,9 +742,40 @@ class HostManager(object):
         return [self.aggs_by_id[agg_id] for agg_id in
                 self.host_aggregates_map[host]]
 
+    def _get_cell_mapping_for_host(self, context, host_name):
+        """Finds the CellMapping for a particular host name.
+
+        Relies on a cache to quickly fetch the CellMapping if we have looked
+        up this host before, otherwise gets the CellMapping via the
+        HostMapping record for the given host name.
+
+        :param context: nova auth request context
+        :param host_name: compute service host name
+        :returns: CellMapping object
+        :raises: HostMappingNotFound if the host is not mapped to a cell
+        """
+        # Check to see if we have the host in our cache.
+        if host_name in self.host_to_cell_uuid:
+            cell_uuid = self.host_to_cell_uuid[host_name]
+            if cell_uuid in self.cells:
+                return self.cells[cell_uuid]
+            # Something is wrong, so log a warning and just fall through to
+            # look up the HostMapping.
+            LOG.warning('Host %s is expected to be in cell %s but that cell '
+                        'uuid was not found in our cache. The service may '
+                        'need to be restarted to refresh the cache.',
+                        host_name, cell_uuid)
+
+        # We have not cached this host yet, so get the HostMapping, cache the
+        # result and return the CellMapping.
+        hm = objects.HostMapping.get_by_host(context, host_name)
+        cell_mapping = hm.cell_mapping
+        self.host_to_cell_uuid[host_name] = cell_mapping.uuid
+        return cell_mapping
+
     def _get_instances_by_host(self, context, host_name):
         try:
-            hm = objects.HostMapping.get_by_host(context, host_name)
+            cm = self._get_cell_mapping_for_host(context, host_name)
         except exception.HostMappingNotFound:
             # It's possible to hit this when the compute service first starts
             # up and casts to update_instance_info with an empty list but
@@ -748,7 +783,7 @@ class HostManager(object):
             LOG.info('Host mapping not found for host %s. Not tracking '
                      'instance info for this host.', host_name)
             return {}
-        with context_module.target_cell(context, hm.cell_mapping) as cctxt:
+        with context_module.target_cell(context, cm) as cctxt:
             uuids = objects.InstanceList.get_uuids_by_host(cctxt, host_name)
             # Putting the context in the otherwise fake Instance object at
             # least allows out of tree filters to lazy-load fields.
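For readers skimming the patch, the new _get_cell_mapping_for_host is a read-through cache: consult the host_to_cell_uuid dict, validate the cached cell uuid against the cells cache, and on a miss or a stale entry fall back to the authoritative HostMapping lookup, re-populating the cache with the result. The standalone sketch below shows the same control flow with plain dicts; Cell, HostMappingNotFound and the "db" argument are hypothetical stand-ins for the nova.objects and nova.exception types, not Nova APIs.

# Minimal standalone sketch of the read-through cache pattern implemented
# by _get_cell_mapping_for_host above. Cell, HostMappingNotFound and the
# "db" dict are hypothetical stand-ins for the nova.objects/nova.exception
# types; only the caching control flow mirrors the patch.
import logging

LOG = logging.getLogger(__name__)


class HostMappingNotFound(Exception):
    pass


class Cell(object):
    def __init__(self, uuid):
        self.uuid = uuid


class HostToCellCache(object):
    def __init__(self, cells, db):
        self.cells = cells            # known cells: cell uuid -> Cell
        self.db = db                  # "the API DB": host name -> Cell
        self.host_to_cell_uuid = {}   # the cache added by the patch

    def get_cell_for_host(self, host_name):
        # Fast path: the host is cached and its cell is still known.
        if host_name in self.host_to_cell_uuid:
            cell_uuid = self.host_to_cell_uuid[host_name]
            if cell_uuid in self.cells:
                return self.cells[cell_uuid]
            # Stale entry: warn and fall through to the authoritative
            # lookup, which overwrites the cached value.
            LOG.warning('Host %s cached to unknown cell %s; re-resolving.',
                        host_name, cell_uuid)
        # Slow path: look the host up in "the DB" and cache the result.
        if host_name not in self.db:
            raise HostMappingNotFound(host_name)
        cell = self.db[host_name]
        self.host_to_cell_uuid[host_name] = cell.uuid
        return cell


# First call hits the "DB", the second is served from the cache, and a
# poisoned entry triggers the warning plus a re-resolve: the same three
# cases the new unit test below exercises against the real code.
cell1 = Cell('cell1-uuid')
cache = HostToCellCache({'cell1-uuid': cell1}, {'host1': cell1})
assert cache.get_cell_for_host('host1') is cell1   # DB lookup, now cached
assert cache.get_cell_for_host('host1') is cell1   # pure cache hit
cache.host_to_cell_uuid['host1'] = 'unknown-uuid'  # poison the cache
assert cache.get_cell_for_host('host1') is cell1   # warns, then re-resolves

Note that the stale-entry branch never deletes the bad key; it falls through and overwrites it, while wholesale invalidation is handled separately by the cells cache refresh covered next.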
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index f02254470bc6..23a55e0ab216 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -85,6 +85,9 @@ class SchedulerManager(manager.Manager):
         # and enabled cells caches in the host manager. So every time an
         # existing cell is disabled or enabled or a new cell is created, a
         # SIGHUP signal has to be sent to the scheduler for proper scheduling.
+        # NOTE(mriedem): Similarly there is a host-to-cell cache which should
+        # be reset if a host is deleted from a cell and "discovered" in
+        # another cell.
         self.driver.host_manager.refresh_cells_caches()
 
     @messaging.expected_exceptions(exception.NoValidHost)
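One design choice worth calling out: the patch adds no per-entry invalidation. The host_to_cell_uuid dict is reassigned to {} inside refresh_cells_caches() (see the @@ -675 hunk above; the test below asserts this), and per the NOTE just added, that refresh is what runs when the scheduler is sent SIGHUP. The sketch below shows that shape in isolation, assuming a bare signal handler; Nova actually wires SIGHUP through its service framework, and _load_cells_from_db() is a hypothetical stand-in for the real CellMappingList.get_all query.

# Illustrative only: how a long-lived service might tie SIGHUP to a cache
# refresh like refresh_cells_caches(). Nova wires this up through its
# service framework rather than a raw signal handler, and
# _load_cells_from_db() is a hypothetical stand-in for the real
# CellMappingList.get_all() query.
import signal


class TinyHostManager(object):
    def __init__(self):
        self.cells = {}
        self.host_to_cell_uuid = {}
        self.refresh_cells_caches()

    def _load_cells_from_db(self):
        # Pretend this re-reads the cell mappings from the API database.
        return {}

    def refresh_cells_caches(self):
        self.cells = self._load_cells_from_db()
        # As in the patch: drop every cached host-to-cell entry so a host
        # deleted from one cell and "discovered" in another is re-resolved
        # on its next lookup.
        self.host_to_cell_uuid = {}


manager = TinyHostManager()


def _on_sighup(signum, frame):
    manager.refresh_cells_caches()


# POSIX only: after this, `kill -HUP <pid>` rebuilds both caches without
# restarting the process.
signal.signal(signal.SIGHUP, _on_sighup)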
diff --git a/nova/tests/unit/scheduler/test_host_manager.py b/nova/tests/unit/scheduler/test_host_manager.py
index faad28d59eda..a7b1db00462f 100644
--- a/nova/tests/unit/scheduler/test_host_manager.py
+++ b/nova/tests/unit/scheduler/test_host_manager.py
@@ -104,6 +104,8 @@ class HostManagerTestCase(test.NoDBTestCase):
                                             transport_url='fake:///mq3',
                                             disabled=False)
         cells = [cell_mapping1, cell_mapping2, cell_mapping3]
+        # Throw a random host-to-cell in that cache to make sure it gets reset.
+        self.host_manager.host_to_cell_uuid['fake-host'] = cell_uuid1
         with mock.patch('nova.objects.CellMappingList.get_all',
                         return_value=cells) as mock_cm:
             self.host_manager.refresh_cells_caches()
@@ -114,6 +116,8 @@ class HostManagerTestCase(test.NoDBTestCase):
         # But it is still in the full list.
         self.assertEqual(3, len(self.host_manager.cells))
         self.assertIn(cell_uuid2, self.host_manager.cells)
+        # The host_to_cell_uuid cache should have been reset.
+        self.assertEqual({}, self.host_manager.host_to_cell_uuid)
 
     def test_refresh_cells_caches_except_cell0(self):
         ctxt = nova_context.RequestContext('fake-user', 'fake_project')
@@ -131,6 +135,46 @@ class HostManagerTestCase(test.NoDBTestCase):
             mock_cm.assert_called_once()
         self.assertEqual(0, len(self.host_manager.cells))
 
+    @mock.patch('nova.objects.HostMapping.get_by_host',
+                return_value=objects.HostMapping(
+                    cell_mapping=objects.CellMapping(uuid=uuids.cell1)))
+    def test_get_cell_mapping_for_host(self, mock_get_by_host):
+        # Starting with an empty cache, assert that the HostMapping is looked
+        # up and the result is cached.
+        ctxt = nova_context.get_admin_context()
+        host = 'fake-host'
+        self.assertEqual({}, self.host_manager.host_to_cell_uuid)
+        cm = self.host_manager._get_cell_mapping_for_host(ctxt, host)
+        self.assertIs(cm, mock_get_by_host.return_value.cell_mapping)
+        self.assertIn(host, self.host_manager.host_to_cell_uuid)
+        self.assertEqual(
+            uuids.cell1, self.host_manager.host_to_cell_uuid[host])
+        mock_get_by_host.assert_called_once_with(ctxt, host)
+
+        # Reset the mock and do it again; assert we do not query the DB.
+        mock_get_by_host.reset_mock()
+        self.host_manager._get_cell_mapping_for_host(ctxt, host)
+        mock_get_by_host.assert_not_called()
+
+        # Mix up the cache such that the host is mapped to a cell that is
+        # not in the cells cache, which will make us query the DB. Also make
+        # the HostMapping query raise HostMappingNotFound to make sure that
+        # comes up to the caller.
+        mock_get_by_host.reset_mock()
+        self.host_manager.host_to_cell_uuid[host] = uuids.random_cell
+        mock_get_by_host.side_effect = exception.HostMappingNotFound(name=host)
+        with mock.patch('nova.scheduler.host_manager.LOG.warning') as warning:
+            self.assertRaises(exception.HostMappingNotFound,
+                              self.host_manager._get_cell_mapping_for_host,
+                              ctxt, host)
+            # We should have logged a warning because the host is cached to
+            # a cell uuid but that cell uuid is not in the cells cache.
+            warning.assert_called_once()
+            self.assertIn('Host %s is expected to be in cell %s',
+                          warning.call_args[0][0])
+            # And we should have also tried to look up the HostMapping in the DB.
+            mock_get_by_host.assert_called_once_with(ctxt, host)
+
     @mock.patch.object(nova.objects.InstanceList, 'get_by_filters')
     @mock.patch.object(nova.objects.ComputeNodeList, 'get_all')
     def test_init_instance_info_batches(self, mock_get_all,