Make scheduler lazy-load the placement client

Like we did for conductor, this makes the scheduler lazy-load the
placement client instead of only doing it during __init__. This avoids
a startup crash if keystone or placement is not available, but
retains startup failures for other problems and errors likely to be
a result of misconfigurations.

Closes-Bug: #2012530
Change-Id: I42ed876b84d80536e83d9ae01696b0a64299c9f7
This commit is contained in:
Dan Smith 2023-03-22 08:20:58 -07:00
parent 3886f078de
commit d37cca361a
2 changed files with 70 additions and 1 deletions

View File

@ -23,6 +23,7 @@ import collections
import copy
import random
from keystoneauth1 import exceptions as ks_exc
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_serialization import jsonutils
@ -67,10 +68,42 @@ class SchedulerManager(manager.Manager):
self.host_manager = host_manager.HostManager()
self.servicegroup_api = servicegroup.API()
self.notifier = rpc.get_notifier('scheduler')
self.placement_client = report.report_client_singleton()
self._placement_client = None
try:
# Test our placement client during initialization
self.placement_client
except (ks_exc.EndpointNotFound,
ks_exc.DiscoveryFailure,
ks_exc.RequestTimeout,
ks_exc.GatewayTimeout,
ks_exc.ConnectFailure) as e:
# Non-fatal, likely transient (although not definitely);
# continue startup but log the warning so that when things
# fail later, it will be clear why we can not do certain
# things.
LOG.warning('Unable to initialize placement client (%s); '
'Continuing with startup, but scheduling '
'will not be possible.', e)
except (ks_exc.MissingAuthPlugin,
ks_exc.Unauthorized) as e:
# This is almost definitely fatal mis-configuration. The
# Unauthorized error might be transient, but it is
# probably reasonable to consider it fatal.
LOG.error('Fatal error initializing placement client; '
'config is incorrect or incomplete: %s', e)
raise
except Exception as e:
# Unknown/unexpected errors here are fatal
LOG.error('Fatal error initializing placement client: %s', e)
raise
super().__init__(service_name='scheduler', *args, **kwargs)
@property
def placement_client(self):
    """Return the placement report client.

    The client is fetched through ``report.report_client_singleton()``
    on every access instead of being stored on the manager, so any
    exception raised by that call reaches the caller each time the
    property is read.
    """
    client = report.report_client_singleton()
    return client
@periodic_task.periodic_task(
spacing=CONF.scheduler.discover_hosts_in_cells_interval,
run_immediately=True)

View File

@ -19,6 +19,7 @@ Tests For Scheduler
from unittest import mock
from keystoneauth1 import exceptions as ks_exc
import oslo_messaging as messaging
from oslo_serialization import jsonutils
from oslo_utils.fixture import uuidsentinel as uuids
@ -1688,6 +1689,41 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
mock_log_warning.assert_not_called()
mock_log_debug.assert_called_once_with(msg)
@mock.patch('nova.scheduler.client.report.report_client_singleton')
@mock.patch.object(manager, 'LOG')
@mock.patch('nova.scheduler.host_manager.HostManager')
@mock.patch('nova.servicegroup.API')
@mock.patch('nova.rpc.get_notifier')
def test_init_lazy_placement_client(self, mock_rpc, mock_sg, mock_hm,
                                    mock_log, mock_report):
    """A placement client timeout at startup is logged, not fatal."""
    # Pretend keystone or placement cannot be reached while the manager
    # is being constructed.
    mock_report.side_effect = ks_exc.RequestTimeout
    sched_mgr = manager.SchedulerManager()

    # Construction asked for the client exactly once and logged a
    # warning instead of blowing up.
    self.assertTrue(mock_log.warning.called)
    mock_report.assert_called_once_with()

    # Callers that touch the client afterwards see the real error.
    with self.assertRaises(ks_exc.RequestTimeout):
        sched_mgr.placement_client

    # Clear the failure (and the recorded calls) to mimic the service
    # coming back; the next access must reach the singleton again.
    mock_report.reset_mock(side_effect=True)
    sched_mgr.placement_client
    mock_report.assert_called_once_with()
@mock.patch('nova.scheduler.client.report.report_client_singleton')
@mock.patch('nova.scheduler.host_manager.HostManager')
@mock.patch('nova.servicegroup.API')
@mock.patch('nova.rpc.get_notifier')
def test_init_lazy_placement_client_failures(self, mock_rpc, mock_sg,
                                             mock_hm, mock_report):
    """Fatal placement client errors abort manager construction."""
    # First an authorization failure from keystoneauth, then an
    # arbitrary unexpected error: each must propagate out of the
    # SchedulerManager constructor unchanged.
    fatal_errors = (ks_exc.Unauthorized, test.TestingException)
    for fatal in fatal_errors:
        mock_report.side_effect = fatal
        self.assertRaises(fatal, manager.SchedulerManager)
class SchedulerManagerAllocationCandidateTestCase(test.NoDBTestCase):