Fix cinder-scheduler being reported as down during startup

During the startup process of cinder-scheduler, the service sleeps for
the duration given by the periodic_interval parameter. If the value of
this parameter is large, such as one hour, the cinder-scheduler stays
down for that whole period and no volume can be created.

This patch fixes this problem.

Change-Id: I932a725c1665add590f09fa8d26e84b79b06e159
Closes-bug: #1802249
This commit is contained in:
wangwei 2018-11-08 16:47:57 +09:00
parent cf5b3b56a3
commit 0db593d86c
5 changed files with 116 additions and 6 deletions

View File

@ -91,6 +91,15 @@ class Scheduler(object):
return self.host_manager.has_all_capabilities()
def is_first_receive(self):
    """Report whether the first round of capabilities has arrived.

    Delegates to the host manager. Used during service startup so the
    scheduler can go active as soon as capabilities are received instead
    of sleeping a full periodic_interval.
    """
    manager = self.host_manager
    return manager.first_receive_capabilities()
def update_service_capabilities(self, service_name, host, capabilities,
cluster_name, timestamp):
"""Process a capability update from a service node."""

View File

@ -509,6 +509,10 @@ class HostManager(object):
{'service_name': service_name, 'host': host})
return
# Determine whether HostManager has just completed initialization, and
# has not received the rpc message returned by volume.
just_init = self._is_just_initialized()
# TODO(geguileo): In P - Remove the next line since we receive the
# timestamp
timestamp = timestamp or timeutils.utcnow()
@ -550,6 +554,8 @@ class HostManager(object):
'cluster': cluster_msg})
self._no_capabilities_backends.discard(backend)
if just_init:
self._update_backend_state_map(cinder_context.get_admin_context())
def notify_service_capabilities(self, service_name, backend, capabilities,
timestamp):
@ -584,6 +590,14 @@ class HostManager(object):
def has_all_capabilities(self):
    """Return True once no backend is still missing a capability report."""
    # _no_capabilities_backends is drained as backends report in; an
    # empty set means everyone has reported.
    return not self._no_capabilities_backends
def _is_just_initialized(self):
return not self.service_states_last_update
def first_receive_capabilities(self):
    """Return True once the first full round of capability reports is in.

    True only when all three conditions hold:
    - at least one update has been processed (we are past the
      just-initialized state),
    - at least one backend is known in backend_state_map, and
    - no backend is still missing its capabilities.

    Note: the original wrapped backend_state_map in set() just to count
    it; dict keys are already unique, so plain truthiness is equivalent
    and avoids building a throwaway set.
    """
    return (not self._is_just_initialized() and
            bool(self.backend_state_map) and
            not self._no_capabilities_backends)
def _update_backend_state_map(self, context):
# Get resource usage across the available volume nodes:

View File

@ -104,7 +104,10 @@ class SchedulerManager(manager.CleanableManager, manager.Manager):
ctxt = context.get_admin_context()
self.request_service_capabilities(ctxt)
eventlet.sleep(CONF.periodic_interval)
for __ in range(CONF.periodic_interval):
if self.driver.is_first_receive():
break
eventlet.sleep(1)
self._startup_delay = False
def reset(self):

View File

@ -111,12 +111,16 @@ class HostManagerTestCase(test.TestCase):
self.assertEqual(expected, mock_func.call_args_list)
self.assertEqual(set(self.fake_backends), set(result))
@mock.patch(
'cinder.scheduler.host_manager.HostManager._is_just_initialized')
@mock.patch('cinder.scheduler.host_manager.HostManager._get_updated_pools')
@mock.patch('oslo_utils.timeutils.utcnow')
def test_update_service_capabilities(self, _mock_utcnow,
_mock_get_updated_pools):
_mock_get_updated_pools,
_mock_is_just_initialized):
service_states = self.host_manager.service_states
self.assertDictEqual({}, service_states)
_mock_is_just_initialized.return_value = False
_mock_utcnow.side_effect = [31338, 31339]
_mock_get_updated_pools.return_value = []
@ -156,14 +160,18 @@ class HostManagerTestCase(test.TestCase):
'host3': host3_volume_capabs}
self.assertDictEqual(expected, service_states)
@mock.patch(
'cinder.scheduler.host_manager.HostManager._is_just_initialized')
@mock.patch(
'cinder.scheduler.host_manager.HostManager.get_usage_and_notify')
@mock.patch('oslo_utils.timeutils.utcnow')
def test_update_and_notify_service_capabilities_case1(
self, _mock_utcnow,
_mock_get_usage_and_notify):
_mock_get_usage_and_notify,
_mock_is_just_initialized):
_mock_utcnow.side_effect = [31337, 31338, 31339]
_mock_is_just_initialized.return_value = False
service_name = 'volume'
capab1 = {'pools': [{
@ -207,14 +215,18 @@ class HostManagerTestCase(test.TestCase):
self.assertDictEqual(dict(dict(timestamp=31339), **capab1),
self.host_manager_1.service_states['host1'])
@mock.patch(
'cinder.scheduler.host_manager.HostManager._is_just_initialized')
@mock.patch(
'cinder.scheduler.host_manager.HostManager.get_usage_and_notify')
@mock.patch('oslo_utils.timeutils.utcnow')
def test_update_and_notify_service_capabilities_case2(
self, _mock_utcnow,
_mock_get_usage_and_notify):
_mock_get_usage_and_notify,
_mock_is_just_initialized):
_mock_utcnow.side_effect = [31340, 31341, 31342]
_mock_is_just_initialized.return_value = False
service_name = 'volume'
@ -546,6 +558,71 @@ class HostManagerTestCase(test.TestCase):
None, timestamp)
self.assertTrue(self.host_manager.has_all_capabilities())
@mock.patch('cinder.objects.service.Service.is_up',
new_callable=mock.PropertyMock)
@mock.patch('cinder.db.service_get_all')
def test_first_receive_capabilities_case1(self, _mock_service_get_all,
_mock_service_is_up):
"""Single backend: first_receive flips to True after its first report."""
# No volume service startup
self.assertFalse(self.host_manager.first_receive_capabilities())
services = [
dict(id=1, host='host1', topic='volume', disabled=False,
availability_zone='zone1', updated_at=timeutils.utcnow(),
uuid='06acda71-b3b4-4f1b-8d87-db5c47e7ebd2', )
]
_mock_service_get_all.return_value = services
_mock_service_is_up.return_value = True
timestamp = jsonutils.to_primitive(datetime.utcnow())
host1_volume_capabs = dict(free_capacity_gb=4321)
service_name = 'volume'
# One up-and-enabled service reports its capabilities once ...
self.host_manager.update_service_capabilities(service_name, 'host1',
host1_volume_capabs,
None, timestamp)
# ... which is enough for the startup delay to end.
self.assertTrue(self.host_manager.first_receive_capabilities())
@mock.patch('cinder.objects.service.Service.is_up',
new_callable=mock.PropertyMock)
@mock.patch('cinder.db.service_get_all')
def test_first_receive_capabilities_case2(self, _mock_service_get_all,
_mock_service_is_up):
"""Three backends: first_receive is True only after ALL have reported."""
_mock_service_is_up.return_value = True
services = [
dict(id=1, host='host1', topic='volume', disabled=False,
availability_zone='zone1', updated_at=timeutils.utcnow(),
uuid='36ede0e2-1b3c-41b0-9cd3-66e1f56dc959'),
dict(id=2, host='host2', topic='volume', disabled=False,
availability_zone='zone1', updated_at=timeutils.utcnow(),
uuid='b124e8dc-bf5f-4923-802d-27153ac7fe56'),
dict(id=3, host='host3', topic='volume', disabled=False,
availability_zone='zone1', updated_at=timeutils.utcnow(),
uuid='4d0b1c5e-ce3c-424e-b2f4-a09a0f54d328'),
]
_mock_service_get_all.return_value = services
# Create host_manager again to let db.service_get_all mock run
self.host_manager = host_manager.HostManager()
self.assertFalse(self.host_manager.first_receive_capabilities())
timestamp = jsonutils.to_primitive(datetime.utcnow())
host1_volume_capabs = dict(free_capacity_gb=4321)
host2_volume_capabs = dict(free_capacity_gb=5432)
host3_volume_capabs = dict(free_capacity_gb=6543)
service_name = 'volume'
# Reports trickle in one backend at a time; the flag must stay False
# while any of the three is still outstanding.
self.host_manager.update_service_capabilities(service_name, 'host1',
host1_volume_capabs,
None, timestamp)
self.assertFalse(self.host_manager.first_receive_capabilities())
self.host_manager.update_service_capabilities(service_name, 'host2',
host2_volume_capabs,
None, timestamp)
self.assertFalse(self.host_manager.first_receive_capabilities())
self.host_manager.update_service_capabilities(service_name, 'host3',
host3_volume_capabs,
None, timestamp)
# Third and final report arrived: startup delay may end now.
self.assertTrue(self.host_manager.first_receive_capabilities())
@mock.patch('cinder.db.service_get_all')
@mock.patch('cinder.objects.service.Service.is_up',
new_callable=mock.PropertyMock)

View File

@ -66,13 +66,20 @@ class SchedulerManagerTestCase(test.TestCase):
manager = self.manager
self.assertIsInstance(manager.driver, self.driver_cls)
@mock.patch('cinder.scheduler.driver.Scheduler.is_first_receive')
@mock.patch('eventlet.sleep')
@mock.patch('cinder.volume.rpcapi.VolumeAPI.publish_service_capabilities')
def test_init_host_with_rpc_delay_after_3_tries(self,
                                                publish_capabilities_mock,
                                                sleep_mock,
                                                is_first_receive_mock):
    """Startup delay ends as soon as capabilities are first received.

    is_first_receive() returns False twice then True, so the startup
    loop sleeps one second per failed poll (two sleeps total) instead
    of a single periodic_interval-long sleep, then clears
    _startup_delay.
    """
    self.manager._startup_delay = True
    is_first_receive_mock.side_effect = [False, False, True]
    self.manager.init_host_with_rpc()
    # Capabilities are requested from the volume services exactly once.
    publish_capabilities_mock.assert_called_once_with(mock.ANY)
    # Two failed polls -> two one-second sleeps, no long interval sleep.
    calls = [mock.call(1)] * 2
    sleep_mock.assert_has_calls(calls)
    self.assertEqual(2, sleep_mock.call_count)
    self.assertFalse(self.manager._startup_delay)
@mock.patch('cinder.scheduler.driver.Scheduler.backend_passes_filters')