Make sure report_interval is less than service_down_time

Services that inherit service.py/Service class would register
themselves to DB and then update stats periodically (every
report_interval second).  The consumer of this kind of information,
like scheduler or 'os-service' API extension, will consider a service
is 'up' (active) if last update from that service is not longer than
'service_down_time' ago.

The problem is if 'report_interval' was configured/provided greater
than 'service_down_time' by mistake, services would then be always
considered in 'down' state, which can result in unsuccesful placement
of volume create request for example.  This is what Bug #1255685 is
about.

In previous fix: https://review.openstack.org/#/c/60760/, a
configuration check helper function basic_config_check() was added
*wrongly* to WSGIService class instead of Service class.  This patch
moves the configuration check helper function and the check to the
right place to make sure 'report_interval' is less then
'service_down_time'.

Closes-bug #1255685

Change-Id: I14bd8c54e5ce20719844f437808ad98a011820de
This commit is contained in:
Zhiteng Huang 2014-01-09 14:54:22 +08:00
parent cbc6e8ef5d
commit 80096b6fb6
2 changed files with 24 additions and 23 deletions

View File

@ -86,6 +86,7 @@ class Service(service.Service):
self.report_interval = report_interval
self.periodic_interval = periodic_interval
self.periodic_fuzzy_delay = periodic_fuzzy_delay
self.basic_config_check()
self.saved_args, self.saved_kwargs = args, kwargs
self.timers = []
@ -138,6 +139,22 @@ class Service(service.Service):
initial_delay=initial_delay)
self.timers.append(periodic)
def basic_config_check(self):
"""Perform basic config checks before starting service."""
# Make sure report interval is less than service down time
if self.report_interval:
if CONF.service_down_time <= self.report_interval:
new_down_time = int(self.report_interval * 2.5)
LOG.warn(_("Report interval must be less than service down "
"time. Current config service_down_time: "
"%(service_down_time)s, report_interval for this: "
"service is: %(report_interval)s. Setting global "
"service_down_time to: %(new_down_time)s") %
{'service_down_time': CONF.service_down_time,
'report_interval': self.report_interval,
'new_down_time': new_down_time})
CONF.set_override('service_down_time', new_down_time)
def _create_service_ref(self, context):
zone = CONF.storage_availability_zone
service_ref = db.service_create(context,
@ -283,28 +300,11 @@ class WSGIService(object):
{'name': name})
# Reset workers to default
self.workers = None
self.basic_config_check()
self.server = wsgi.Server(name,
self.app,
host=self.host,
port=self.port)
def basic_config_check(self):
"""Perform basic config checks before starting service."""
# Make sure report interval is less than service down time
report_interval = CONF.report_interval
if CONF.service_down_time <= report_interval:
new_service_down_time = int(report_interval * 2.5)
LOG.warn(_("Report interval must be less than service down "
"time. Current config: <service_down_time: "
"%(service_down_time)s, report_interval: "
"%(report_interval)s>. Setting service_down_time to: "
"%(new_service_down_time)s") %
{'service_down_time': CONF.service_down_time,
'report_interval': report_interval,
'new_service_down_time': new_service_down_time})
CONF.set_override('service_down_time', new_service_down_time)
def _get_manager(self):
"""Initialize a Manager object appropriate for this service.

View File

@ -195,6 +195,13 @@ class ServiceTestCase(test.TestCase):
self.assertFalse(serv.model_disconnected)
def test_service_with_long_report_interval(self):
CONF.set_override('service_down_time', 10)
CONF.set_override('report_interval', 10)
service.Service.create(binary="test_service",
manager="cinder.tests.test_service.FakeManager")
self.assertEqual(CONF.service_down_time, 25)
class TestWSGIService(test.TestCase):
@ -208,9 +215,3 @@ class TestWSGIService(test.TestCase):
test_service.start()
self.assertNotEqual(0, test_service.port)
test_service.stop()
def test_service_with_min_down_time(self):
CONF.set_override('service_down_time', 10)
CONF.set_override('report_interval', 10)
service.WSGIService("test_service")
self.assertEqual(CONF.service_down_time, 25)