Merge "Disable periodic tasks if interval set to 0"

This commit is contained in:
Zuul 2018-10-02 23:46:09 +00:00 committed by Gerrit Code Review
commit c80e912b3a
5 changed files with 87 additions and 13 deletions

View File

@ -1584,7 +1584,8 @@ class ConductorManager(base_manager.BaseConductorManager):
return
@METRICS.timer('ConductorManager._sync_power_states')
@periodics.periodic(spacing=CONF.conductor.sync_power_state_interval)
@periodics.periodic(spacing=CONF.conductor.sync_power_state_interval,
enabled=CONF.conductor.sync_power_state_interval > 0)
def _sync_power_states(self, context):
"""Periodic task to sync power states for the nodes.
@ -1744,7 +1745,10 @@ class ConductorManager(base_manager.BaseConductorManager):
eventlet.sleep(0)
@METRICS.timer('ConductorManager._check_deploy_timeouts')
@periodics.periodic(spacing=CONF.conductor.check_provision_state_interval)
@periodics.periodic(
spacing=CONF.conductor.check_provision_state_interval,
enabled=CONF.conductor.check_provision_state_interval > 0
and CONF.conductor.deploy_callback_timeout != 0)
def _check_deploy_timeouts(self, context):
"""Periodically checks whether a deploy RPC call has timed out.
@ -1752,6 +1756,8 @@ class ConductorManager(base_manager.BaseConductorManager):
:param context: request context.
"""
# FIXME(rloo): A negative deploy_callback_timeout is truthy, so it passes
# the check below and this task stays enabled. That doesn't seem right.
callback_timeout = CONF.conductor.deploy_callback_timeout
if not callback_timeout:
return
@ -1767,7 +1773,9 @@ class ConductorManager(base_manager.BaseConductorManager):
sort_key, callback_method, err_handler)
@METRICS.timer('ConductorManager._check_orphan_nodes')
@periodics.periodic(spacing=CONF.conductor.check_provision_state_interval)
@periodics.periodic(
spacing=CONF.conductor.check_provision_state_interval,
enabled=CONF.conductor.check_provision_state_interval > 0)
def _check_orphan_nodes(self, context):
"""Periodically checks the status of nodes that were taken over.
@ -1954,7 +1962,10 @@ class ConductorManager(base_manager.BaseConductorManager):
task, 'console_restore', fields.NotificationStatus.ERROR)
@METRICS.timer('ConductorManager._check_cleanwait_timeouts')
@periodics.periodic(spacing=CONF.conductor.check_provision_state_interval)
@periodics.periodic(
spacing=CONF.conductor.check_provision_state_interval,
enabled=CONF.conductor.check_provision_state_interval > 0
and CONF.conductor.clean_callback_timeout != 0)
def _check_cleanwait_timeouts(self, context):
"""Periodically checks for nodes being cleaned.
@ -1963,6 +1974,8 @@ class ConductorManager(base_manager.BaseConductorManager):
:param context: request context.
"""
# FIXME(rloo): A negative clean_callback_timeout is truthy, so it passes
# the check below and this task stays enabled. That doesn't seem right.
callback_timeout = CONF.conductor.clean_callback_timeout
if not callback_timeout:
return
@ -1998,7 +2011,8 @@ class ConductorManager(base_manager.BaseConductorManager):
)
@METRICS.timer('ConductorManager._sync_local_state')
@periodics.periodic(spacing=CONF.conductor.sync_local_state_interval)
@periodics.periodic(spacing=CONF.conductor.sync_local_state_interval,
enabled=CONF.conductor.sync_local_state_interval > 0)
def _sync_local_state(self, context):
"""Perform any actions necessary to sync local state.
@ -2748,7 +2762,8 @@ class ConductorManager(base_manager.BaseConductorManager):
eventlet.sleep(0)
@METRICS.timer('ConductorManager._send_sensor_data')
@periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval)
@periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval,
enabled=CONF.conductor.send_sensor_data)
def _send_sensor_data(self, context):
"""Periodically sends sensor data to Ceilometer."""
@ -2961,13 +2976,18 @@ class ConductorManager(base_manager.BaseConductorManager):
state=task.node.provision_state)
@METRICS.timer('ConductorManager._check_inspect_wait_timeouts')
@periodics.periodic(spacing=CONF.conductor.check_provision_state_interval)
@periodics.periodic(
spacing=CONF.conductor.check_provision_state_interval,
enabled=CONF.conductor.check_provision_state_interval > 0
and CONF.conductor.inspect_wait_timeout != 0)
def _check_inspect_wait_timeouts(self, context):
"""Periodically checks inspect_wait_timeout and fails upon reaching it.
:param: context: request context
"""
# FIXME(rloo): A negative inspect_wait_timeout is truthy, so it passes
# the check below and this task stays enabled. That doesn't seem right.
callback_timeout = CONF.conductor.inspect_wait_timeout
if not callback_timeout:
return

View File

@ -47,11 +47,12 @@ opts = [
cfg.IntOpt('sync_power_state_interval',
default=60,
help=_('Interval between syncing the node power state to the '
'database, in seconds.')),
'database, in seconds. Set to 0 to disable syncing.')),
cfg.IntOpt('check_provision_state_interval',
default=60,
min=0,
help=_('Interval between checks of provision timeouts, '
'in seconds.')),
'in seconds. Set to 0 to disable checks.')),
cfg.IntOpt('check_rescue_state_interval',
default=60,
min=1,
@ -90,6 +91,7 @@ opts = [
'notification bus')),
cfg.IntOpt('send_sensor_data_interval',
default=600,
min=1,
help=_('Seconds between conductor sending sensor data message '
'to ceilometer via the notification bus.')),
cfg.IntOpt('send_sensor_data_workers',
@ -115,8 +117,8 @@ opts = [
'local state as nodes are moved around the cluster. '
'This option controls how often, in seconds, each '
'conductor will check for nodes that it should '
'"take over". Set it to a negative value to disable '
'the check entirely.')),
'"take over". Set it to 0 (or a negative value) to '
'disable the check entirely.')),
cfg.StrOpt('configdrive_swift_container',
default='ironic_configdrive_container',
help=_('Name of the Swift container to store config drive '

View File

@ -18,6 +18,7 @@ from ironic.common.i18n import _
opts = [
cfg.IntOpt('query_raid_config_job_status_interval',
default=120,
min=1,
help=_('Interval (in seconds) between periodic RAID job status '
'checks to determine whether the asynchronous RAID '
'configuration was successfully finished or not.'))

View File

@ -37,7 +37,8 @@ METRICS = metrics_utils.get_metrics_logger(__name__)
class OneViewPeriodicTasks(object):
@periodics.periodic(spacing=CONF.oneview.periodic_check_interval,
enabled=CONF.oneview.enable_periodic_tasks)
enabled=CONF.oneview.enable_periodic_tasks
and CONF.oneview.periodic_check_interval > 0)
def _periodic_check_nodes_taken_by_oneview(self, manager, context):
"""Checks if nodes in Ironic were taken by OneView users.
@ -94,7 +95,8 @@ class OneViewPeriodicTasks(object):
manager.do_provisioning_action(context, node.uuid, 'manage')
@periodics.periodic(spacing=CONF.oneview.periodic_check_interval,
enabled=CONF.oneview.enable_periodic_tasks)
enabled=CONF.oneview.enable_periodic_tasks
and CONF.oneview.periodic_check_interval > 0)
def _periodic_check_nodes_freed_by_oneview(self, manager, context):
"""Checks if nodes taken by OneView users were freed.

View File

@ -0,0 +1,49 @@
---
features:
- |
Setting these configuration options to 0 will disable the periodic tasks:
* [conductor]sync_power_state_interval: sync power states for the nodes
* [conductor]check_provision_state_interval:
* check deployments and time out if the deployment takes too long
* check the status of cleaning a node and time out if it takes too long
* check the status of inspecting a node and time out if it takes too long
* check for and handle nodes that are taken over by new conductors (if an
old conductor disappeared)
* [conductor]send_sensor_data_interval: send sensor data to ceilometer
* [conductor]sync_local_state_interval: refresh a conductor's copy of the
consistent hash ring. If any mappings have changed, determines which,
if any, nodes need to be "taken over". The ensuing actions could include
preparing a PXE environment, updating the DHCP server, and so on.
* [oneview]periodic_check_interval:
* check for nodes taken over by OneView users
* check for nodes freed by OneView users
fixes:
- |
Fixes an issue where setting any of the following configuration options to 0
caused a ValueError exception to be raised. They can now be set to 0 to
disable the associated periodic tasks (for more information, see `story 2002059
<https://storyboard.openstack.org/#!/story/2002059>`_):
* [conductor]sync_power_state_interval: sync power states for the nodes
* [conductor]check_provision_state_interval:
* check deployments and time out if the deployment takes too long
* check the status of cleaning a node and time out if it takes too long
* check the status of inspecting a node and time out if it takes too long
* check for and handle nodes that are taken over by new conductors (if an
old conductor disappeared)
* [conductor]send_sensor_data_interval: send sensor data to ceilometer
* [conductor]sync_local_state_interval: refresh a conductor's copy of the
consistent hash ring. If any mappings have changed, determines which,
if any, nodes need to be "taken over". The ensuing actions could include
preparing a PXE environment, updating the DHCP server, and so on.
* [oneview]periodic_check_interval:
* check for nodes taken over by OneView users
* check for nodes freed by OneView users