diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 4e03dc98d7..b034b4a463 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -1584,7 +1584,8 @@ class ConductorManager(base_manager.BaseConductorManager): return @METRICS.timer('ConductorManager._sync_power_states') - @periodics.periodic(spacing=CONF.conductor.sync_power_state_interval) + @periodics.periodic(spacing=CONF.conductor.sync_power_state_interval, + enabled=CONF.conductor.sync_power_state_interval > 0) def _sync_power_states(self, context): """Periodic task to sync power states for the nodes. @@ -1744,7 +1745,10 @@ class ConductorManager(base_manager.BaseConductorManager): eventlet.sleep(0) @METRICS.timer('ConductorManager._check_deploy_timeouts') - @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) + @periodics.periodic( + spacing=CONF.conductor.check_provision_state_interval, + enabled=CONF.conductor.check_provision_state_interval > 0 + and CONF.conductor.deploy_callback_timeout != 0) def _check_deploy_timeouts(self, context): """Periodically checks whether a deploy RPC call has timed out. @@ -1752,6 +1756,8 @@ class ConductorManager(base_manager.BaseConductorManager): :param context: request context. """ + # FIXME(rloo): If the value is < 0, it will be enabled. That doesn't + # seem right. callback_timeout = CONF.conductor.deploy_callback_timeout if not callback_timeout: return @@ -1767,7 +1773,9 @@ class ConductorManager(base_manager.BaseConductorManager): sort_key, callback_method, err_handler) @METRICS.timer('ConductorManager._check_orphan_nodes') - @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) + @periodics.periodic( + spacing=CONF.conductor.check_provision_state_interval, + enabled=CONF.conductor.check_provision_state_interval > 0) def _check_orphan_nodes(self, context): """Periodically checks the status of nodes that were taken over. 
@@ -1954,7 +1962,10 @@ class ConductorManager(base_manager.BaseConductorManager): task, 'console_restore', fields.NotificationStatus.ERROR) @METRICS.timer('ConductorManager._check_cleanwait_timeouts') - @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) + @periodics.periodic( + spacing=CONF.conductor.check_provision_state_interval, + enabled=CONF.conductor.check_provision_state_interval > 0 + and CONF.conductor.clean_callback_timeout != 0) def _check_cleanwait_timeouts(self, context): """Periodically checks for nodes being cleaned. @@ -1963,6 +1974,8 @@ class ConductorManager(base_manager.BaseConductorManager): :param context: request context. """ + # FIXME(rloo): If the value is < 0, it will be enabled. That doesn't + # seem right. callback_timeout = CONF.conductor.clean_callback_timeout if not callback_timeout: return @@ -1998,7 +2011,8 @@ class ConductorManager(base_manager.BaseConductorManager): ) @METRICS.timer('ConductorManager._sync_local_state') - @periodics.periodic(spacing=CONF.conductor.sync_local_state_interval) + @periodics.periodic(spacing=CONF.conductor.sync_local_state_interval, + enabled=CONF.conductor.sync_local_state_interval > 0) def _sync_local_state(self, context): """Perform any actions necessary to sync local state. 
@@ -2748,7 +2762,8 @@ class ConductorManager(base_manager.BaseConductorManager): eventlet.sleep(0) @METRICS.timer('ConductorManager._send_sensor_data') - @periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval) + @periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval, + enabled=CONF.conductor.send_sensor_data) def _send_sensor_data(self, context): """Periodically sends sensor data to Ceilometer.""" @@ -2961,13 +2976,18 @@ class ConductorManager(base_manager.BaseConductorManager): state=task.node.provision_state) @METRICS.timer('ConductorManager._check_inspect_wait_timeouts') - @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) + @periodics.periodic( + spacing=CONF.conductor.check_provision_state_interval, + enabled=CONF.conductor.check_provision_state_interval > 0 + and CONF.conductor.inspect_wait_timeout != 0) def _check_inspect_wait_timeouts(self, context): """Periodically checks inspect_wait_timeout and fails upon reaching it. :param: context: request context """ + # FIXME(rloo): If the value is < 0, it will be enabled. That doesn't + # seem right. callback_timeout = CONF.conductor.inspect_wait_timeout if not callback_timeout: return diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index b2879bf458..0bf16fd7f4 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -47,11 +47,12 @@ opts = [ cfg.IntOpt('sync_power_state_interval', default=60, help=_('Interval between syncing the node power state to the ' - 'database, in seconds.')), + 'database, in seconds. Set to 0 to disable syncing.')), cfg.IntOpt('check_provision_state_interval', default=60, + min=0, help=_('Interval between checks of provision timeouts, ' - 'in seconds.')), + 'in seconds. 
Set to 0 to disable checks.')), cfg.IntOpt('check_rescue_state_interval', default=60, min=1, @@ -90,6 +91,7 @@ opts = [ 'notification bus')), cfg.IntOpt('send_sensor_data_interval', default=600, + min=1, help=_('Seconds between conductor sending sensor data message ' 'to ceilometer via the notification bus.')), cfg.IntOpt('send_sensor_data_workers', @@ -115,8 +117,8 @@ opts = [ 'local state as nodes are moved around the cluster. ' 'This option controls how often, in seconds, each ' 'conductor will check for nodes that it should ' - '"take over". Set it to a negative value to disable ' - 'the check entirely.')), + '"take over". Set it to 0 (or a negative value) to ' + 'disable the check entirely.')), cfg.StrOpt('configdrive_swift_container', default='ironic_configdrive_container', help=_('Name of the Swift container to store config drive ' diff --git a/ironic/conf/drac.py b/ironic/conf/drac.py index fcc193012c..f132574bec 100644 --- a/ironic/conf/drac.py +++ b/ironic/conf/drac.py @@ -18,6 +18,7 @@ from ironic.common.i18n import _ opts = [ cfg.IntOpt('query_raid_config_job_status_interval', default=120, + min=1, help=_('Interval (in seconds) between periodic RAID job status ' 'checks to determine whether the asynchronous RAID ' 'configuration was successfully finished or not.')) diff --git a/ironic/drivers/modules/oneview/deploy.py b/ironic/drivers/modules/oneview/deploy.py index 71a3cb8a7d..acceed987d 100644 --- a/ironic/drivers/modules/oneview/deploy.py +++ b/ironic/drivers/modules/oneview/deploy.py @@ -37,7 +37,8 @@ METRICS = metrics_utils.get_metrics_logger(__name__) class OneViewPeriodicTasks(object): @periodics.periodic(spacing=CONF.oneview.periodic_check_interval, - enabled=CONF.oneview.enable_periodic_tasks) + enabled=CONF.oneview.enable_periodic_tasks + and CONF.oneview.periodic_check_interval > 0) def _periodic_check_nodes_taken_by_oneview(self, manager, context): """Checks if nodes in Ironic were taken by OneView users. 
@@ -94,7 +95,8 @@ class OneViewPeriodicTasks(object): manager.do_provisioning_action(context, node.uuid, 'manage') @periodics.periodic(spacing=CONF.oneview.periodic_check_interval, - enabled=CONF.oneview.enable_periodic_tasks) + enabled=CONF.oneview.enable_periodic_tasks + and CONF.oneview.periodic_check_interval > 0) def _periodic_check_nodes_freed_by_oneview(self, manager, context): """Checks if nodes taken by OneView users were freed. diff --git a/releasenotes/notes/disable_periodic_tasks-0ea39fa7a8a108c6.yaml b/releasenotes/notes/disable_periodic_tasks-0ea39fa7a8a108c6.yaml new file mode 100644 index 0000000000..a2811537e0 --- /dev/null +++ b/releasenotes/notes/disable_periodic_tasks-0ea39fa7a8a108c6.yaml @@ -0,0 +1,49 @@ +--- +features: + - | + Setting these configuration options to 0 will disable the periodic tasks: + + * [conductor]sync_power_state_interval: sync power states for the nodes + * [conductor]check_provision_state_interval: + + * check deployments and time out if the deployment takes too long + * check the status of cleaning a node and time out if it takes too long + * check the status of inspecting a node and time out if it takes too long + * check for and handle nodes that are taken over by new conductors (if an + old conductor disappeared) + + * [conductor]send_sensor_data_interval: send sensor data to ceilometer + * [conductor]sync_local_state_interval: refresh a conductor's copy of the + consistent hash ring. If any mappings have changed, determines which, + if any, nodes need to be "taken over". The ensuing actions could include + preparing a PXE environment, updating the DHCP server, and so on. + * [oneview]periodic_check_interval: + + * check for nodes taken over by OneView users + * check for nodes freed by OneView users + +fixes: + - | + Fixes an issue where setting these configuration options to 0 caused a + ValueError exception to be raised. You can now set them to 0 to disable the + associated periodic tasks. 
(For more information, see `story 2002059 + <https://storyboard.openstack.org/#!/story/2002059>`_.): + + * [conductor]sync_power_state_interval: sync power states for the nodes + * [conductor]check_provision_state_interval: + + * check deployments and time out if the deployment takes too long + * check the status of cleaning a node and time out if it takes too long + * check the status of inspecting a node and time out if it takes too long + * check for and handle nodes that are taken over by new conductors (if an + old conductor disappeared) + + * [conductor]send_sensor_data_interval: send sensor data to ceilometer + * [conductor]sync_local_state_interval: refresh a conductor's copy of the + consistent hash ring. If any mappings have changed, determines which, + if any, nodes need to be "taken over". The ensuing actions could include + preparing a PXE environment, updating the DHCP server, and so on. + * [oneview]periodic_check_interval: + + * check for nodes taken over by OneView users + * check for nodes freed by OneView users