From 004e78c41368a3bb037726ce0c1ff550436a5717 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Fri, 12 Apr 2024 15:54:50 +0200 Subject: [PATCH] Fix the confusion around service_reboot/servicing_reboot We ended up using two names for the same flag (and forgot it in one place completely). To not just fix the issue but also prevent it in the future, refactor asynchronous steps handling into a new helper module with constants and helper functions. I've settled on servicing_reboot as opposed to service_reboot because that's the value we currently set (but not read), so it provides better compatibility when backporting. Remove excessive mocking in the Redfish unit tests. Change-Id: I32b5f860b5d10864ce68f8d5f1dac3f76cd158d6 --- ironic/common/async_steps.py | 168 ++++++++++++++++++ ironic/conductor/cleaning.py | 11 +- ironic/conductor/deployments.py | 12 +- ironic/conductor/servicing.py | 10 +- ironic/conductor/utils.py | 31 +--- ironic/drivers/modules/agent.py | 8 +- ironic/drivers/modules/agent_base.py | 31 ++-- ironic/drivers/modules/deploy_utils.py | 85 +-------- ironic/tests/unit/conductor/test_servicing.py | 16 +- ironic/tests/unit/conductor/test_utils.py | 16 +- .../drivers/modules/drac/test_management.py | 9 +- .../modules/redfish/test_management.py | 22 +-- .../unit/drivers/modules/redfish/test_raid.py | 58 +----- .../servicing-reboot-502f474a01f937a8.yaml | 5 + 14 files changed, 250 insertions(+), 232 deletions(-) create mode 100644 ironic/common/async_steps.py create mode 100644 releasenotes/notes/servicing-reboot-502f474a01f937a8.yaml diff --git a/ironic/common/async_steps.py b/ironic/common/async_steps.py new file mode 100644 index 0000000000..8bec847627 --- /dev/null +++ b/ironic/common/async_steps.py @@ -0,0 +1,168 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from ironic.common import states + +# These flags tell the conductor that we're rebooting inside or after a step. + +CLEANING_REBOOT = "cleaning_reboot" +DEPLOYMENT_REBOOT = "deployment_reboot" +SERVICING_REBOOT = "servicing_reboot" + +# These flags tell the conductor whether the currently running step should be +# skipped after the agent heartbeats again. Setting them to false causes +# the conductor to re-enter the previously running step after a reboot. + +SKIP_CURRENT_CLEAN_STEP = "skip_current_clean_step" +SKIP_CURRENT_DEPLOY_STEP = "skip_current_deploy_step" +SKIP_CURRENT_SERVICE_STEP = "skip_current_service_step" + +# These flags tell the conductor that something else (most likely, a periodic +# task in some hardware interface) is polling the step for completion. + +CLEANING_POLLING = "cleaning_polling" +DEPLOYMENT_POLLING = "deployment_polling" +SERVICING_POLLING = "servicing_polling" + +_ALL_FLAGS = [CLEANING_REBOOT, DEPLOYMENT_REBOOT, SERVICING_REBOOT, + SKIP_CURRENT_CLEAN_STEP, SKIP_CURRENT_DEPLOY_STEP, + SKIP_CURRENT_SERVICE_STEP, + CLEANING_POLLING, DEPLOYMENT_POLLING, SERVICING_POLLING] + + +def get_return_state(node): + """Returns state based on operation being invoked. + + :param node: an ironic node object. + :returns: states.CLEANWAIT if cleaning operation in progress, + or states.DEPLOYWAIT if deploy operation in progress, + or states.SERVICEWAIT if servicing in progress. + """ + # FIXME(dtantsur): this distinction is rather useless, create a new + # constant to use for all step types? + if node.clean_step: + return states.CLEANWAIT + elif node.service_step: + return states.SERVICEWAIT + else: + # TODO(dtantsur): ideally, check for node.deploy_step and raise + # something if this function is called without any step field set. + # Unfortunately, a lot of unit tests rely on exactly this. + return states.DEPLOYWAIT + + +def _check_agent_token_prior_to_agent_reboot(node): + """Removes the agent token if it was not pregenerated. + + Removal of the agent token in cases where it is not pregenerated + is a vital action prior to rebooting the agent, as without doing + so the agent will be unable to establish communication with + the ironic API after the reboot. Effectively locking itself out + as in cases where the value is not pregenerated, it is not + already included in the payload and must be generated again + upon lookup. + + :param node: The Node object. + """ + if not node.driver_internal_info.get('agent_secret_token_pregenerated', + False): + node.del_driver_internal_info('agent_secret_token') + + +def _step_type(node, step_type): + if step_type: + return step_type + if node.clean_step: + return 'clean' + elif node.service_step: + return 'service' + else: + return 'deploy' + + +def set_node_flags(node, reboot=None, skip_current_step=None, polling=None, + step_type=None): + """Sets appropriate reboot flags in driver_internal_info based on operation + + :param node: an ironic node object. + :param reboot: Boolean value to set for node's driver_internal_info flag + cleaning_reboot, servicing_reboot or deployment_reboot based on the + operation in progress. If it is None, corresponding reboot flag is + not set in node's driver_internal_info. + :param skip_current_step: Boolean value to set for node's + driver_internal_info flag skip_current_clean_step, + skip_current_service_step or skip_current_deploy_step based on the + operation in progress. If it is None, corresponding skip step flag is + not set in node's driver_internal_info. + :param polling: Boolean value to set for node's driver_internal_info flag + deployment_polling, servicing_polling or cleaning_polling. If it is + None, the corresponding polling flag is not set in the node's + driver_internal_info. + :param step_type: The type of steps to process: 'clean', 'service' + or 'deploy'. If None, detected from the node. + """ + step_type = _step_type(node, step_type) + if step_type == 'clean': + reboot_field = CLEANING_REBOOT + skip_field = SKIP_CURRENT_CLEAN_STEP + polling_field = CLEANING_POLLING + elif step_type == 'service': + reboot_field = SERVICING_REBOOT + skip_field = SKIP_CURRENT_SERVICE_STEP + polling_field = SERVICING_POLLING + else: + reboot_field = DEPLOYMENT_REBOOT + skip_field = SKIP_CURRENT_DEPLOY_STEP + polling_field = DEPLOYMENT_POLLING + + if reboot is not None: + node.set_driver_internal_info(reboot_field, reboot) + if reboot: + # If rebooting, we must ensure that we check and remove + # an agent token if necessary. + _check_agent_token_prior_to_agent_reboot(node) + if skip_current_step is not None: + node.set_driver_internal_info(skip_field, skip_current_step) + if polling is not None: + node.set_driver_internal_info(polling_field, polling) + node.save() + + +def remove_node_flags(node): + """Remove all flags for the node. + + :param node: A Node object + """ + for flag in _ALL_FLAGS: + node.del_driver_internal_info(flag) + + +def prepare_node_for_next_step(node, step_type=None): + """Remove the flags responsible for the next step. + + Cleans the polling and the skip-next step flags. + + :param node: A Node object + :param step_type: The type of steps to process: 'clean', 'service' + or 'deploy'. If None, detected from the node. + :returns: The last value of the skip-next flag. + """ + step_type = _step_type(node, step_type) + skip_current_step = node.del_driver_internal_info( + 'skip_current_%s_step' % step_type, True) + if step_type == 'clean': + node.del_driver_internal_info(CLEANING_POLLING) + elif step_type == 'service': + node.del_driver_internal_info(SERVICING_POLLING) + else: + node.del_driver_internal_info(DEPLOYMENT_POLLING) + return skip_current_step diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py index dc3c3e9a17..7fea76263b 100644 --- a/ironic/conductor/cleaning.py +++ b/ironic/conductor/cleaning.py @@ -14,6 +14,7 @@ from oslo_log import log +from ironic.common import async_steps from ironic.common import exception from ironic.common.i18n import _ from ironic.common import states @@ -192,13 +193,14 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None): except Exception as e: if isinstance(e, exception.AgentConnectionFailed): - if task.node.driver_internal_info.get('cleaning_reboot'): + if task.node.driver_internal_info.get( + async_steps.CLEANING_REBOOT): LOG.info('Agent is not yet running on node %(node)s ' 'after cleaning reboot, waiting for agent to ' 'come up to run next clean step %(step)s.', {'node': node.uuid, 'step': step}) - node.set_driver_internal_info('skip_current_clean_step', - False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_CLEAN_STEP, False) target_state = (states.MANAGEABLE if manual_clean else None) task.process_event('wait', target_state=target_state) @@ -209,7 +211,8 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None): 'executing a command. Error: %(error)s', {'node': task.node.uuid, 'error': e}) - node.set_driver_internal_info('skip_current_clean_step', False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_CLEAN_STEP, False) target_state = states.MANAGEABLE if manual_clean else None task.process_event('wait', target_state=target_state) return diff --git a/ironic/conductor/deployments.py b/ironic/conductor/deployments.py index 1bc867f843..980baad03e 100644 --- a/ironic/conductor/deployments.py +++ b/ironic/conductor/deployments.py @@ -19,6 +19,7 @@ from oslo_db import exception as db_exception from oslo_log import log from oslo_utils import excutils +from ironic.common import async_steps from ironic.common import exception from ironic.common.glance_service import service_utils as glance_utils from ironic.common.i18n import _ @@ -302,19 +303,20 @@ def do_next_deploy_step(task, step_index): 'executing a command. Error: %(error)s', {'node': task.node.uuid, 'error': e}) - node.set_driver_internal_info('skip_current_deploy_step', - False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_DEPLOY_STEP, False) task.process_event('wait') return except exception.IronicException as e: if isinstance(e, exception.AgentConnectionFailed): - if task.node.driver_internal_info.get('deployment_reboot'): + if task.node.driver_internal_info.get( + async_steps.DEPLOYMENT_REBOOT): LOG.info('Agent is not yet running on node %(node)s after ' 'deployment reboot, waiting for agent to come up ' 'to run next deploy step %(step)s.', {'node': node.uuid, 'step': step}) - node.set_driver_internal_info('skip_current_deploy_step', - False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_DEPLOY_STEP, False) task.process_event('wait') return diff --git a/ironic/conductor/servicing.py b/ironic/conductor/servicing.py index 598038a18a..1dd929d39b 100644 --- a/ironic/conductor/servicing.py +++ b/ironic/conductor/servicing.py @@ -14,6 +14,7 @@ from oslo_log import log +from ironic.common import async_steps from ironic.common import exception from ironic.common.i18n import _ from ironic.common import states @@ -156,13 +157,14 @@ def do_next_service_step(task, step_index, disable_ramdisk=None): except Exception as e: if isinstance(e, exception.AgentConnectionFailed): - if task.node.driver_internal_info.get('service_reboot'): + if task.node.driver_internal_info.get( + async_steps.SERVICING_REBOOT): LOG.info('Agent is not yet running on node %(node)s ' 'after service reboot, waiting for agent to ' 'come up to run next service step %(step)s.', {'node': node.uuid, 'step': step}) - node.set_driver_internal_info('skip_current_service_step', - False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_SERVICE_STEP, False) task.process_event('wait') return if isinstance(e, exception.AgentInProgress): @@ -172,7 +174,7 @@ def do_next_service_step(task, step_index, disable_ramdisk=None): {'node': task.node.uuid, 'error': e}) node.set_driver_internal_info( - 'skip_current_service_step', False) + async_steps.SKIP_CURRENT_SERVICE_STEP, False) task.process_event('wait') return diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 6757c90065..761b19fd75 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -29,6 +29,7 @@ from oslo_utils import excutils from oslo_utils import strutils from oslo_utils import timeutils +from ironic.common import async_steps from ironic.common import boot_devices from ironic.common import exception from ironic.common import faults @@ -493,9 +494,7 @@ def cleaning_error_handler(task, logmsg, errmsg=None, traceback=False, node.clean_step = {} # Clear any leftover metadata about cleaning node.del_driver_internal_info('clean_step_index') - node.del_driver_internal_info('cleaning_reboot') - node.del_driver_internal_info('cleaning_polling') - node.del_driver_internal_info('skip_current_clean_step') + async_steps.remove_node_flags(node) # We don't need to keep the old agent URL, or token # as it should change upon the next cleaning attempt. wipe_token_and_url(task) @@ -556,10 +555,8 @@ def wipe_deploy_internal_info(task): node.del_driver_internal_info('user_deploy_steps') node.del_driver_internal_info('agent_cached_deploy_steps') node.del_driver_internal_info('deploy_step_index') - node.del_driver_internal_info('deployment_reboot') - node.del_driver_internal_info('deployment_polling') - node.del_driver_internal_info('skip_current_deploy_step') node.del_driver_internal_info('steps_validated') + async_steps.remove_node_flags(node) def wipe_cleaning_internal_info(task): @@ -570,11 +567,9 @@ def wipe_cleaning_internal_info(task): node.set_driver_internal_info('clean_steps', None) node.del_driver_internal_info('agent_cached_clean_steps') node.del_driver_internal_info('clean_step_index') - node.del_driver_internal_info('cleaning_reboot') - node.del_driver_internal_info('cleaning_polling') node.del_driver_internal_info('cleaning_disable_ramdisk') - node.del_driver_internal_info('skip_current_clean_step') node.del_driver_internal_info('steps_validated') + async_steps.remove_node_flags(node) def wipe_service_internal_info(task): @@ -584,11 +579,9 @@ def wipe_service_internal_info(task): node.set_driver_internal_info('service_steps', None) node.del_driver_internal_info('agent_cached_service_steps') node.del_driver_internal_info('service_step_index') - node.del_driver_internal_info('service_reboot') - node.del_driver_internal_info('service_polling') node.del_driver_internal_info('service_disable_ramdisk') - node.del_driver_internal_info('skip_current_service_step') node.del_driver_internal_info('steps_validated') + async_steps.remove_node_flags(node) def deploying_error_handler(task, logmsg, errmsg=None, traceback=False, @@ -1268,14 +1261,8 @@ def update_next_step_index(task, step_type): :param step_type: The type of steps to process: 'clean' or 'deploy'. :returns: Index of the next step. """ - skip_current_step = task.node.del_driver_internal_info( - 'skip_current_%s_step' % step_type, True) - if step_type == 'clean': - task.node.del_driver_internal_info('cleaning_polling') - else: - task.node.del_driver_internal_info('deployment_polling') - task.node.save() - + skip_current_step = async_steps.prepare_node_for_next_step( + task.node, step_type) return _get_node_next_steps(task, step_type, skip_current_step=skip_current_step) @@ -1810,9 +1797,7 @@ def servicing_error_handler(task, logmsg, errmsg=None, traceback=False, node.service_step = {} # Clear any leftover metadata about cleaning node.del_driver_internal_info('service_step_index') - node.del_driver_internal_info('servicing_reboot') - node.del_driver_internal_info('servicing_polling') - node.del_driver_internal_info('skip_current_service_step') + async_steps.remove_node_flags(node) # We don't need to keep the old agent URL, or token # as it should change upon the next cleaning attempt. wipe_token_and_url(task) diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 52a672f94f..a9822b026a 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -18,6 +18,7 @@ from ironic_lib import metrics_utils from oslo_log import log from oslo_utils import units +from ironic.common import async_steps from ironic.common import exception from ironic.common.glance_service import service_utils from ironic.common.i18n import _ @@ -266,10 +267,11 @@ class CustomAgentDeploy(agent_base.AgentBaseMixin, agent_base.AgentDeployMixin, elif task.driver.storage.should_write_image(task): # Check if the driver has already performed a reboot in a previous # deploy step. - if not task.node.driver_internal_info.get('deployment_reboot'): - manager_utils.node_power_action(task, states.REBOOT) - task.node.del_driver_internal_info('deployment_reboot') + already_rebooted = task.node.del_driver_internal_info( + async_steps.DEPLOYMENT_REBOOT) task.node.save() + if not already_rebooted: + manager_utils.node_power_action(task, states.REBOOT) return states.DEPLOYWAIT @METRICS.timer('CustomAgentDeployMixin.prepare_instance_boot') diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index c99fdeb767..936691a24c 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -23,6 +23,7 @@ from oslo_log import log from oslo_utils import strutils import tenacity +from ironic.common import async_steps from ironic.common import boot_devices from ironic.common import dhcp_factory from ironic.common import exception @@ -217,18 +218,7 @@ def _post_step_reboot(task, step_type): return # Signify that we've rebooted - if step_type == 'clean': - task.node.set_driver_internal_info('cleaning_reboot', True) - elif step_type == 'deploy': - task.node.set_driver_internal_info('deployment_reboot', True) - elif step_type == 'service': - task.node.set_driver_internal_info('servicing_reboot', True) - - if not task.node.driver_internal_info.get( - 'agent_secret_token_pregenerated', False): - # Wipes out the existing recorded token because the machine will - # need to re-establish the token. - task.node.del_driver_internal_info('agent_secret_token') + async_steps.set_node_flags(task.node, reboot=True, step_type=step_type) task.node.save() @@ -531,7 +521,7 @@ class HeartbeatMixin(object): # Check if the driver is polling for completion of # a step, via the 'deployment_polling' flag. polling = node.driver_internal_info.get( - 'deployment_polling', False) + async_steps.DEPLOYMENT_POLLING, False) if not polling: msg = _('Failed to process the next deploy step') self.process_next_step(task, 'deploy') @@ -567,7 +557,7 @@ class HeartbeatMixin(object): # Check if the driver is polling for completion of a step, # via the 'cleaning_polling' flag. polling = node.driver_internal_info.get( - 'cleaning_polling', False) + async_steps.CLEANING_POLLING, False) if not polling: self.continue_cleaning(task) except Exception as e: @@ -608,9 +598,9 @@ class HeartbeatMixin(object): else: msg = _('Node failed to check service progress') # Check if the driver is polling for completion of a step, - # via the 'cleaning_polling' flag. + # via the 'servicing_polling' flag. polling = node.driver_internal_info.get( - 'service_polling', False) + async_steps.SERVICING_POLLING, False) if not polling: self.continue_servicing(task) except Exception as e: @@ -1003,7 +993,8 @@ class AgentBaseMixin(object): 'continuing from current step %(step)s.', {'node': node.uuid, 'step': node.clean_step}) - node.set_driver_internal_info('skip_current_clean_step', False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_CLEAN_STEP, False) node.save() else: # Restart the process, agent must have rebooted to new version @@ -1054,14 +1045,14 @@ class AgentBaseMixin(object): agent_commands = client.get_commands_status(task.node) if _freshly_booted(agent_commands, step_type): if step_type == 'clean': - field = 'cleaning_reboot' + field = async_steps.CLEANING_REBOOT elif step_type == 'service': - field = 'servicing_reboot' + field = async_steps.SERVICING_REBOOT else: # TODO(TheJulia): One day we should standardize the field # names here, but we also need to balance human ability # to understand what is going on so *shrug*. - field = 'deployment_reboot' + field = async_steps.DEPLOYMENT_REBOOT utils.pop_node_nested_field(node, 'driver_internal_info', field) node.save() return _continue_steps(task, step_type) diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 523567731f..c2b799727c 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -25,6 +25,7 @@ from oslo_utils import excutils from oslo_utils import fileutils from oslo_utils import strutils +from ironic.common import async_steps from ironic.common import exception from ironic.common import faults from ironic.common.glance_service import service_utils @@ -1449,85 +1450,9 @@ parse_instance_info_capabilities = ( utils.parse_instance_info_capabilities ) - -def get_async_step_return_state(node): - """Returns state based on operation being invoked. - - :param node: an ironic node object. - :returns: states.CLEANWAIT if cleaning operation in progress, - or states.DEPLOYWAIT if deploy operation in progress, - or states.SERVICEWAIT if servicing in progress. - """ - # FIXME(dtantsur): this distinction is rather useless, create a new - # constant to use for all step types? - if node.clean_step: - return states.CLEANWAIT - elif node.service_step: - return states.SERVICEWAIT - else: - # TODO(dtantsur): ideally, check for node.deploy_step and raise - # something if this function is called without any step field set. - # Unfortunately, a lot of unit tests rely on exactly this. - return states.DEPLOYWAIT - - -def _check_agent_token_prior_to_agent_reboot(node): - """Removes the agent token if it was not pregenerated. - - Removal of the agent token in cases where it is not pregenerated - is a vital action prior to rebooting the agent, as without doing - so the agent will be unable to establish communication with - the ironic API after the reboot. Effectively locking itself out - as in cases where the value is not pregenerated, it is not - already included in the payload and must be generated again - upon lookup. - - :param node: The Node object. - """ - if not node.driver_internal_info.get('agent_secret_token_pregenerated', - False): - node.del_driver_internal_info('agent_secret_token') - - -def set_async_step_flags(node, reboot=None, skip_current_step=None, - polling=None): - """Sets appropriate reboot flags in driver_internal_info based on operation - - :param node: an ironic node object. - :param reboot: Boolean value to set for node's driver_internal_info flag - cleaning_reboot or deployment_reboot based on cleaning or deployment - operation in progress. If it is None, corresponding reboot flag is - not set in node's driver_internal_info. - :param skip_current_step: Boolean value to set for node's - driver_internal_info flag skip_current_clean_step or - skip_current_deploy_step based on cleaning or deployment operation - in progress. If it is None, corresponding skip step flag is not set - in node's driver_internal_info. - :param polling: Boolean value to set for node's driver_internal_info flag - deployment_polling or cleaning_polling. If it is None, the - corresponding polling flag is not set in the node's - driver_internal_info. - """ - if node.clean_step: - reboot_field = 'cleaning_reboot' - skip_field = 'skip_current_clean_step' - polling_field = 'cleaning_polling' - else: - reboot_field = 'deployment_reboot' - skip_field = 'skip_current_deploy_step' - polling_field = 'deployment_polling' - - if reboot is not None: - node.set_driver_internal_info(reboot_field, reboot) - if reboot: - # If rebooting, we must ensure that we check and remove - # an agent token if necessary. - _check_agent_token_prior_to_agent_reboot(node) - if skip_current_step is not None: - node.set_driver_internal_info(skip_field, skip_current_step) - if polling is not None: - node.set_driver_internal_info(polling_field, polling) - node.save() +# NOTE(dtantsur): backward compatibility, do not use +get_async_step_return_state = async_steps.get_return_state +set_async_step_flags = async_steps.set_node_flags def prepare_agent_boot(task): @@ -1558,7 +1483,7 @@ def reboot_to_finish_step(task, timeout=None): prepare_agent_boot(task) manager_utils.node_power_action(task, states.REBOOT, timeout) - return get_async_step_return_state(task.node) + return async_steps.get_return_state(task.node) def step_error_handler(task, logmsg, errmsg=None): diff --git a/ironic/tests/unit/conductor/test_servicing.py b/ironic/tests/unit/conductor/test_servicing.py index dda03784c4..4604a9ad82 100644 --- a/ironic/tests/unit/conductor/test_servicing.py +++ b/ironic/tests/unit/conductor/test_servicing.py @@ -609,7 +609,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase): last_error=None, driver_internal_info={'service_steps': self.service_steps, 'service_step_index': None, - 'service_reboot': True}, + 'servicing_reboot': True}, service_step={}) mock_execute.side_effect = exception.AgentConnectionFailed( reason='failed') @@ -642,7 +642,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase): last_error=None, driver_internal_info={'service_steps': self.service_steps, 'service_step_index': None, - 'service_reboot': True}, + 'servicing_reboot': True}, service_step={}) mock_execute.side_effect = exception.AgentInProgress( reason='still meowing') @@ -667,7 +667,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase): # Resume where last_step is the last service step tgt_prov_state = states.ACTIVE info = {'service_steps': self.service_steps, - 'service_reboot': True, + 'servicing_reboot': True, 'service_step_index': len(self.service_steps) - 1} node = obj_utils.create_test_node( @@ -689,7 +689,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase): self.assertEqual(states.NOSTATE, node.target_provision_state) self.assertEqual({}, node.service_step) self.assertNotIn('service_step_index', node.driver_internal_info) - self.assertNotIn('service_reboot', node.driver_internal_info) + self.assertNotIn('servicing_reboot', node.driver_internal_info) self.assertIsNone(node.driver_internal_info['service_steps']) self.assertFalse(mock_execute.called) @@ -933,8 +933,8 @@ class DoNodeServiceAbortTestCase(db_base.DbTestCase): 'agent_url': 'some url', 'agent_secret_token': 'token', 'service_step_index': 2, - 'service_reboot': True, - 'service_polling': True, + 'servicing_reboot': True, + 'servicing_polling': True, 'skip_current_service_step': True}) with task_manager.acquire(self.context, node.uuid) as task: @@ -948,9 +948,9 @@ class DoNodeServiceAbortTestCase(db_base.DbTestCase): self.assertEqual({}, task.node.service_step) self.assertNotIn('service_step_index', task.node.driver_internal_info) - self.assertNotIn('service_reboot', + self.assertNotIn('servicing_reboot', task.node.driver_internal_info) - self.assertNotIn('service_polling', + self.assertNotIn('servicing_polling', task.node.driver_internal_info) self.assertNotIn('skip_current_service_step', task.node.driver_internal_info) diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index e99784bd11..ee651e0701 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -1497,8 +1497,8 @@ class ErrorHandlersTestCase(db_base.DbTestCase): target = 'baz' self.node.target_provision_state = target self.node.service_step = {'key': 'val'} - self.node.set_driver_internal_info('service_reboot', True) - self.node.set_driver_internal_info('service_polling', True) + self.node.set_driver_internal_info('servicing_reboot', True) + self.node.set_driver_internal_info('servicing_polling', True) self.node.set_driver_internal_info('skip_current_service_step', True) self.node.set_driver_internal_info('service_step_index', 0) self.node.set_driver_internal_info('agent_url', 'url') @@ -1513,8 +1513,8 @@ class ErrorHandlersTestCase(db_base.DbTestCase): self.node.save.assert_called_once_with() self.assertEqual({}, self.node.service_step) self.assertNotIn('service_step_index', self.node.driver_internal_info) - self.assertNotIn('service_reboot', self.node.driver_internal_info) - self.assertNotIn('service_polling', self.node.driver_internal_info) + self.assertNotIn('servicing_reboot', self.node.driver_internal_info) + self.assertNotIn('servicing_polling', self.node.driver_internal_info) self.assertNotIn('skip_current_service_step', self.node.driver_internal_info) self.assertNotIn('agent_secret_token', self.node.driver_internal_info) @@ -2842,16 +2842,16 @@ class ServiceUtilsTestCase(db_base.DbTestCase): self.node.driver_internal_info = { 'service_steps': {'foo': 'bar'}, 'agent_cached_service_steps': {'more_foo': None}, - 'service_reboot': False, - 'service_polling': 1, + 'servicing_reboot': False, + 'servicing_polling': 1, 'service_disable_ramdisk': False, 'skip_current_service_step': False, 'steps_validated': 'meow' 'agent_secret_token'} self.node.save() not_in_list = ['agent_cached_service_steps', - 'serivce_reboot', - 'service_polling', + 'servicing_reboot', + 'servicing_polling', 'service_disable_ramdisk', 'skip_current_service_step', 'steps_validated', diff --git a/ironic/tests/unit/drivers/modules/drac/test_management.py b/ironic/tests/unit/drivers/modules/drac/test_management.py index ea425e2c54..6d693fcf0b 100644 --- a/ironic/tests/unit/drivers/modules/drac/test_management.py +++ b/ironic/tests/unit/drivers/modules/drac/test_management.py @@ -930,8 +930,6 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest): self.assertRaises(exception.InvalidParameterValue, self.management.import_configuration, task, 'edge') - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @@ -941,11 +939,9 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest): def test_import_configuration_success( self, mock_get_system, mock_get_configuration, mock_log, mock_power, mock_build_agent_options, - mock_set_async_step_flags, mock_get_async_step_return_state): + mock_set_async_step_flags): deploy_opts = mock.Mock() mock_build_agent_options.return_value = deploy_opts - step_result = mock.Mock() - mock_get_async_step_return_state.return_value = step_result task = mock.Mock(node=self.node, context=self.context) fake_manager_oem1 = mock.Mock() fake_manager_oem1.import_system_configuration.side_effect = ( @@ -961,6 +957,7 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest): '"data": {"prop1": "value1", "prop2": 2}}}') result = self.management.import_configuration(task, 'edge') + self.assertEqual(states.DEPLOYWAIT, result) fake_manager_oem2.import_system_configuration.assert_called_once_with( '{"prop1": "value1", "prop2": 2}') @@ -971,8 +968,6 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest): mock_build_agent_options.assert_called_once_with(task.node) task.driver.boot.prepare_ramdisk.assert_called_once_with( task, deploy_opts) - mock_get_async_step_return_state.assert_called_once_with(task.node) - self.assertEqual(step_result, result) @mock.patch.object(drac_mgmt.DracRedfishManagement, 'import_configuration', autospec=True) diff --git a/ironic/tests/unit/drivers/modules/redfish/test_management.py b/ironic/tests/unit/drivers/modules/redfish/test_management.py index b1176476bf..bf782520f0 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_management.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_management.py @@ -1012,13 +1012,10 @@ class RedfishManagementTestCase(db_base.DbTestCase): @mock.patch.object(redfish_boot.RedfishVirtualMediaBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) def test_update_firmware(self, mock_get_update_service, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_prepare, build_mock): build_mock.return_value = {'a': 'b'} @@ -1032,12 +1029,13 @@ class RedfishManagementTestCase(db_base.DbTestCase): shared=False) as task: task.node.save = mock.Mock() - task.driver.management.update_firmware( + result = task.driver.management.update_firmware( task, [{'url': 'http://test1', 'checksum': 'aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d'}, {'url': 'http://test2', 'checksum': '9f6227549221920e312fed2cfc6586ee832cc546'}]) + self.assertEqual(states.DEPLOYWAIT, result) mock_get_update_service.assert_called_once_with(task.node) mock_update_service.simple_update.assert_called_once_with( @@ -1054,8 +1052,6 @@ class RedfishManagementTestCase(db_base.DbTestCase): task.node.driver_internal_info.get('firmware_cleanup')) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) @@ -1066,14 +1062,11 @@ class RedfishManagementTestCase(db_base.DbTestCase): @mock.patch.object(redfish_boot.RedfishVirtualMediaBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) def test_update_firmware_stage( self, mock_get_update_service, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, - mock_prepare, build_mock, mock_stage): + mock_node_power_action, mock_prepare, build_mock, mock_stage): build_mock.return_value = {'a': 'b'} mock_task_monitor = mock.Mock() mock_task_monitor.task_monitor_uri = '/task/123' @@ -1109,8 +1102,6 @@ class RedfishManagementTestCase(db_base.DbTestCase): ['http'], task.node.driver_internal_info['firmware_cleanup']) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) @@ -1121,14 +1112,11 @@ class RedfishManagementTestCase(db_base.DbTestCase): @mock.patch.object(redfish_boot.RedfishVirtualMediaBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) def test_update_firmware_stage_both( self, mock_get_update_service, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, - mock_prepare, build_mock, mock_stage): + mock_node_power_action, mock_prepare, build_mock, mock_stage): build_mock.return_value = {'a': 'b'} mock_task_monitor = mock.Mock() mock_task_monitor.task_monitor_uri = '/task/123' @@ -1170,8 +1158,6 @@ class RedfishManagementTestCase(db_base.DbTestCase): task.node.driver_internal_info['firmware_cleanup']) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) diff --git a/ironic/tests/unit/drivers/modules/redfish/test_raid.py b/ironic/tests/unit/drivers/modules/redfish/test_raid.py index 026c3c0802..17c90e79a6 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_raid.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_raid.py @@ -181,13 +181,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_1a( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -215,13 +212,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_1b( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -264,13 +258,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_1b_apply_time_immediate( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -305,7 +296,7 @@ class RedfishRAIDTestCase(db_base.DbTestCase): self.node.save() with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: - task.driver.raid.create_configuration(task) + self.assertIsNone(task.driver.raid.create_configuration(task)) pre = '/redfish/v1/Systems/1/Storage/1/Drives/' expected_payload = { 'RAIDType': 'RAID5', @@ -323,7 +314,6 @@ class RedfishRAIDTestCase(db_base.DbTestCase): expected_payload, apply_time=sushy.APPLY_TIME_IMMEDIATE) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=False, skip_current_step=True, polling=True) - self.assertEqual(mock_get_async_step_return_state.call_count, 0) self.assertEqual(mock_node_power_action.call_count, 0) self.assertEqual(mock_build_agent_options.call_count, 0) self.assertEqual(mock_prepare_ramdisk.call_count, 0) @@ -341,13 +331,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_1b_apply_time_on_reset( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -376,7 +363,8 @@ class RedfishRAIDTestCase(db_base.DbTestCase): self.node.save() with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: - task.driver.raid.create_configuration(task) + result = task.driver.raid.create_configuration(task) + self.assertEqual(states.DEPLOYWAIT, result) pre = '/redfish/v1/Systems/1/Storage/1/Drives/' expected_payload = { 'RAIDType': 'RAID5', @@ -393,8 +381,6 @@ class RedfishRAIDTestCase(db_base.DbTestCase): expected_payload, apply_time=sushy.APPLY_TIME_ON_RESET) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) mock_build_agent_options.assert_called_once_with(task.node) @@ -406,13 +392,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_2( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -497,13 +480,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_2_on_reset( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -579,13 +559,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_3( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -635,13 +612,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_4( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -756,13 +730,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_5a( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -795,13 +766,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_5b( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -863,13 +831,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_6( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -903,13 +868,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_interface_type( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -996,13 +958,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_delete_config_immediate( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -1031,12 +990,11 @@ class RedfishRAIDTestCase(db_base.DbTestCase): 'raid_level': '1', 'size_gb': 100}], 'last_updated': last_updated} - task.driver.raid.delete_configuration(task) + self.assertIsNone(task.driver.raid.delete_configuration(task)) self.assertEqual(mock_volumes[0].delete.call_count, 1) self.assertEqual(mock_volumes[1].delete.call_count, 1) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=False, skip_current_step=True, polling=True) - self.assertEqual(mock_get_async_step_return_state.call_count, 0) self.assertEqual(mock_node_power_action.call_count, 0) self.assertEqual(mock_build_agent_options.call_count, 0) self.assertEqual(mock_prepare_ramdisk.call_count, 0) @@ -1050,13 +1008,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_delete_config_on_reset( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -1088,13 +1043,12 @@ class RedfishRAIDTestCase(db_base.DbTestCase): 'size_gb': 100}], 'last_updated': '2022-05-18 08:49:17.585443'} task.node.raid_config = raid_config - task.driver.raid.delete_configuration(task) + result = task.driver.raid.delete_configuration(task) + self.assertEqual(states.DEPLOYWAIT, result) self.assertEqual(mock_volumes[0].delete.call_count, 1) self.assertEqual(mock_volumes[1].delete.call_count, 0) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) mock_build_agent_options.assert_called_once_with(task.node) diff --git a/releasenotes/notes/servicing-reboot-502f474a01f937a8.yaml b/releasenotes/notes/servicing-reboot-502f474a01f937a8.yaml new file mode 100644 index 0000000000..02a358d6d7 --- /dev/null +++ b/releasenotes/notes/servicing-reboot-502f474a01f937a8.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes service steps that rely on a reboot. Previously, the reboot was not + properly recognized in the conductor logic.