From 6df82ee2bcc702168e80ec7c81e5573d766f9c71 Mon Sep 17 00:00:00 2001 From: Kaifeng Wang Date: Thu, 22 Mar 2018 17:35:00 +0800 Subject: [PATCH] Implementation of inspect wait state This patch provides implementations to feature of adding inspect wait state. Changes covered in this patch: * Added state and transitions, state diagram regenerated. * inspector and oneview inspect interface now return INSPECTWAIT instead of INSPECTING. Move node to inspect wait if inspect interface returns INSPECTING or INSPECTWAIT. * Add a timeout option to conductor, and a periodic task to check timeout in the inspect wait state. Story: #1725211 Task: #10630 Partial-Bug: #1725211 Change-Id: Ie76bfdad5966014a4dae826919ff5705462c743b --- doc/source/contributor/states.rst | 11 +- .../contributor/webapi-version-history.rst | 6 + doc/source/images/states.svg | 527 +++++++++--------- ironic/api/controllers/v1/node.py | 11 + ironic/api/controllers/v1/port.py | 10 + ironic/api/controllers/v1/portgroup.py | 9 + ironic/api/controllers/v1/utils.py | 9 + ironic/api/controllers/v1/versions.py | 5 +- ironic/common/release_mappings.py | 2 +- ironic/common/states.py | 30 +- ironic/conductor/manager.py | 44 +- ironic/conf/conductor.py | 3 +- ironic/drivers/modules/inspector.py | 8 +- ironic/drivers/modules/oneview/inspect.py | 4 +- .../unit/api/controllers/v1/test_node.py | 37 ++ .../unit/api/controllers/v1/test_port.py | 26 + .../unit/api/controllers/v1/test_portgroup.py | 32 ++ ironic/tests/unit/conductor/test_manager.py | 108 +++- .../unit/drivers/modules/test_inspector.py | 10 +- ...d-inspect-wait-state-948f83dfe342897b.yaml | 22 + 20 files changed, 602 insertions(+), 312 deletions(-) create mode 100644 releasenotes/notes/add-inspect-wait-state-948f83dfe342897b.yaml diff --git a/doc/source/contributor/states.rst b/doc/source/contributor/states.rst index 9209e119cc..43bc2d2622 100644 --- a/doc/source/contributor/states.rst +++ b/doc/source/contributor/states.rst @@ -68,8 +68,15 @@ manageable (stable state) inspecting ``inspecting`` will utilize node introspection to update hardware-derived - node properties to reflect the current state of the hardware. If - introspection fails, the node will transition to ``inspect failed``. + node properties to reflect the current state of the hardware. Typically, + the node will transition to ``manageable`` if inspection is synchronous, + or ``inspect wait`` if asynchronous. The node will transition to + ``inspect failed`` if error occurred. + +inspect wait + This is the provision state used when an asynchronous inspection is in + progress. A successfully inspected node shall transition to ``manageable`` + state. inspect failed This is the state a node will move into when inspection of the node fails. From diff --git a/doc/source/contributor/webapi-version-history.rst b/doc/source/contributor/webapi-version-history.rst index 033335cce0..c42e7cea41 100644 --- a/doc/source/contributor/webapi-version-history.rst +++ b/doc/source/contributor/webapi-version-history.rst @@ -2,6 +2,12 @@ REST API Version History ======================== +1.39 (Rocky, master) +-------------------- + +Added ``inspect wait`` to available provision states. A node is shown as +``inspect wait`` instead of ``inspecting`` during asynchronous inspection. + 1.38 (Queens, 10.1.0) --------------------- diff --git a/doc/source/images/states.svg b/doc/source/images/states.svg index b112291116..1988d04eac 100644 --- a/doc/source/images/states.svg +++ b/doc/source/images/states.svg @@ -4,479 +4,502 @@ - - + + Ironic states - + enroll - -enroll + +enroll verifying - -verifying + +verifying enroll->verifying - - -manage (via API) + + +manage (via API) verifying->enroll - - -fail + + +fail manageable - -manageable + +manageable verifying->manageable - - -done + + +done cleaning - -cleaning + +cleaning manageable->cleaning - - -provide (via API) + + +provide (via API) manageable->cleaning - - -clean (via API) + + +clean (via API) inspecting - -inspecting + +inspecting manageable->inspecting - - -inspect (via API) + + +inspect (via API) adopting - -adopting + +adopting manageable->adopting - - -adopt (via API) + + +adopt (via API) cleaning->manageable - - -manage + + +manage available - -available + +available cleaning->available - - -done + + +done clean failed - -clean failed + +clean failed cleaning->clean failed - - -fail + + +fail clean wait - -clean wait + +clean wait cleaning->clean wait - - -wait + + +wait inspecting->manageable - - -done + + +done inspect failed - -inspect failed + +inspect failed inspecting->inspect failed - - -fail + + +fail + + +inspect wait + +inspect wait + + +inspecting->inspect wait + + +wait active - -active + +active -adopting->active - - -done +adopting->active + + +done -adopt failed - -adopt failed +adopt failed + +adopt failed -adopting->adopt failed - - -fail +adopting->adopt failed + + +fail available->manageable - - -manage (via API) + + +manage (via API) deploying - -deploying + +deploying available->deploying - - -active (via API) + + +active (via API) deploying->active - - -done + + +done deploy failed - -deploy failed + +deploy failed deploying->deploy failed - - -fail + + +fail wait call-back - -wait call-back + +wait call-back deploying->wait call-back - - -wait + + +wait active->deploying - - -rebuild (via API) + + +rebuild (via API) deleting - -deleting + +deleting active->deleting - - -deleted (via API) + + +deleted (via API) rescuing - -rescuing + +rescuing active->rescuing - - -rescue (via API) + + +rescue (via API) deleting->cleaning - - -clean + + +clean error - -error + +error deleting->error - - -error + + +error rescue - -rescue + +rescue -rescuing->rescue - - -done +rescuing->rescue + + +done -rescue wait - -rescue wait +rescue wait + +rescue wait -rescuing->rescue wait - - -wait +rescuing->rescue wait + + +wait -rescue failed - -rescue failed +rescue failed + +rescue failed -rescuing->rescue failed - - -fail +rescuing->rescue failed + + +fail error->deploying - - -rebuild (via API) + + +rebuild (via API) error->deleting - - -deleted (via API) + + +deleted (via API) rescue->deleting - - -deleted (via API) + + +deleted (via API) rescue->rescuing - - -rescue (via API) + + +rescue (via API) unrescuing - -unrescuing + +unrescuing rescue->unrescuing - - -unrescue (via API) + + +unrescue (via API) -unrescuing->active - - -done +unrescuing->active + + +done -unrescue failed - -unrescue failed +unrescue failed + +unrescue failed -unrescuing->unrescue failed - - -fail +unrescuing->unrescue failed + + +fail deploy failed->deploying - - -rebuild (via API) + + +rebuild (via API) deploy failed->deploying - - -active (via API) + + +active (via API) deploy failed->deleting - - -deleted (via API) + + +deleted (via API) wait call-back->deploying - - -resume + + +resume wait call-back->deleting - - -deleted (via API) + + +deleted (via API) wait call-back->deploy failed - - -fail + + +fail clean failed->manageable - - -manage (via API) + + +manage (via API) clean wait->cleaning - - -resume + + +resume clean wait->clean failed - - -fail + + +fail clean wait->clean failed - - -abort (via API) + + +abort (via API) -inspect failed->manageable - - -manage (via API) +inspect failed->manageable + + +manage (via API) -inspect failed->inspecting - - -inspect (via API) +inspect failed->inspecting + + +inspect (via API) + + +inspect wait->manageable + + +done + + +inspect wait->inspect failed + + +fail -adopt failed->manageable - - -manage (via API) +adopt failed->manageable + + +manage (via API) -adopt failed->adopting - - -adopt (via API) +adopt failed->adopting + + +adopt (via API) -rescue wait->deleting - - -deleted (via API) +rescue wait->deleting + + +deleted (via API) -rescue wait->rescuing - - -resume +rescue wait->rescuing + + +resume -rescue wait->rescue failed - - -fail +rescue wait->rescue failed + + +fail -rescue wait->rescue failed - - -abort (via API) +rescue wait->rescue failed + + +abort (via API) -rescue failed->deleting - - -deleted (via API) +rescue failed->deleting + + +deleted (via API) -rescue failed->rescuing - - -rescue (via API) +rescue failed->rescuing + + +rescue (via API) -rescue failed->unrescuing - - -unrescue (via API) +rescue failed->unrescuing + + +unrescue (via API) -unrescue failed->deleting - - -deleted (via API) +unrescue failed->deleting + + +deleted (via API) -unrescue failed->rescuing - - -rescue (via API) +unrescue failed->rescuing + + +rescue (via API) -unrescue failed->unrescuing - - -unrescue (via API) +unrescue failed->unrescuing + + +unrescue (via API) diff --git a/ironic/api/controllers/v1/node.py b/ironic/api/controllers/v1/node.py index c32f483fa8..bd8a0fa416 100644 --- a/ironic/api/controllers/v1/node.py +++ b/ironic/api/controllers/v1/node.py @@ -173,6 +173,10 @@ def update_state_in_older_versions(obj): if (pecan.request.version.minor < versions.MINOR_2_AVAILABLE_STATE and obj.provision_state == ir_states.AVAILABLE): obj.provision_state = ir_states.NOSTATE + # if requested version < 1.39, convert INSPECTWAIT to INSPECTING + if (not api_utils.allow_inspect_wait_state() and + obj.provision_state == ir_states.INSPECTWAIT): + obj.provision_state = ir_states.INSPECTING class BootDeviceController(rest.RestController): @@ -1928,6 +1932,13 @@ class NodesController(rest.RestController): "is in progress.") raise wsme.exc.ClientSideError( msg % node_ident, status_code=http_client.CONFLICT) + elif (rpc_node.provision_state == ir_states.INSPECTING and + api_utils.allow_inspect_wait_state()): + msg = _('Cannot update node "%(node)s" while it is in state ' + '"%(state)s".') % {'node': rpc_node.uuid, + 'state': ir_states.INSPECTING} + raise wsme.exc.ClientSideError(msg, + status_code=http_client.CONFLICT) names = api_utils.get_patch_values(patch, '/name') if len(names): diff --git a/ironic/api/controllers/v1/port.py b/ironic/api/controllers/v1/port.py index 6f8b274922..2b4edcee6f 100644 --- a/ironic/api/controllers/v1/port.py +++ b/ironic/api/controllers/v1/port.py @@ -34,6 +34,7 @@ from ironic.api import expose from ironic.common import exception from ironic.common.i18n import _ from ironic.common import policy +from ironic.common import states as ir_states from ironic import objects METRICS = metrics_utils.get_metrics_logger(__name__) @@ -668,6 +669,15 @@ class PortsController(rest.RestController): rpc_port[field] = patch_val rpc_node = objects.Node.get_by_id(context, rpc_port.node_id) + if (rpc_node.provision_state == ir_states.INSPECTING and + api_utils.allow_inspect_wait_state()): + msg = _('Cannot update port "%(port)s" on "%(node)s" while it is ' + 'in state "%(state)s".') % {'port': rpc_port.uuid, + 'node': rpc_node.uuid, + 'state': ir_states.INSPECTING} + raise wsme.exc.ClientSideError(msg, + status_code=http_client.CONFLICT) + notify_extra = {'node_uuid': rpc_node.uuid, 'portgroup_uuid': port.portgroup_uuid} notify.emit_start_notification(context, rpc_port, 'update', diff --git a/ironic/api/controllers/v1/portgroup.py b/ironic/api/controllers/v1/portgroup.py index 09c4f409c8..b86559a849 100644 --- a/ironic/api/controllers/v1/portgroup.py +++ b/ironic/api/controllers/v1/portgroup.py @@ -30,6 +30,7 @@ from ironic.api import expose from ironic.common import exception from ironic.common.i18n import _ from ironic.common import policy +from ironic.common import states as ir_states from ironic import objects METRICS = metrics_utils.get_metrics_logger(__name__) @@ -543,6 +544,14 @@ class PortgroupsController(pecan.rest.RestController): rpc_portgroup[field] = patch_val rpc_node = objects.Node.get_by_id(context, rpc_portgroup.node_id) + if (rpc_node.provision_state == ir_states.INSPECTING and + api_utils.allow_inspect_wait_state()): + msg = _('Cannot update portgroup "%(portgroup)s" on node ' + '"%(node)s" while it is in state "%(state)s".') % { + 'portgroup': rpc_portgroup.uuid, 'node': rpc_node.uuid, + 'state': ir_states.INSPECTING} + raise wsme.exc.ClientSideError(msg, + status_code=http_client.CONFLICT) notify.emit_start_notification(context, rpc_portgroup, 'update', node_uuid=rpc_node.uuid) diff --git a/ironic/api/controllers/v1/utils.py b/ironic/api/controllers/v1/utils.py index 6f6c268a5e..86e4bd0937 100644 --- a/ironic/api/controllers/v1/utils.py +++ b/ironic/api/controllers/v1/utils.py @@ -721,6 +721,15 @@ def allow_traits(): return pecan.request.version.minor >= versions.MINOR_37_NODE_TRAITS +def allow_inspect_wait_state(): + """Check if inspect wait is allowed for the node. + + Version 1.39 of the API adds 'inspect wait' state to substitute + 'inspecting' state during asynchronous hardware inspection. + """ + return pecan.request.version.minor >= versions.MINOR_39_INSPECT_WAIT + + def handle_post_port_like_extra_vif(p_dict): """Handle a Post request that sets .extra['vif_port_id']. diff --git a/ironic/api/controllers/v1/versions.py b/ironic/api/controllers/v1/versions.py index 7166bafb57..87be879f55 100644 --- a/ironic/api/controllers/v1/versions.py +++ b/ironic/api/controllers/v1/versions.py @@ -75,7 +75,7 @@ BASE_VERSION = 1 # v1.36: Add Ironic Python Agent version support. # v1.37: Add node traits. # v1.38: Add rescue and unrescue provision states -# Add rescue_interface to the node object +# v1.39: Add inspect wait provision state. MINOR_0_JUNO = 0 MINOR_1_INITIAL_VERSION = 1 @@ -116,6 +116,7 @@ MINOR_35_REBUILD_CONFIG_DRIVE = 35 MINOR_36_AGENT_VERSION_HEARTBEAT = 36 MINOR_37_NODE_TRAITS = 37 MINOR_38_RESCUE_INTERFACE = 38 +MINOR_39_INSPECT_WAIT = 39 # When adding another version, update: # - MINOR_MAX_VERSION @@ -123,7 +124,7 @@ MINOR_38_RESCUE_INTERFACE = 38 # explanation of what changed in the new version # - common/release_mappings.py, RELEASE_MAPPING['master']['api'] -MINOR_MAX_VERSION = MINOR_38_RESCUE_INTERFACE +MINOR_MAX_VERSION = MINOR_39_INSPECT_WAIT # String representations of the minor and maximum versions _MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION) diff --git a/ironic/common/release_mappings.py b/ironic/common/release_mappings.py index 3079b827b2..92d07a0c82 100644 --- a/ironic/common/release_mappings.py +++ b/ironic/common/release_mappings.py @@ -100,7 +100,7 @@ RELEASE_MAPPING = { } }, 'master': { - 'api': '1.38', + 'api': '1.39', 'rpc': '1.44', 'objects': { 'Node': ['1.23'], diff --git a/ironic/common/states.py b/ironic/common/states.py index 0525f29d9a..a0d26405f6 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -164,13 +164,20 @@ INSPECTING = 'inspecting' """ Node is under inspection. This is the provision state used when inspection is started. A successfully -inspected node shall transition to MANAGEABLE state. +inspected node shall transition to MANAGEABLE state. For asynchronous +inspection, node shall transition to INSPECTWAIT state. """ - INSPECTFAIL = 'inspect failed' """ Node inspection failed. """ +INSPECTWAIT = 'inspect wait' +""" Node is under inspection. + +This is the provision state used when an asynchronous inspection is in +progress. A successfully inspected node shall transition to MANAGEABLE state. +""" + ADOPTING = 'adopting' """ Node is being adopted. @@ -209,8 +216,9 @@ UNRESCUEFAIL = 'unrescue failed' UNRESCUING = 'unrescuing' """ Node is being restored from rescue mode (to active state). """ -UPDATE_ALLOWED_STATES = (DEPLOYFAIL, INSPECTING, INSPECTFAIL, CLEANFAIL, ERROR, - VERIFYING, ADOPTFAIL, RESCUEFAIL, UNRESCUEFAIL) +UPDATE_ALLOWED_STATES = (DEPLOYFAIL, INSPECTING, INSPECTFAIL, INSPECTWAIT, + CLEANFAIL, ERROR, VERIFYING, ADOPTFAIL, RESCUEFAIL, + UNRESCUEFAIL) """Transitional states in which we allow updating a node.""" DELETE_ALLOWED_STATES = (AVAILABLE, MANAGEABLE, ENROLL, ADOPTFAIL) @@ -220,8 +228,8 @@ STABLE_STATES = (ENROLL, MANAGEABLE, AVAILABLE, ACTIVE, ERROR, RESCUE) """States that will not transition unless receiving a request.""" UNSTABLE_STATES = (DEPLOYING, DEPLOYWAIT, CLEANING, CLEANWAIT, VERIFYING, - DELETING, INSPECTING, ADOPTING, RESCUING, RESCUEWAIT, - UNRESCUING) + DELETING, INSPECTING, INSPECTWAIT, ADOPTING, RESCUING, + RESCUEWAIT, UNRESCUING) """States that can be changed without external request.""" ############## @@ -292,6 +300,7 @@ machine.add_transition(AVAILABLE, DEPLOYING, 'deploy') # Add inspect* states. machine.add_state(INSPECTING, target=MANAGEABLE, **watchers) machine.add_state(INSPECTFAIL, target=MANAGEABLE, **watchers) +machine.add_state(INSPECTWAIT, target=MANAGEABLE, **watchers) # Add adopt* states machine.add_state(ADOPTING, target=ACTIVE, **watchers) @@ -392,6 +401,15 @@ machine.add_transition(INSPECTING, MANAGEABLE, 'done') # Inspection may fail. machine.add_transition(INSPECTING, INSPECTFAIL, 'fail') +# Transition for asynchronous inspection +machine.add_transition(INSPECTING, INSPECTWAIT, 'wait') + +# Inspection is done +machine.add_transition(INSPECTWAIT, MANAGEABLE, 'done') + +# Inspection failed. +machine.add_transition(INSPECTWAIT, INSPECTFAIL, 'fail') + # Move the node to manageable state for any other # action. machine.add_transition(INSPECTFAIL, MANAGEABLE, 'manage') diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 95eb8e1dbb..8bf2420789 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -170,7 +170,8 @@ class ConductorManager(base_manager.BaseConductorManager): # TODO(dtantsur): reconsider allowing changing some (but not all) # interfaces for active nodes in the future. allowed_update_states = [states.ENROLL, states.INSPECTING, - states.MANAGEABLE, states.AVAILABLE] + states.INSPECTWAIT, states.MANAGEABLE, + states.AVAILABLE] action = _("Node %(node)s can not have %(field)s " "updated unless it is in one of allowed " "(%(allowed)s) states or in maintenance mode.") @@ -2231,7 +2232,7 @@ class ConductorManager(base_manager.BaseConductorManager): registered on another port already. :raises: InvalidState if port connectivity attributes are updated while node not in a MANAGEABLE or ENROLL or - INSPECTING state or not in MAINTENANCE mode. + INSPECTING or INSPECTWAIT state or not in MAINTENANCE mode. :raises: Conflict if trying to set extra/vif_port_id or pxe_enabled=True on port which is a member of portgroup with standalone_ports_supported=False. @@ -2268,6 +2269,7 @@ class ConductorManager(base_manager.BaseConductorManager): 'physical_network'} allowed_update_states = [states.ENROLL, states.INSPECTING, + states.INSPECTWAIT, states.MANAGEABLE] if (set(port_obj.obj_what_changed()) & connectivity_attr and not (node.provision_state in allowed_update_states @@ -2312,8 +2314,8 @@ class ConductorManager(base_manager.BaseConductorManager): :raises: PortgroupMACAlreadyExists if the update is setting a MAC which is registered on another portgroup already. :raises: InvalidState if portgroup-node association is updated while - node not in a MANAGEABLE or ENROLL or INSPECTING state or not - in MAINTENANCE mode. + node not in a MANAGEABLE or ENROLL or INSPECTING or + INSPECTWAIT state or not in MAINTENANCE mode. :raises: PortgroupNotEmpty if there are ports associated with this portgroup. :raises: Conflict when trying to set standalone_ports_supported=False @@ -2332,10 +2334,11 @@ class ConductorManager(base_manager.BaseConductorManager): if 'node_id' in portgroup_obj.obj_what_changed(): # NOTE(zhenguo): If portgroup update is modifying the # portgroup-node association then node should be in - # MANAGEABLE/INSPECTING/ENROLL provisioning state or in - # maintenance mode, otherwise InvalidState is raised. + # MANAGEABLE/INSPECTING/INSPECTWAIT/ENROLL provisioning state + # or in maintenance mode, otherwise InvalidState is raised. allowed_update_states = [states.ENROLL, states.INSPECTING, + states.INSPECTWAIT, states.MANAGEABLE] if (node.provision_state not in allowed_update_states and not node.maintenance): @@ -2771,24 +2774,24 @@ class ConductorManager(base_manager.BaseConductorManager): action='inspect', node=task.node.uuid, state=task.node.provision_state) - @METRICS.timer('ConductorManager._check_inspect_timeouts') + @METRICS.timer('ConductorManager._check_inspect_wait_timeouts') @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) - def _check_inspect_timeouts(self, context): - """Periodically checks inspect_timeout and fails upon reaching it. + def _check_inspect_wait_timeouts(self, context): + """Periodically checks inspect_wait_timeout and fails upon reaching it. :param: context: request context """ - callback_timeout = CONF.conductor.inspect_timeout + callback_timeout = CONF.conductor.inspect_wait_timeout if not callback_timeout: return filters = {'reserved': False, - 'provision_state': states.INSPECTING, + 'provision_state': states.INSPECTWAIT, 'inspection_started_before': callback_timeout} sort_key = 'inspection_started_at' last_error = _("timeout reached while inspecting the node") - self._fail_if_in_state(context, filters, states.INSPECTING, + self._fail_if_in_state(context, filters, states.INSPECTWAIT, sort_key, last_error=last_error) @METRICS.timer('ConductorManager.set_target_raid_config') @@ -3484,7 +3487,7 @@ def _do_inspect_hardware(task): :param: task: a TaskManager instance with an exclusive lock on its node. :raises: HardwareInspectionFailure if driver doesn't - return the state as states.MANAGEABLE or + return the state as states.MANAGEABLE, states.INSPECTWAIT or states.INSPECTING. """ @@ -3512,7 +3515,20 @@ def _do_inspect_hardware(task): task.process_event('done') LOG.info('Successfully inspected node %(node)s', {'node': node.uuid}) - elif new_state != states.INSPECTING: + # TODO(kaifeng): remove INSPECTING support during S* cycle. + elif new_state in (states.INSPECTING, states.INSPECTWAIT): + task.process_event('wait') + if new_state == states.INSPECTING: + inspect_intf_name = task.driver.inspect.__class__.__name__ + LOG.warning('Received INSPECTING state from %(intf)s. Returning ' + 'INSPECTING from InspectInterface.inspect_hardware ' + 'is deprecated, and will cause node be moved to ' + 'INSPECTFAIL state after deprecation period. Please ' + 'return INSPECTWAIT instead if the inspection process ' + 'is asynchronous.', {'intf': inspect_intf_name}) + LOG.info('Successfully started introspection on node %(node)s', + {'node': node.uuid}) + else: error = (_("During inspection, driver returned unexpected " "state %(state)s") % {'state': new_state}) handle_failure(error) diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index d3174bf29a..1d1175c31c 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -122,8 +122,9 @@ opts = [ help=_('Name of the Swift container to store config drive ' 'data. Used when configdrive_use_object_store is ' 'True.')), - cfg.IntOpt('inspect_timeout', + cfg.IntOpt('inspect_wait_timeout', default=1800, + deprecated_name='inspect_timeout', help=_('Timeout (seconds) for waiting for node inspection. ' '0 - unlimited.')), cfg.BoolOpt('automated_clean', diff --git a/ironic/drivers/modules/inspector.py b/ironic/drivers/modules/inspector.py index 2642a83a66..25104ee84b 100644 --- a/ironic/drivers/modules/inspector.py +++ b/ironic/drivers/modules/inspector.py @@ -119,7 +119,7 @@ class Inspector(base.InspectInterface): ironic-inspector. Results will be checked in a periodic task. :param task: a task from TaskManager. - :returns: states.INSPECTING + :returns: states.INSPECTWAIT """ LOG.debug('Starting inspection for node %(uuid)s using ' 'ironic-inspector', {'uuid': task.node.uuid}) @@ -128,13 +128,13 @@ class Inspector(base.InspectInterface): # we can release a lock as soon as possible and allow ironic-inspector # to operate on a node. eventlet.spawn_n(_start_inspection, task.node.uuid, task.context) - return states.INSPECTING + return states.INSPECTWAIT @periodics.periodic(spacing=CONF.inspector.status_check_period, enabled=CONF.inspector.enabled) def _periodic_check_result(self, manager, context): """Periodic task checking results of inspection.""" - filters = {'provision_state': states.INSPECTING} + filters = {'provision_state': states.INSPECTWAIT} node_iter = manager.iter_nodes(filters=filters) for node_uuid, driver in node_iter: @@ -171,7 +171,7 @@ def _start_inspection(node_uuid, context): def _check_status(task): """Check inspection status for node given by a task.""" node = task.node - if node.provision_state != states.INSPECTING: + if node.provision_state != states.INSPECTWAIT: return if not isinstance(task.driver.inspect, Inspector): return diff --git a/ironic/drivers/modules/oneview/inspect.py b/ironic/drivers/modules/oneview/inspect.py index c3d66e0b99..96313e59ce 100644 --- a/ironic/drivers/modules/oneview/inspect.py +++ b/ironic/drivers/modules/oneview/inspect.py @@ -66,7 +66,7 @@ class OneViewInspect(inspector.Inspector): @periodics.periodic(spacing=CONF.inspector.status_check_period, enabled=CONF.inspector.enabled) def _periodic_check_result(self, manager, context): - filters = {'provision_state': states.INSPECTING} + filters = {'provision_state': states.INSPECTWAIT} node_iter = manager.iter_nodes(filters=filters) for node_uuid, driver in node_iter: @@ -87,7 +87,7 @@ class OneViewInspect(inspector.Inspector): state_after = task.node.provision_state # inspection finished - if state_before == states.INSPECTING and state_after in [ + if state_before == states.INSPECTWAIT and state_after in [ states.MANAGEABLE, states.INSPECTFAIL ]: deploy_utils.deallocate_server_hardware_from_ironic(task.node) diff --git a/ironic/tests/unit/api/controllers/v1/test_node.py b/ironic/tests/unit/api/controllers/v1/test_node.py index 5f781bb0f5..14c5a91415 100644 --- a/ironic/tests/unit/api/controllers/v1/test_node.py +++ b/ironic/tests/unit/api/controllers/v1/test_node.py @@ -227,6 +227,20 @@ class TestListNodes(test_api_base.BaseApiTest): headers={api_base.Version.string: '1.36'}) self.assertNotIn('traits', data) + def test_node_inspect_wait_state_between_api_versions(self): + node = obj_utils.create_test_node(self.context, + provision_state='inspect wait') + lower_version_data = self.get_json( + '/nodes/%s' % node.uuid, + headers={api_base.Version.string: '1.38'}) + self.assertEqual('inspecting', lower_version_data['provision_state']) + + higher_version_data = self.get_json( + '/nodes/%s' % node.uuid, + headers={api_base.Version.string: '1.39'}) + self.assertEqual('inspect wait', + higher_version_data['provision_state']) + def test_get_one_custom_fields(self): node = obj_utils.create_test_node(self.context, chassis_id=self.chassis.id) @@ -1641,6 +1655,29 @@ class TestPatch(test_api_base.BaseApiTest): 'op': 'remove'}]) self.assertEqual(http_client.OK, response.status_code) + def test_update_in_inspecting_not_allowed(self): + node = obj_utils.create_test_node(self.context, + uuid=uuidutils.generate_uuid(), + provision_state=states.INSPECTING) + self.mock_update_node.return_value = node + response = self.patch_json('/nodes/%s' % node.uuid, + [{'path': '/instance_uuid', + 'op': 'remove'}], + headers={api_base.Version.string: "1.39"}, + expect_errors=True) + self.assertEqual(http_client.CONFLICT, response.status_code) + + def test_update_in_inspecting_allowed(self): + node = obj_utils.create_test_node(self.context, + uuid=uuidutils.generate_uuid(), + provision_state=states.INSPECTING) + self.mock_update_node.return_value = node + response = self.patch_json('/nodes/%s' % node.uuid, + [{'path': '/instance_uuid', + 'op': 'remove'}], + headers={api_base.Version.string: "1.38"}) + self.assertEqual(http_client.OK, response.status_code) + def test_add_state_in_deployfail(self): node = obj_utils.create_test_node(self.context, uuid=uuidutils.generate_uuid(), diff --git a/ironic/tests/unit/api/controllers/v1/test_port.py b/ironic/tests/unit/api/controllers/v1/test_port.py index c627daea4b..372a30bc77 100644 --- a/ironic/tests/unit/api/controllers/v1/test_port.py +++ b/ironic/tests/unit/api/controllers/v1/test_port.py @@ -33,6 +33,7 @@ from ironic.api.controllers.v1 import port as api_port from ironic.api.controllers.v1 import utils as api_utils from ironic.api.controllers.v1 import versions from ironic.common import exception +from ironic.common import states from ironic.common import utils as common_utils from ironic.conductor import rpcapi from ironic import objects @@ -1504,6 +1505,31 @@ class TestPatch(test_api_base.BaseApiTest): self.assertEqual(internal_info, response.json['internal_info']) self.assertTrue(mock_warn.called) + def test_update_in_inspecting_not_allowed(self, mock_upd): + self.node.provision_state = states.INSPECTING + self.node.save() + response = self.patch_json('/ports/%s' % self.port.uuid, + [{'path': '/extra/foo', + 'value': 'bar', + 'op': 'add'}], + headers={api_base.Version.string: "1.39"}, + expect_errors=True) + self.assertEqual(http_client.CONFLICT, response.status_code) + self.assertFalse(mock_upd.called) + + def test_update_in_inspecting_allowed(self, mock_upd): + self.node.provision_state = states.INSPECTING + self.node.save() + extra = {'foo': 'bar'} + response = self.patch_json('/ports/%s' % self.port.uuid, + [{'path': '/extra/foo', + 'value': 'bar', + 'op': 'add'}], + headers={api_base.Version.string: "1.38"}) + self.assertEqual(http_client.OK, response.status_code) + self.assertEqual(extra, response.json['extra']) + self.assertTrue(mock_upd.called) + @mock.patch.object(rpcapi.ConductorAPI, 'create_port', autospec=True, side_effect=_rpcapi_create_port) diff --git a/ironic/tests/unit/api/controllers/v1/test_portgroup.py b/ironic/tests/unit/api/controllers/v1/test_portgroup.py index 431393810a..006d433d73 100644 --- a/ironic/tests/unit/api/controllers/v1/test_portgroup.py +++ b/ironic/tests/unit/api/controllers/v1/test_portgroup.py @@ -31,6 +31,7 @@ from ironic.api.controllers.v1 import notification_utils from ironic.api.controllers.v1 import portgroup as api_portgroup from ironic.api.controllers.v1 import utils as api_utils from ironic.common import exception +from ironic.common import states from ironic.common import utils as common_utils from ironic.conductor import rpcapi from ironic import objects @@ -974,6 +975,37 @@ class TestPatch(test_api_base.BaseApiTest): self.assertTrue(response.json['error_message']) self.assertFalse(mock_upd.called) + def test_update_in_inspecting_not_allowed(self, mock_upd): + self.node.provision_state = states.INSPECTING + self.node.save() + address = 'AA:BB:CC:DD:EE:FF' + response = self.patch_json('/portgroups/%s' % self.portgroup.uuid, + [{'path': '/address', + 'value': address, + 'op': 'replace'}], + expect_errors=True, + headers={api_base.Version.string: "1.39"}) + self.assertEqual(http_client.CONFLICT, response.status_code) + self.assertFalse(mock_upd.called) + + def test_update_in_inspecting_allowed(self, mock_upd): + self.node.provision_state = states.INSPECTING + self.node.save() + address = 'AA:BB:CC:DD:EE:FF' + mock_upd.return_value = self.portgroup + mock_upd.return_value.address = address.lower() + response = self.patch_json('/portgroups/%s' % self.portgroup.uuid, + [{'path': '/address', + 'value': address, + 'op': 'replace'}], + expect_errors=True, + headers={api_base.Version.string: "1.38"}) + self.assertEqual(http_client.OK, response.status_int) + self.assertEqual(address.lower(), response.json['address']) + self.assertTrue(mock_upd.called) + kargs = mock_upd.call_args[0][1] + self.assertEqual(address.lower(), kargs.address) + @mock.patch.object(rpcapi.ConductorAPI, 'update_portgroup', autospec=True, side_effect=_rpcapi_update_portgroup) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 3a66485473..516f86d9ea 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -613,7 +613,7 @@ class UpdateNodeTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): def test_update_node_interface_in_allowed_state(self): for state in [states.ENROLL, states.MANAGEABLE, states.INSPECTING, - states.AVAILABLE]: + states.INSPECTWAIT, states.AVAILABLE]: self._test_update_node_interface_in_allowed_state(state) def test_update_node_interface_in_maintenance(self): @@ -4011,6 +4011,22 @@ class UpdatePortTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_val.assert_called_once_with(mock.ANY, mock.ANY) mock_pc.assert_called_once_with(mock.ANY, mock.ANY, port) + @mock.patch.object(n_flat.FlatNetwork, 'port_changed', autospec=True) + @mock.patch.object(n_flat.FlatNetwork, 'validate', autospec=True) + def test_update_port_to_node_in_inspect_wait_state(self, mock_val, + mock_pc): + node = obj_utils.create_test_node(self.context, driver='fake', + provision_state=states.INSPECTWAIT) + port = obj_utils.create_test_port(self.context, + node_id=node.id, + extra={'foo': 'bar'}) + port.pxe_enabled = True + self.service.update_port(self.context, port) + port.refresh() + self.assertEqual(True, port.pxe_enabled) + mock_val.assert_called_once_with(mock.ANY, mock.ANY) + mock_pc.assert_called_once_with(mock.ANY, mock.ANY, port) + @mock.patch.object(n_flat.FlatNetwork, 'port_changed', autospec=True) @mock.patch.object(n_flat.FlatNetwork, 'validate', autospec=True) def test_update_port_node_active_state_and_maintenance(self, mock_val, @@ -4637,6 +4653,32 @@ class UpdatePortgroupTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_val.assert_called_once_with(mock.ANY, mock.ANY) mock_pgc.assert_called_once_with(mock.ANY, mock.ANY, portgroup) + @mock.patch.object(dbapi.IMPL, 'get_ports_by_portgroup_id') + @mock.patch.object(n_flat.FlatNetwork, 'portgroup_changed', autospec=True) + @mock.patch.object(n_flat.FlatNetwork, 'validate', autospec=True) + def test_update_portgroup_to_node_in_inspect_wait_state(self, mock_val, + mock_pgc, + mock_get_ports): + node = obj_utils.create_test_node(self.context, driver='fake') + portgroup = obj_utils.create_test_portgroup(self.context, + node_id=node.id, + extra={'foo': 'bar'}) + update_node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.INSPECTWAIT, + uuid=uuidutils.generate_uuid()) + mock_get_ports.return_value = [] + + self._start_service() + + portgroup.node_id = update_node.id + self.service.update_portgroup(self.context, portgroup) + portgroup.refresh() + self.assertEqual(update_node.id, portgroup.node_id) + mock_get_ports.assert_called_once_with(portgroup.uuid) + mock_val.assert_called_once_with(mock.ANY, mock.ANY) + mock_pgc.assert_called_once_with(mock.ANY, mock.ANY, portgroup) + @mock.patch.object(dbapi.IMPL, 'get_ports_by_portgroup_id') @mock.patch.object(n_flat.FlatNetwork, 'portgroup_changed', autospec=True) @mock.patch.object(n_flat.FlatNetwork, 'validate', autospec=True) @@ -5893,8 +5935,22 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_inspect.return_value = states.INSPECTING manager._do_inspect_hardware(task) node.refresh() - self.assertEqual(states.INSPECTING, node.provision_state) - self.assertEqual(states.NOSTATE, node.target_provision_state) + self.assertEqual(states.INSPECTWAIT, node.provision_state) + self.assertEqual(states.MANAGEABLE, node.target_provision_state) + self.assertIsNone(node.last_error) + mock_inspect.assert_called_once_with(mock.ANY) + + @mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware') + def test_inspect_hardware_return_inspect_wait(self, mock_inspect): + self._start_service() + node = obj_utils.create_test_node(self.context, driver='fake', + provision_state=states.INSPECTING) + task = task_manager.TaskManager(self.context, node.uuid) + mock_inspect.return_value = states.INSPECTWAIT + manager._do_inspect_hardware(task) + node.refresh() + self.assertEqual(states.INSPECTWAIT, node.provision_state) + self.assertEqual(states.MANAGEABLE, node.target_provision_state) self.assertIsNone(node.last_error) mock_inspect.assert_called_once_with(mock.ANY) @@ -5915,17 +5971,17 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_inspect.assert_called_once_with(mock.ANY) self.assertTrue(log_mock.error.called) - def test__check_inspect_timeouts(self): + def test__check_inspect_wait_timeouts(self): self._start_service() - CONF.set_override('inspect_timeout', 1, group='conductor') + CONF.set_override('inspect_wait_timeout', 1, group='conductor') node = obj_utils.create_test_node( self.context, driver='fake', - provision_state=states.INSPECTING, + provision_state=states.INSPECTWAIT, target_provision_state=states.MANAGEABLE, provision_updated_at=datetime.datetime(2000, 1, 1, 0, 0), inspection_started_at=datetime.datetime(2000, 1, 1, 0, 0)) - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) self._stop_service() node.refresh() self.assertEqual(states.INSPECTFAIL, node.provision_state) @@ -6030,26 +6086,26 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): @mock.patch.object(task_manager, 'acquire') @mock.patch.object(manager.ConductorManager, '_mapped_to_this_conductor') @mock.patch.object(dbapi.IMPL, 'get_nodeinfo_list') -class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, - db_base.DbTestCase): +class ManagerCheckInspectWaitTimeoutsTestCase(mgr_utils.CommonMixIn, + db_base.DbTestCase): def setUp(self): - super(ManagerCheckInspectTimeoutsTestCase, self).setUp() - self.config(inspect_timeout=300, group='conductor') + super(ManagerCheckInspectWaitTimeoutsTestCase, self).setUp() + self.config(inspect_wait_timeout=300, group='conductor') self.service = manager.ConductorManager('hostname', 'test-topic') self.service.dbapi = self.dbapi - self.node = self._create_node(provision_state=states.INSPECTING, + self.node = self._create_node(provision_state=states.INSPECTWAIT, target_provision_state=states.MANAGEABLE) self.task = self._create_task(node=self.node) self.node2 = self._create_node( - provision_state=states.INSPECTING, + provision_state=states.INSPECTWAIT, target_provision_state=states.MANAGEABLE) self.task2 = self._create_task(node=self.node2) self.filters = {'reserved': False, 'inspection_started_before': 300, - 'provision_state': states.INSPECTING} + 'provision_state': states.INSPECTWAIT} self.columns = ['uuid', 'driver'] def _assert_get_nodeinfo_args(self, get_nodeinfo_mock): @@ -6059,9 +6115,9 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, def test__check_inspect_timeouts_disabled(self, get_nodeinfo_mock, mapped_mock, acquire_mock): - self.config(inspect_timeout=0, group='conductor') + self.config(inspect_wait_timeout=0, group='conductor') - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) self.assertFalse(get_nodeinfo_mock.called) self.assertFalse(mapped_mock.called) @@ -6072,7 +6128,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response() mapped_mock.return_value = False - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) self._assert_get_nodeinfo_args(get_nodeinfo_mock) mapped_mock.assert_called_once_with(self.node.uuid, self.node.driver) @@ -6084,7 +6140,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, mapped_mock.return_value = True acquire_mock.side_effect = self._get_acquire_side_effect(self.task) - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) self._assert_get_nodeinfo_args(get_nodeinfo_mock) mapped_mock.assert_called_once_with(self.node.uuid, self.node.driver) @@ -6101,7 +6157,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, acquire_mock.side_effect = exception.NodeNotFound(node='fake') # Exception eaten - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) self._assert_get_nodeinfo_args(get_nodeinfo_mock) mapped_mock.assert_called_once_with(self.node.uuid, @@ -6121,7 +6177,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, host='fake') # Exception eaten - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) self._assert_get_nodeinfo_args(get_nodeinfo_mock) mapped_mock.assert_called_once_with(self.node.uuid, @@ -6142,7 +6198,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, mapped_mock.return_value = True acquire_mock.side_effect = self._get_acquire_side_effect(task) - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) self._assert_get_nodeinfo_args(get_nodeinfo_mock) mapped_mock.assert_called_once_with( @@ -6155,7 +6211,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, def test__check_inspect_timeouts_to_maintenance_after_lock( self, get_nodeinfo_mock, mapped_mock, acquire_mock): task = self._create_task( - node_attrs=dict(provision_state=states.INSPECTING, + node_attrs=dict(provision_state=states.INSPECTWAIT, target_provision_state=states.MANAGEABLE, maintenance=True, uuid=self.node.uuid)) @@ -6165,7 +6221,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, acquire_mock.side_effect = ( self._get_acquire_side_effect([task, self.task2])) - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) self._assert_get_nodeinfo_args(get_nodeinfo_mock) self.assertEqual([mock.call(self.node.uuid, task.node.driver), @@ -6190,7 +6246,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, [(self.task, exception.NoFreeConductorWorker()), self.task2]) # Exception should be nuked - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) self._assert_get_nodeinfo_args(get_nodeinfo_mock) # mapped should be only called for the first node as we should @@ -6212,7 +6268,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, # Should re-raise self.assertRaises(exception.IronicException, - self.service._check_inspect_timeouts, + self.service._check_inspect_wait_timeouts, self.context) self._assert_get_nodeinfo_args(get_nodeinfo_mock) @@ -6238,7 +6294,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, acquire_mock.side_effect = ( self._get_acquire_side_effect([self.task] * 3)) - self.service._check_inspect_timeouts(self.context) + self.service._check_inspect_wait_timeouts(self.context) # Should only have ran 2. self.assertEqual([mock.call(self.node.uuid, self.node.driver)] * 2, diff --git a/ironic/tests/unit/drivers/modules/test_inspector.py b/ironic/tests/unit/drivers/modules/test_inspector.py index 010d04b95d..9cfe1f6447 100644 --- a/ironic/tests/unit/drivers/modules/test_inspector.py +++ b/ironic/tests/unit/drivers/modules/test_inspector.py @@ -150,7 +150,7 @@ class CommonFunctionsTestCase(BaseTestCase): class InspectHardwareTestCase(BaseTestCase): def test_ok(self, mock_client): mock_introspect = mock_client.return_value.introspect - self.assertEqual(states.INSPECTING, + self.assertEqual(states.INSPECTWAIT, self.driver.inspect.inspect_hardware(self.task)) mock_introspect.assert_called_once_with(self.node.uuid) @@ -169,7 +169,7 @@ class InspectHardwareTestCase(BaseTestCase): class CheckStatusTestCase(BaseTestCase): def setUp(self): super(CheckStatusTestCase, self).setUp() - self.node.provision_state = states.INSPECTING + self.node.provision_state = states.INSPECTWAIT def test_not_inspecting(self, mock_client): mock_get = mock_client.return_value.get_status @@ -177,6 +177,12 @@ class CheckStatusTestCase(BaseTestCase): inspector._check_status(self.task) self.assertFalse(mock_get.called) + def test_not_check_inspecting(self, mock_client): + mock_get = mock_client.return_value.get_status + self.node.provision_state = states.INSPECTING + inspector._check_status(self.task) + self.assertFalse(mock_get.called) + def test_not_inspector(self, mock_client): mock_get = mock_client.return_value.get_status self.task.driver.inspect = object() diff --git a/releasenotes/notes/add-inspect-wait-state-948f83dfe342897b.yaml b/releasenotes/notes/add-inspect-wait-state-948f83dfe342897b.yaml new file mode 100644 index 0000000000..94f6cfdb96 --- /dev/null +++ b/releasenotes/notes/add-inspect-wait-state-948f83dfe342897b.yaml @@ -0,0 +1,22 @@ +--- +upgrade: + - | + Adds an ``inspect wait`` state to handle asynchronous hardware + introspection. Caution should be taken due to the timeout monitoring + is shifted from ``inspecting`` to ``inspect wait``, please stop all + running asynchronous hardware inspection or wait until it is finished + before upgrading to the Rocky release. Otherwise nodes in asynchronous + inspection will be left at ``inspecting`` state forever unless the + database is manually updated. +deprecations: + - | + Adds an ``inspect wait`` state to handle asynchronous hardware + introspection. The ``[conductor]inspect_timeout`` configuration option + is deprecated for removal, please use ``[conductor]inspect_wait_timeout`` + instead to specify the timeout of inspection process. +other: + - | + Adds an ``inspect wait`` state to handle asynchronous hardware + introspection. Returning ``INSPECTING`` from the ``inspect_hardware`` + method of inspect interface is deprecated, ``INSPECTWAIT`` should be + returned instead. \ No newline at end of file