From 646d452d667e5470d17c93fb3bfa882d03c06a9e Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Wed, 16 Aug 2023 12:45:16 -0700 Subject: [PATCH] Add service steps and initial docs Adds service steps on a variety of internal interfaces, and begins to tie documentation together to provide clarity on the use and purpose of service steps. Change-Id: Ifd7241f06648c8d73c1b97fcf08673496f049f45 --- doc/source/admin/index.rst | 1 + doc/source/admin/servicing.rst | 205 +++++++++++++++++++ ironic/conductor/servicing.py | 2 +- ironic/conductor/steps.py | 4 +- ironic/drivers/modules/agent.py | 2 + ironic/drivers/modules/agent_base.py | 12 +- ironic/drivers/modules/ipmitool.py | 2 + ironic/drivers/modules/redfish/bios.py | 1 + ironic/drivers/modules/redfish/management.py | 19 +- ironic/drivers/modules/redfish/raid.py | 3 + 10 files changed, 235 insertions(+), 16 deletions(-) create mode 100644 doc/source/admin/servicing.rst diff --git a/doc/source/admin/index.rst b/doc/source/admin/index.rst index 204c6205fa..532d3f87e2 100644 --- a/doc/source/admin/index.rst +++ b/doc/source/admin/index.rst @@ -32,6 +32,7 @@ the services. Booting a Ramdisk or an ISO Hardware Burn-in Vendor Passthru + Servicing Drivers, Hardware Types and Hardware Interfaces ----------------------------------------------- diff --git a/doc/source/admin/servicing.rst b/doc/source/admin/servicing.rst new file mode 100644 index 0000000000..18358c4329 --- /dev/null +++ b/doc/source/admin/servicing.rst @@ -0,0 +1,205 @@ +.. _servicing: + +============== +Node servicing +============== + +Overview +======== + +In order to better enable operators to modify existing nodes, Ironic has +introduced the model of Node Servicing, where you can take a node in +``active`` state, modify it using steps similar to Deploy Steps or manual +cleaning through the Cleaning subsystem. + +For more information on cleaning, please see :ref:`cleaning`. 
+ +Major differences +================= + +Service steps do not contain an automatic execution model, which is intrinsic +to the standard deployment and "automated" cleaning workflows. This *may* +change at some point in the future. + +This also means that while a priority value *can* be supplied, it is not +presently utilized. + +Similarities to Cleaning and Deployment +======================================= + +Similar to Clean and Deploy steps, when invoked an operator can validate +the current running steps by viewing the ``driver_internal_info`` field +looking for a ``service_steps`` field. The *current* step being executed +can be viewed using the baremetal node ``service_step`` field, which is a +top level field. + +Service steps are internally decorated on driver interface methods utilizing a +decorator. This means service steps do not *automatically* expose clean and +deploy steps to be executed at any time. The Ironic development team took a +cautious and intentional approach behind methods which are decorated. Besides, +some clean and deployment steps are geared explicitly for operating in +that mode, and would not be suitable to be triggered outside of the +original workflow they were designed for use in. + +Available Steps +=============== + + +Executing Service Steps +======================= + +In order for servicing to work, you may need to configure a +`Servicing Network`_. + +Starting servicing via API +-------------------------- + +Servicing can only be performed when a node is in the ``active`` +provision state. The REST API request to initiate it is available in +API version 1.87 and higher:: + + PUT /v1/nodes/<node_ident>/states/provision + +(Additional information is available `here <https://docs.openstack.org/api-ref/baremetal/#change-node-provision-state>`_.) + +This API will allow operators to put a node directly into ``servicing`` +provision state from ``active`` provision state via 'target': 'service'. +The PUT will also require the argument 'service_steps' to be specified. This +is an ordered list of steps. 
A step is represented by a +dictionary (JSON), in the form:: + + { + "interface": "<interface>", + "step": "<name of service step>", + "args": {"<arg1>": "<value1>", ..., "<argn>": <valuen>} + } + +The 'interface' and 'step' keys are required for all steps. If a service step +method takes keyword arguments, the 'args' key may be specified. It +is a dictionary of keyword variable arguments, with each keyword-argument entry +being <name>: <value>. + +If any step is missing a required keyword argument, servicing will not be +performed and the node will be put in ``service failed`` provision state +with an appropriate error message. + +If, during the servicing process, a service step determines that it has +incorrect keyword arguments, all earlier steps will be performed and then the +node will be put in ``service failed`` provision state with an appropriate +error message. + +An example of the request body for this API:: + + { + "target":"service", + "service_steps": [{ + "interface": "raid", + "step": "apply_configuration", + "args": {"create_nonroot_volumes": true} + }, + { + "interface": "vendor", + "step": "send_raw", + "args": {"raw_bytes": "0x00 0x00 0x00 0x00"} + }] + } + +In the above example, the node's RAID interface would apply the set RAID +configuration, and then the vendor interface's ``send_raw`` step would be +called to send a raw command to the BMC. Please note, ``send_raw`` is only +available for the ``ipmi`` hardware type. + +Starting servicing via "openstack baremetal" CLI +------------------------------------------------ + +Servicing is available via the ``baremetal node service`` command, +starting with Bare Metal API version 1.87. + +The argument ``--service-steps`` must be specified. Its value is one of: + +- a JSON string +- path to a JSON file whose contents are passed to the API +- '-', to read from stdin. This allows piping in the service steps. + Using '-' to signify stdin is common in Unix utilities. 
+ +Examples of doing this with a JSON string:: + + baremetal node service <node> \ + --service-steps '[{"interface": "deploy", "step": "example_task"}]' + + baremetal node service <node> \ + --service-steps '[{"interface": "deploy", "step": "example_task"}]' + +Or with a file:: + + baremetal node service <node> \ + --service-steps my-service-steps.txt + +Or with stdin:: + + cat my-service-steps.txt | baremetal node service <node> \ + --service-steps - + +Available Steps in Ironic +------------------------- + +ipmi hardware type +~~~~~~~~~~~~~~~~~~ + +vendor.send_raw +^^^^^^^^^^^^^^^ + +This step is covered in the :doc:`/admin/drivers/ipmitool` documentation +and is usable as a service step in addition to a deploy step. + +redfish hardware type +~~~~~~~~~~~~~~~~~~~~~ + +bios.apply_configuration +^^^^^^^^^^^^^^^^^^^^^^^^ + +This is covered in the :ref:`bios` configuration documentation as it +started as a cleaning step. It is a standardized cross-interface name. + +management.update_firmware +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This step is covered in the :doc:`/admin/drivers/redfish` documentation and is intended +to facilitate firmware updates via the BMC. + +raid.apply_configuration +^^^^^^^^^^^^^^^^^^^^^^^^ + +This step is covered in the :doc:`/admin/drivers/redfish` documentation and is intended +to facilitate applying RAID configuration. + +raid.delete_configuration +^^^^^^^^^^^^^^^^^^^^^^^^^ + +This step is covered in the :doc:`/admin/drivers/redfish` documentation and is intended +to delete the RAID configuration. + +Agent +~~~~~ + +raid.apply_configuration +^^^^^^^^^^^^^^^^^^^^^^^^ + +This is the standardized RAID passthrough interface for the agent, and can +be leveraged like other RAID interfaces. + + +Available steps in Ironic-Python-Agent +-------------------------------------- + +.. note:: + Steps available from the agent will be populated once support has + merged in the agent to expose the steps to the ironic deployment. 
+ +Servicing Network +================= + +If you are using the Neutron DHCP provider (the default) you will also need to +ensure you have configured a servicing network. This network will be used to +boot the ramdisk for in-band service operations. This setting is configured +utilizing the ``[neutron]servicing_network`` configuration parameter. diff --git a/ironic/conductor/servicing.py b/ironic/conductor/servicing.py index 4a14de19d8..1b39adf1f6 100644 --- a/ironic/conductor/servicing.py +++ b/ironic/conductor/servicing.py @@ -322,7 +322,7 @@ def do_node_service_abort(task): return last_error = get_last_error(node) - info_message = _('Clean operation aborted for node %s') % node.uuid + info_message = _('Service operation aborted for node %s') % node.uuid if node.service_step: info_message += ( _(' during or after the completion of step "%s"') diff --git a/ironic/conductor/steps.py b/ironic/conductor/steps.py index 1c2895d813..3dfbb2e854 100644 --- a/ironic/conductor/steps.py +++ b/ironic/conductor/steps.py @@ -498,7 +498,7 @@ def set_node_service_steps(task, disable_ramdisk=False): :raises: InvalidParameterValue if there is a problem with the user's clean steps. :raises: NodeCleaningFailure if there was a problem getting the - clean steps. + service steps. """ node = task.node steps = _validate_user_service_steps( @@ -848,7 +848,7 @@ def _validate_user_service_steps(task, user_steps, disable_ramdisk=False): :raises: InvalidParameterValue if validation of clean steps fails. :raises: NodeCleaningFailure if there was a problem getting the clean steps from the driver. - :return: validated clean steps update with information from the driver + :return: validated service steps update with information from the driver """ # We call with enabled = False below so we pickup auto-disabled # steps, since service steps are not automagic like cleaning can be. 
diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 08e5555403..a9b3544b1a 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -657,6 +657,8 @@ class AgentRAID(base.RAIDInterface): return agent_base.get_steps(task, 'deploy', interface='raid') @METRICS.timer('AgentRAID.apply_configuration') + @base.service_step(priority=0, + argsinfo=_RAID_APPLY_CONFIGURATION_ARGSINFO) @base.deploy_step(priority=0, argsinfo=_RAID_APPLY_CONFIGURATION_ARGSINFO) def apply_configuration(self, task, raid_config, diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 03bc57cf65..2cab035044 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -812,11 +812,11 @@ class AgentBaseMixin(object): """Boot into the agent to prepare for service. :param task: a TaskManager object containing the node - :raises: NodeCleaningFailure, NetworkError if: the previous cleaning - ports cannot be removed or if new cleaning ports cannot be created. + :raises: NodeServiceFailure, NetworkError if: the previous service + ports cannot be removed or if new service ports cannot be created. :raises: InvalidParameterValue if cleaning network UUID config option has an invalid value. - :returns: states.CLEANWAIT to signify an asynchronous prepare + :returns: states.SERVICEWAIT to signify an asynchronous prepare """ result = deploy_utils.prepare_inband_service(task) if result is None: @@ -826,11 +826,11 @@ class AgentBaseMixin(object): @METRICS.timer('AgentBaseMixin.tear_down_service') def tear_down_service(self, task): - """Clean up the PXE and DHCP files after cleaning. + """Clean up the PXE and DHCP files after service. 
:param task: a TaskManager object containing the node - :raises: NodeServiceFailure, NetworkError if the cleaning ports cannot - be removed + :raises: NodeServiceFailure, NetworkError if the servicing ports + cannot be removed """ deploy_utils.tear_down_inband_service( task) diff --git a/ironic/drivers/modules/ipmitool.py b/ironic/drivers/modules/ipmitool.py index faaa38c24d..7611403e60 100644 --- a/ironic/drivers/modules/ipmitool.py +++ b/ironic/drivers/modules/ipmitool.py @@ -1390,6 +1390,8 @@ class VendorPassthru(base.VendorInterface): } @METRICS.timer('VendorPassthru.send_raw') + @base.service_step(priority=0, + argsinfo=_send_raw_step_args) @base.deploy_step(priority=0, argsinfo=_send_raw_step_args) @base.clean_step(priority=0, abortable=False, diff --git a/ironic/drivers/modules/redfish/bios.py b/ironic/drivers/modules/redfish/bios.py index b0cc76b57b..5af1bddc27 100644 --- a/ironic/drivers/modules/redfish/bios.py +++ b/ironic/drivers/modules/redfish/bios.py @@ -197,6 +197,7 @@ class RedfishBIOS(base.BIOSInterface): {'node_uuid': node.uuid, 'attrs': current_attrs}) self._clear_reboot_requested(task) + @base.service_step(priority=0, argsinfo=_APPLY_CONFIGURATION_ARGSINFO) @base.clean_step(priority=0, argsinfo=_APPLY_CONFIGURATION_ARGSINFO) @base.deploy_step(priority=0, argsinfo=_APPLY_CONFIGURATION_ARGSINFO) @base.cache_bios_settings diff --git a/ironic/drivers/modules/redfish/management.py b/ironic/drivers/modules/redfish/management.py index 8bd7058a55..7f7dbe72a5 100644 --- a/ironic/drivers/modules/redfish/management.py +++ b/ironic/drivers/modules/redfish/management.py @@ -90,6 +90,14 @@ if sushy: v: k for k, v in INDICATOR_MAP.items()} +_FIRMWARE_UPDATE_ARGS = { + 'firmware_images': { + 'description': ( + 'A list of firmware images to apply.'), + 'required': True + }} + + def _set_boot_device(task, system, device, persistent=False): """An internal routine to set the boot device. 
@@ -746,13 +754,10 @@ class RedfishManagement(base.ManagementInterface): return redfish_utils.get_system(task.node).manufacturer @METRICS.timer('RedfishManagement.update_firmware') - @base.clean_step(priority=0, abortable=False, argsinfo={ - 'firmware_images': { - 'description': ( - 'A list of firmware images to apply.' - ), - 'required': True - }}) + @base.clean_step(priority=0, abortable=False, + argsinfo=_FIRMWARE_UPDATE_ARGS) + @base.service_step(priority=0, abortable=False, + argsinfo=_FIRMWARE_UPDATE_ARGS) def update_firmware(self, task, firmware_images): """Updates the firmware on the node. diff --git a/ironic/drivers/modules/redfish/raid.py b/ironic/drivers/modules/redfish/raid.py index 154cd53d38..eae68eea74 100644 --- a/ironic/drivers/modules/redfish/raid.py +++ b/ironic/drivers/modules/redfish/raid.py @@ -752,6 +752,8 @@ class RedfishRAID(base.RAIDInterface): raise exception.InvalidParameterValue( _('interface type `scsi` not supported by Redfish RAID')) + @base.service_step(priority=0, + argsinfo=base.RAID_APPLY_CONFIGURATION_ARGSINFO) @base.deploy_step(priority=0, argsinfo=base.RAID_APPLY_CONFIGURATION_ARGSINFO) def apply_configuration(self, task, raid_config, create_root_volume=True, @@ -850,6 +852,7 @@ class RedfishRAID(base.RAIDInterface): @base.clean_step(priority=0) @base.deploy_step(priority=0) + @base.service_step(priority=0) def delete_configuration(self, task): """Delete RAID configuration on the node.