Merge "Add support for verify steps"

This commit is contained in:
Zuul 2021-10-06 11:42:53 +00:00 committed by Gerrit Code Review
commit 63e4174146
8 changed files with 389 additions and 2 deletions

View File

@ -830,3 +830,7 @@ class NodeHistoryNotFound(NotFound):
class IncorrectConfiguration(IronicException):
_msg_fmt = _("Supplied configuration is incorrect and must be fixed. "
"Error: %(error)s")
class NodeVerifyFailure(IronicException):
_msg_fmt = _("Failed to verify node %(node)s: %(reason)s")

View File

@ -50,6 +50,25 @@ DEPLOYING_INTERFACE_PRIORITY = {
'raid': 1,
}
VERIFYING_INTERFACE_PRIORITY = {
# When two verify steps have the same priority, their order is determined
# by which interface is implementing the verify step. The verifying step of
# the interface with the highest value here, will be executed first in
# that case.
'power': 12,
'management': 11,
'boot': 8,
'inspect': 10,
'deploy': 9,
'bios': 7,
'raid': 6,
'vendor': 5,
'network': 4,
'storage': 3,
'console': 2,
'rescue': 1,
}
def _clean_step_key(step):
"""Sort by priority, then interface priority in event of tie.
@ -69,6 +88,15 @@ def _deploy_step_key(step):
DEPLOYING_INTERFACE_PRIORITY[step.get('interface')])
def _verify_step_key(step):
"""Sort by priority, then interface priority in event of tie.
:param step: verify step dict to get priority for.
"""
return (step.get('priority'),
VERIFYING_INTERFACE_PRIORITY[step.get('interface')])
def _sorted_steps(steps, sort_step_key):
"""Return a sorted list of steps.
@ -194,6 +222,37 @@ def _get_deployment_steps(task, enabled=False, sort=True):
enabled=enabled, sort_step_key=sort_key)
def _get_verify_steps(task, enabled=False, sort=True):
"""Get verify steps for task.node.
:param task: A TaskManager object
:param enabled: If True, returns only enabled (priority > 0) steps. If
False, returns all verify steps.
:param sort: If True, the steps are sorted from highest priority to lowest
priority. For steps having the same priority, they are sorted from
highest interface priority to lowest.
:raises: NodeVerifyFailure if there was a problem getting the
verify steps.
:returns: A list of verify step dictionaries
"""
sort_key = _verify_step_key if sort else None
if CONF.conductor.verify_step_priority_override:
vsp_override = {}
for element in CONF.conductor.verify_step_priority_override:
vsp_override.update(element)
verify_steps = _get_steps(task, VERIFYING_INTERFACE_PRIORITY,
'get_verify_steps', enabled=enabled,
sort_step_key=sort_key,
prio_overrides=vsp_override)
else:
verify_steps = _get_steps(task, VERIFYING_INTERFACE_PRIORITY,
'get_verify_steps', enabled=enabled,
sort_step_key=sort_key)
return verify_steps
def set_node_cleaning_steps(task, disable_ramdisk=False):
"""Set up the node with clean step information for cleaning.

View File

@ -631,6 +631,32 @@ def fail_on_error(error_callback, msg, *error_args, **error_kwargs):
return wrapper
def verifying_error_handler(task, logmsg, errmsg=None, traceback=False):
"""Handle errors during verification steps
:param task: the task
:param logmsg: message to be logged
:param errmsg: message for the user
:param traceback: Boolean; True to log a traceback
"""
errmsg = errmsg or logmsg
node = task.node
LOG.error(logmsg, exc_info=traceback)
node_history_record(node, event=errmsg, event_type=states.VERIFYING,
error=True)
node.save()
node.refresh()
if node.provision_state in (
states.VERIFYING):
# Clear verifying step; we leave the list of verify steps
# in node.driver_internal_info for debugging purposes.
node.verify_step = {}
node.save()
@task_manager.require_exclusive_lock
def abort_on_conductor_take_over(task):
"""Set node's state when a task was aborted due to conductor take over.

View File

@ -18,6 +18,7 @@ from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import states
from ironic.conductor import notification_utils as notify_utils
from ironic.conductor import steps as conductor_steps
from ironic.conductor import task_manager
from ironic.conductor import utils
@ -49,6 +50,37 @@ def do_node_verify(task):
{'node': node.uuid, 'msg': e})
log_traceback = not isinstance(e, exception.IronicException)
LOG.error(error, exc_info=log_traceback)
verify_steps = conductor_steps._get_verify_steps(task,
enabled=True,
sort=True)
for step in verify_steps:
interface = getattr(task.driver, step.get('interface'))
LOG.info('Executing %(step)s on node %(node)s',
{'step': step, 'node': node.uuid})
try:
result = interface.execute_verify_step(task, step)
except Exception as e:
error = ('Node %(node)s failed verify step %(step)s '
'with unexpected error: %(err)s' %
{'node': node.uuid, 'step': node.verify_step,
'err': e})
utils.verifying_error_handler(
task, error,
_("Failed to verify. Exception: %s") % e,
traceback=True)
if result is not None:
# NOTE(rloo): This is an internal/dev error; shouldn't
# happen.
error = (_('While executing verify step %(step)s on '
'node %(node)s, step returned unexpected '
'state: %(val)s')
% {'step': step, 'node': node.uuid,
'val': result})
utils.verifying_error_handler(
task, error,
_("Failed to verify: %s") % node.deploy_step)
if error is None:
# NOTE(janders) this can eventually move to driver-specific
# verify steps, will leave this for a follow-up change.
@ -67,6 +99,10 @@ def do_node_verify(task):
else:
task.process_event('done')
LOG.info('Successfully verified node %(node)s',
{'node': node.uuid})
else:
utils.node_history_record(task.node, event=error,
event_type=states.VERIFY,

View File

@ -338,6 +338,16 @@ opts = [
'node_history_max_entries setting as users of '
'this setting are anticipated to need to retain '
'history by policy.')),
cfg.MultiOpt('verify_step_priority_override',
item_type=types.Dict(),
default={},
mutable=True,
help=_('Priority to run automated verify steps '
'provided in interface.step_name:priority format,'
'e.g. management.clear_job_queue:123. The option can '
'be specified multiple times to define priorities '
'for multiple steps. If set to 0, this specific step '
'will not run during cleaning. ')),
]

View File

@ -226,8 +226,9 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
"""
def __new__(cls, *args, **kwargs):
# Get the list of clean steps and deploy steps, when the interface is
# initialized by the conductor. We use __new__ instead of __init___
# Get the list of clean steps, deploy steps and verify steps when the
# interface is initialized by the conductor.
# We use __new__ instead of __init___
# to avoid breaking backwards compatibility with all the drivers.
# We want to return all steps, regardless of priority.
@ -238,6 +239,7 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
instance = super_new(cls, *args, **kwargs)
instance.clean_steps = []
instance.deploy_steps = []
instance.verify_steps = []
for n, method in inspect.getmembers(instance, inspect.ismethod):
if getattr(method, '_is_clean_step', False):
# Create a CleanStep to represent this method
@ -256,6 +258,13 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
'argsinfo': method._deploy_step_argsinfo,
'interface': instance.interface_type}
instance.deploy_steps.append(step)
if getattr(method, '_is_verify_step', False):
# Create a VerifyStep to represent this method
step = {'step': method.__name__,
'priority': method._verify_step_priority,
'interface': instance.interface_type}
instance.verify_steps.append(step)
if instance.clean_steps:
LOG.debug('Found clean steps %(steps)s for interface '
'%(interface)s',
@ -266,6 +275,12 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
'%(interface)s',
{'steps': instance.deploy_steps,
'interface': instance.interface_type})
if instance.verify_steps:
LOG.debug('Found verify steps %(steps)s for interface '
'%(interface)s',
{'steps': instance.deploy_steps,
'interface': instance.interface_type})
return instance
def _execute_step(self, task, step):
@ -360,6 +375,35 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
"""
return self._execute_step(task, step)
def get_verify_steps(self, task):
"""Get a list of (enabled and disabled) verify steps for the interface.
This function will return all verify steps (both enabled and disabled)
for the interface, in an unordered list.
:param task: A TaskManager object, useful for interfaces overriding
this function
:raises NodeVerifyFailure: if there is a problem getting the steps
from the driver. For example, when a node (using an agent driver)
has just been enrolled and the agent isn't alive yet to be queried
for the available verify steps.
:returns: A list of deploy step dictionaries
"""
return self.verify_steps
def execute_verify_step(self, task, step):
"""Execute the verify step on task.node.
A verify step must take a single positional argument: a TaskManager
object. It does not take keyword variable arguments.
:param task: A TaskManager object
:param step: The deploy step dictionary representing the step to
execute
:returns: None if this method has completed synchronously
"""
return self._execute_step(task, step)
class DeployInterface(BaseInterface):
"""Interface for deploy-related actions."""
@ -1868,3 +1912,44 @@ def deploy_step(priority, argsinfo=None):
func._deploy_step_argsinfo = argsinfo
return func
return decorator
def verify_step(priority):
"""Decorator for verify steps.
Only steps with priorities greater than 0 are used. These steps are
ordered by priority from highest value to lowest value.
For steps with the same priority, they are ordered by driver
interface priority (see conductor.steps.VERIFY_INTERFACE_PRIORITY).
execute_verify_step() will be called on each step.
Decorated verify steps must take as the only positional argument, a
TaskManager object.
Verify steps are synchronous and should return `None` when finished,
and the conductor will continue on to the next step. While the verify step
is executing, the node will be in `states.VERIFYING` provision state.
Examples::
class MyInterface(base.BaseInterface):
@base.verify_step(priority=100)
def example_verifying(self, task):
# do some verifying
:param priority: an integer (>=0) priority; used for determining the order
in which the step is run in the verification process.
:raises InvalidParameterValue: if any of the arguments are invalid
"""
def decorator(func):
func._is_verify_step = True
if isinstance(priority, int) and priority >= 0:
func._verify_step_priority = priority
else:
raise exception.InvalidParameterValue(
_('"priority" must be an integer value >= 0, instead of "%s"')
% priority)
return func
return decorator

View File

@ -19,6 +19,7 @@ from ironic.common import exception
from ironic.common import states
from ironic.conductor import steps as conductor_steps
from ironic.conductor import task_manager
from ironic.conductor import verify as verify_steps
from ironic import objects
from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.db import utils as db_utils
@ -1138,3 +1139,163 @@ class ValidateUserDeployStepsTestCase(db_base.DbTestCase):
result = conductor_steps._get_validated_user_deploy_steps(task)
self.assertEqual([], result)
mock_validated.assert_not_called()
class NodeVerifyStepsTestCase(db_base.DbTestCase):
def setUp(self):
super(NodeVerifyStepsTestCase, self).setUp()
self.management_reset_idrac = {
'step': 'reset_idrac', 'priority': 0,
'interface': 'management'}
self.management_clear_job_queue = {
'step': 'clear_job_queue', 'priority': 10,
'interface': 'management'}
self.verify_steps = [self.management_clear_job_queue,
self.management_reset_idrac]
self.verify_steps_only_enabled = [self.management_clear_job_queue]
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_priority_override_ok(self,
mock_verify_steps):
# Test verify_step_priority_override
cfg.CONF.set_override('verify_step_priority_override',
["management.clear_job_queue:9"],
'conductor')
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue]
with task_manager.acquire(
self.context, node.uuid, shared=True) as task:
steps = conductor_steps._get_verify_steps(task, enabled=True)
expected_step_priorities = [{'interface': 'management', 'priority': 9,
'step': 'clear_job_queue'}]
self.assertEqual(expected_step_priorities, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_priority_override_fail(self,
mock_verify_steps):
# Test verify_step_priority_override for failure
cfg.CONF.set_override('verify_step_priority_override',
["management.clear_job_queue:9"],
'conductor')
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue]
with task_manager.acquire(
self.context, node.uuid, shared=True) as task:
steps = conductor_steps._get_verify_steps(task, enabled=True)
expected_step_priorities = [{'interface': 'management', 'priority': 10,
'step': 'clear_job_queue'}]
self.assertNotEqual(expected_step_priorities, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_priority_override_off(self,
mock_verify_steps):
# Test disabling steps with verify_step_priority_override
cfg.CONF.set_override('verify_step_priority_override',
["management.clear_job_queue:0"],
'conductor')
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue]
with task_manager.acquire(
self.context, node.uuid, shared=True) as task:
steps = conductor_steps._get_verify_steps(task, enabled=True)
expected_step_priorities = []
self.assertEqual(expected_step_priorities, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps(self, mock_verify_steps):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue,
self.management_reset_idrac]
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
steps = conductor_steps._get_verify_steps(task, enabled=False)
self.assertEqual(self.verify_steps, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_unsorted(self, mock_verify_steps):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue,
self.management_reset_idrac]
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
steps = conductor_steps._get_verify_steps(task, enabled=False,
sort=False)
self.assertEqual(self.verify_steps, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_only_enabled(self, mock_verify_steps):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue]
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
steps = conductor_steps._get_verify_steps(task, enabled=True)
self.assertEqual(self.verify_steps_only_enabled, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.'
'execute_verify_step',
autospec=True)
@mock.patch.object(conductor_steps, '_get_verify_steps', autospec=True)
def test_execute_verify_step(self, mock_steps, mock_execute):
mock_steps.return_value = self.verify_steps
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.VERIFYING,
target_provision_state=states.MANAGEABLE,
last_error=None,
clean_step=None)
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
verify_steps.do_node_verify(task)
node.refresh()
self.assertTrue(mock_execute.called)

View File

@ -0,0 +1,6 @@
---
features:
- |
Adds support for verify steps - a mechanism for running optional,
actions pre-defined in the driver while the node is in transition from
enroll to managable state, prior to inspection.