mistral/mistral/engine/default_engine.py

481 lines
17 KiB
Python

# Copyright 2013 - Mirantis, Inc.
# Copyright 2015 - StackStorm, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import traceback
from oslo_log import log as logging
from mistral import coordination
from mistral.db.v2 import api as db_api
from mistral.db.v2.sqlalchemy import models as db_models
from mistral.engine import action_handler
from mistral.engine import base
from mistral.engine import task_handler
from mistral.engine import workflow_handler as wf_handler
from mistral import exceptions as exc
from mistral.services import action_manager as a_m
from mistral.services import executions as wf_ex_service
from mistral.services import workflows as wf_service
from mistral import utils as u
from mistral.utils import wf_trace
from mistral.workbook import parser as spec_parser
from mistral.workflow import base as wf_base
from mistral.workflow import commands
from mistral.workflow import states
from mistral.workflow import utils as wf_utils
LOG = logging.getLogger(__name__)
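# NOTE(review): the comment below refers to "this __init__.py", but this
# module is default_engine.py; it looks like it was carried over from the
# package's __init__ module. Confirm whether it still applies here.
# Submodules of mistral.engine will throw NoSuchOptError if configuration
# options required at top level of this __init__.py are not imported before
# the submodules are referenced.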
class DefaultEngine(base.Engine, coordination.Service):
    """Workflow engine built on ``base.Engine`` and ``coordination.Service``."""

    def __init__(self, engine_client):
        """Store the engine client and initialise the coordination service.

        :param engine_client: Client object kept on the instance as
            ``_engine_client``.
        """
        self._engine_client = engine_client

        # 'engine_group' is presumably the coordination group name --
        # verify against coordination.Service.
        coordination.Service.__init__(self, 'engine_group')
@u.log_exec(LOG)
def start_workflow(self, wf_identifier, wf_input, description='',
                   **params):
    """Create a workflow execution and start running it.

    The execution record is created in its own transaction so that its id
    is known even if the second transaction (which switches the execution
    to RUNNING and dispatches the first workflow commands) fails.

    :param wf_identifier: Workflow identifier handed to
        ``wf_ex_service.create_workflow_execution``.
    :param wf_input: Workflow input handed to the same call.
    :param description: Description of the execution.
    :param params: Extra keyword parameters, forwarded as a dict.
    :return: Clone of the workflow execution. If starting fails after the
        record was created, a clone of the execution returned by
        ``_fail_workflow`` is returned instead.
    :raises Exception: The original error when there is no execution
        record to return.
    """
    wf_ex_id = None

    try:
        # Create a persistent workflow execution in a separate transaction
        # so that we can return it even in case of unexpected errors that
        # lead to transaction rollback.
        with db_api.transaction():
            # The new workflow execution will be in an IDLE
            # state on initial record creation.
            wf_ex_id, wf_spec = wf_ex_service.create_workflow_execution(
                wf_identifier,
                wf_input,
                description,
                params
            )

        with db_api.transaction():
            wf_ex = db_api.get_workflow_execution(wf_ex_id)

            wf_handler.set_execution_state(wf_ex, states.RUNNING)

            wf_ctrl = wf_base.get_controller(wf_ex, wf_spec)

            self._dispatch_workflow_commands(
                wf_ex,
                wf_ctrl.continue_workflow(),
                wf_spec
            )

            return wf_ex.get_clone()
    except Exception as e:
        LOG.error(
            "Failed to start workflow '%s' id=%s: %s\n%s",
            wf_identifier, wf_ex_id, e, traceback.format_exc()
        )

        # If creation itself failed there is no record to fail: skip the
        # lookup by a None id (it would only log a misleading "not found"
        # error and could mask the original exception if it raised).
        wf_ex = None

        if wf_ex_id is not None:
            wf_ex = self._fail_workflow(wf_ex_id, e)

        if wf_ex:
            return wf_ex.get_clone()

        raise e
@u.log_exec(LOG)
def start_action(self, action_name, action_input,
                 description=None, **params):
    """Run a single action outside of any workflow.

    An action execution record is created when the caller passes a truthy
    ``save_result`` parameter or when the action reports itself as
    asynchronous. Otherwise the action is run with ``async`` set to False
    and its output is wrapped in a new ``ActionExecution`` model object
    built directly in this method.

    :param action_name: Name used to resolve the action definition.
    :param action_input: Raw action input; it is resolved through
        ``action_handler.get_action_input`` before the action is built.
    :param description: Optional description of the action execution.
    :param params: Extra parameters; ``save_result`` and ``target`` are
        the ones read here.
    :return: Clone of the created action execution, or a new
        ``ActionExecution`` object holding the output.
    """
    with db_api.transaction():
        action_def = action_handler.resolve_definition(action_name)
        resolved_action_input = action_handler.get_action_input(
            action_name,
            action_input
        )
        action = a_m.get_action_class(action_def.name)(
            **resolved_action_input
        )

        # If we see action is asynchronous, then we enforce 'save_result'.
        if params.get('save_result') or not action.is_sync():
            action_ex = action_handler.create_action_execution(
                action_def,
                resolved_action_input,
                description=description
            )

            action_handler.run_action(
                action_def,
                resolved_action_input,
                action_ex.id,
                params.get('target')
            )

            return action_ex.get_clone()
        else:
            # 'async' is a reserved keyword since Python 3.7, so writing
            # ``async=False`` literally is a SyntaxError there. Passing it
            # through dict unpacking sends the same keyword argument and
            # parses on every Python version.
            output = action_handler.run_action(
                action_def,
                resolved_action_input,
                target=params.get('target'),
                **{'async': False}
            )

            return db_models.ActionExecution(
                name=action_name,
                description=description,
                input=action_input,
                output=output
            )
def on_task_state_change(self, task_ex_id, state, state_info=None):
    """Apply a new state to a task execution and let the workflow react.

    :param task_ex_id: Id of the task execution to update.
    :param state: New task state.
    :param state_info: Optional additional information about the state.
    """
    with db_api.transaction():
        task_ex = db_api.get_task_execution(task_ex_id)

        # TODO(rakhmerov): The method is mostly needed for policy and
        # we are supposed to get the same action execution as when the
        # policy worked.
        wf_ex = wf_handler.lock_workflow_execution(
            task_ex.workflow_execution_id
        )
        wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

        # Build the trace message first: it reports the old state, so it
        # must be formatted before the task execution is updated.
        transition_msg = (
            "Task '%s' [%s -> %s] state_info : %s"
            % (task_ex.name, task_ex.state, state, state_info)
        )
        wf_trace.info(task_ex, transition_msg)

        task_ex.state = state
        task_ex.state_info = state_info

        self._on_task_state_change(task_ex, wf_ex, wf_spec)
def _on_task_state_change(self, task_ex, wf_ex, wf_spec):
    """Continue the workflow after a task execution changed its state.

    :param task_ex: Task execution whose state changed.
    :param wf_ex: Workflow execution the task belongs to.
    :param wf_spec: Workflow specification used to look up the task spec.
    """
    task_spec = wf_spec.get_tasks()[task_ex.name]

    if not task_handler.is_task_completed(task_ex, task_spec):
        if task_handler.need_to_continue(task_ex, task_spec):
            # Re-run existing task.
            rerun_cmd = commands.RunExistingTask(task_ex, reset=False)

            self._dispatch_workflow_commands(wf_ex, [rerun_cmd], wf_spec)

        return

    task_handler.after_task_complete(task_ex, task_spec, wf_spec)

    # Ignore DELAYED state.
    if task_ex.state == states.RUNNING_DELAYED:
        return

    wf_ctrl = wf_base.get_controller(wf_ex, wf_spec)

    # Calculate commands to process next.
    try:
        next_cmds = wf_ctrl.continue_workflow()
    except exc.YaqlEvaluationException as yaql_error:
        LOG.error(
            'YAQL error occurred while calculating next workflow '
            'commands [wf_ex_id=%s, task_ex_id=%s]: %s',
            wf_ex.id, task_ex.id, yaql_error
        )

        wf_handler.fail_workflow(wf_ex, str(yaql_error))

        return

    # Mark task as processed after all decisions have been made
    # upon its completion.
    task_ex.processed = True

    self._dispatch_workflow_commands(wf_ex, next_cmds, wf_spec)

    self._check_workflow_completion(wf_ex, wf_ctrl, wf_spec)
@staticmethod
def _check_workflow_completion(wf_ex, wf_ctrl, wf_spec):
    """Finish the workflow (success or failure) if nothing can progress.

    :param wf_ex: Workflow execution to check.
    :param wf_ctrl: Workflow controller for this execution.
    :param wf_spec: Workflow specification passed on when succeeding.
    """
    if states.is_paused_or_completed(wf_ex.state):
        return

    # Workflow is not completed if there are any incomplete task
    # executions that are not in WAITING state. If all incomplete
    # tasks are waiting and there are unhandled errors, then these
    # tasks will not reach completion. In this case, mark the
    # workflow complete.
    pending_tasks = wf_utils.find_incomplete_task_executions(wf_ex)

    if not all(states.is_waiting(t.state) for t in pending_tasks):
        return

    if not wf_ctrl.all_errors_handled():
        fail_info = wf_utils.construct_fail_info_message(wf_ctrl, wf_ex)

        wf_handler.fail_workflow(wf_ex, fail_info)

        return

    wf_handler.succeed_workflow(
        wf_ex,
        wf_ctrl.evaluate_workflow_final_context(),
        wf_spec
    )
@u.log_exec(LOG)
def on_action_complete(self, action_ex_id, result):
    """Process the result of a finished action execution.

    :param action_ex_id: Id of the action execution that finished.
    :param result: Result of the action.
    :return: Clone of the action execution.
    :raises Exception: Any error is logged and re-raised; if the workflow
        execution id was already known, the workflow is failed first via
        ``_fail_workflow``.
    """
    wf_ex_id = None

    try:
        with db_api.transaction():
            action_ex = db_api.get_action_execution(action_ex_id)

            # In case of single action execution there is no
            # assigned task execution.
            if not action_ex.task_execution:
                stored_ex = action_handler.store_action_result(
                    action_ex,
                    result
                )

                return stored_ex.get_clone()

            wf_ex_id = action_ex.task_execution.workflow_execution_id
            wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)
            wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

            task_ex = task_handler.on_action_complete(
                action_ex,
                wf_spec,
                result
            )

            # If workflow is on pause or completed then there's no
            # need to continue workflow.
            if not states.is_paused_or_completed(wf_ex.state):
                self._on_task_state_change(task_ex, wf_ex, wf_spec)

            return action_ex.get_clone()
    except Exception as e:
        # TODO(rakhmerov): Need to refactor logging in a more elegant way.
        LOG.error(
            'Failed to handle action execution result [id=%s]: %s\n%s',
            action_ex_id, e, traceback.format_exc()
        )

        # If an exception was thrown after we got the wf_ex_id
        if wf_ex_id:
            self._fail_workflow(wf_ex_id, e)

        raise e
@u.log_exec(LOG)
def pause_workflow(self, execution_id):
    """Lock the workflow execution and switch it to the PAUSED state.

    :param execution_id: Id of the workflow execution to pause.
    :return: The locked workflow execution object (not a clone).
    """
    with db_api.transaction():
        paused_wf_ex = wf_handler.lock_workflow_execution(execution_id)

        wf_handler.set_execution_state(paused_wf_ex, states.PAUSED)

    return paused_wf_ex
def _continue_workflow(self, wf_ex, task_ex=None, reset=True, env=None):
    """Switch a workflow execution back to RUNNING and dispatch next steps.

    :param wf_ex: Workflow execution to continue.
    :param task_ex: Optional task execution passed to the controller.
    :param reset: Passed through to the workflow controller.
    :param env: Optional environment used to update the execution and
        passed to the controller.
    :return: Clone of the workflow execution.
    """
    wf_ex = wf_service.update_workflow_execution_env(wf_ex, env)

    wf_handler.set_execution_state(
        wf_ex,
        states.RUNNING,
        set_upstream=True
    )

    wf_ctrl = wf_base.get_controller(wf_ex)

    # TODO(rakhmerov): Add YAQL error handling.
    # Calculate commands to process next.
    all_cmds = wf_ctrl.continue_workflow(
        task_ex=task_ex,
        reset=reset,
        env=env
    )

    # When resuming a workflow we need to ignore all 'pause'
    # commands because workflow controller takes tasks that
    # completed within the period when the workflow was paused.
    cmds = [
        c for c in all_cmds
        if not isinstance(c, commands.PauseWorkflow)
    ]

    # Since there's no explicit task causing the operation
    # we need to mark all not processed tasks as processed
    # because workflow controller takes only completed tasks
    # with flag 'processed' equal to False.
    for t_ex in wf_ex.task_executions:
        if not states.is_completed(t_ex.state):
            continue

        if not t_ex.processed:
            t_ex.processed = True

    wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

    self._dispatch_workflow_commands(wf_ex, cmds, wf_spec)

    # Nothing to run and nothing left incomplete: the workflow is done.
    if not cmds and not wf_utils.find_incomplete_task_executions(wf_ex):
        wf_handler.succeed_workflow(
            wf_ex,
            wf_ctrl.evaluate_workflow_final_context(),
            wf_spec
        )

    return wf_ex.get_clone()
@u.log_exec(LOG)
def rerun_workflow(self, wf_ex_id, task_ex_id, reset=True, env=None):
    """Re-run a workflow execution starting from the given task execution.

    A workflow execution in the PAUSED state is returned as is.

    :param wf_ex_id: Id of the workflow execution.
    :param task_ex_id: Id of the task execution to re-run from.
    :param reset: Passed through to ``_continue_workflow``.
    :param env: Optional environment passed to ``_continue_workflow``.
    :return: Clone of the workflow execution.
    :raises ValueError: If the task execution belongs to another workflow
        execution. Like any other error raised here, it is logged, the
        workflow is failed via ``_fail_workflow`` and it is re-raised.
    """
    try:
        with db_api.transaction():
            wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)

            task_ex = db_api.get_task_execution(task_ex_id)

            owner_wf_ex_id = task_ex.workflow_execution.id

            if owner_wf_ex_id != wf_ex_id:
                raise ValueError('Workflow execution ID does not match.')

            is_paused_now = wf_ex.state == states.PAUSED

            if is_paused_now:
                return wf_ex.get_clone()

            return self._continue_workflow(wf_ex, task_ex, reset, env=env)
    except Exception as e:
        LOG.error(
            "Failed to rerun execution id=%s at task=%s: %s\n%s",
            wf_ex_id, task_ex_id, e, traceback.format_exc()
        )

        self._fail_workflow(wf_ex_id, e)

        raise e
@u.log_exec(LOG)
def resume_workflow(self, wf_ex_id, env=None):
    """Resume a workflow execution that is paused or idle.

    An execution in any other state is returned unchanged.

    :param wf_ex_id: Id of the workflow execution to resume.
    :param env: Optional environment passed to ``_continue_workflow``.
    :return: Clone of the workflow execution.
    :raises Exception: Any error is logged, the workflow is failed via
        ``_fail_workflow`` and the error is re-raised.
    """
    try:
        with db_api.transaction():
            wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)

            can_resume = (
                states.is_paused(wf_ex.state) or
                states.is_idle(wf_ex.state)
            )

            if not can_resume:
                return wf_ex.get_clone()

            return self._continue_workflow(wf_ex, env=env)
    except Exception as e:
        LOG.error(
            "Failed to resume execution id=%s: %s\n%s",
            wf_ex_id, e, traceback.format_exc()
        )

        self._fail_workflow(wf_ex_id, e)

        raise e
@u.log_exec(LOG)
def stop_workflow(self, wf_ex_id, state, message=None):
    """Lock a workflow execution and stop it with the requested state.

    :param wf_ex_id: Id of the workflow execution to stop.
    :param state: Target state, handed to ``_stop_workflow``.
    :param message: Optional message handed to ``_stop_workflow``.
    :return: Whatever ``_stop_workflow`` returns.
    """
    with db_api.transaction():
        locked_wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)

        stopped = self._stop_workflow(locked_wf_ex, state, message)

        return stopped
@staticmethod
def _stop_workflow(wf_ex, state, message=None):
    """Complete a workflow execution as SUCCESS or ERROR.

    :param wf_ex: Workflow execution to stop.
    :param state: Target state; only SUCCESS and ERROR trigger an action.
    :param message: Optional message passed to the workflow handler.
    :return: Result of ``wf_handler.succeed_workflow`` or
        ``wf_handler.fail_workflow``; for any other state the given
        ``wf_ex`` is returned untouched.
    """
    if state == states.SUCCESS:
        wf_ctrl = wf_base.get_controller(wf_ex)

        try:
            final_context = wf_ctrl.evaluate_workflow_final_context()
        except Exception as e:
            LOG.warning(
                'Failed to get final context for %s: %s' % (wf_ex, e)
            )

            # Fall back to an empty context when evaluation fails.
            final_context = {}

        wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

        return wf_handler.succeed_workflow(
            wf_ex,
            final_context,
            wf_spec,
            message
        )

    if state == states.ERROR:
        return wf_handler.fail_workflow(wf_ex, message)

    return wf_ex
@u.log_exec(LOG)
def rollback_workflow(self, wf_ex_id):
    """Roll back a workflow execution (not implemented yet).

    :param wf_ex_id: Id of the workflow execution.
    :raises NotImplementedError: Always.
    """
    # TODO(rakhmerov): Implement.
    raise NotImplementedError()
def _dispatch_workflow_commands(self, wf_ex, wf_cmds, wf_spec):
    """Execute workflow commands one by one while the workflow is RUNNING.

    :param wf_ex: Workflow execution the commands belong to.
    :param wf_cmds: Iterable of workflow commands; nothing is done when
        it is empty or None.
    :param wf_spec: Workflow specification used when starting new tasks.
    :raises RuntimeError: If a command of an unsupported type is met.
    """
    if not wf_cmds:
        return

    def fail_if_not_started(started_task_ex):
        # Fail the whole workflow when a task ended up in ERROR right
        # after an attempt to start it.
        if started_task_ex.state == states.ERROR:
            wf_handler.fail_workflow(
                wf_ex,
                'Failed to start task [task_ex=%s]: %s' %
                (started_task_ex, started_task_ex.state_info)
            )

    for cmd in wf_cmds:
        if isinstance(cmd, commands.RunTask):
            if cmd.is_waiting():
                task_handler.defer_task(cmd)
            else:
                fail_if_not_started(
                    task_handler.run_new_task(cmd, wf_spec)
                )
        elif isinstance(cmd, commands.RunExistingTask):
            fail_if_not_started(
                task_handler.run_existing_task(
                    cmd.task_ex.id,
                    reset=cmd.reset
                )
            )
        elif isinstance(cmd, commands.SetWorkflowState):
            if states.is_completed(cmd.new_state):
                self._stop_workflow(cmd.wf_ex, cmd.new_state, cmd.msg)
            else:
                wf_handler.set_execution_state(wf_ex, cmd.new_state)
        elif not isinstance(cmd, commands.Noop):
            # Noop commands are deliberately skipped; anything else that
            # reaches this point is unknown.
            raise RuntimeError('Unsupported workflow command: %s' % cmd)

        # Stop processing as soon as the workflow left the RUNNING state.
        if wf_ex.state != states.RUNNING:
            break
# TODO(rakhmerov): This method may not be needed at all because error
# handling is now implemented too roughly w/o distinguishing different
# errors. On most errors (like YAQLException) we shouldn't rollback
# transactions, we just need to fail corresponding execution objects
# where a problem happened (action, task or workflow).
@staticmethod
def _fail_workflow(wf_ex_id, exc):
    """Private helper to fail workflow on exceptions.

    :param wf_ex_id: Id of the workflow execution to fail.
    :param exc: Exception whose string form becomes the state info. The
        name shadows the module-level ``exc`` import inside this method.
    :return: The locked workflow execution, or None when no execution
        with this id was found.
    """
    with db_api.transaction():
        found_wf_ex = db_api.load_workflow_execution(wf_ex_id)

        if found_wf_ex is None:
            LOG.error(
                "Can't fail workflow execution with id='%s': not found.",
                wf_ex_id
            )

            return None

        locked_wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)

        # Paused or already completed executions keep their state.
        already_settled = states.is_paused_or_completed(locked_wf_ex.state)

        if not already_settled:
            wf_handler.set_execution_state(
                locked_wf_ex,
                states.ERROR,
                str(exc)
            )

        return locked_wf_ex