mistral/mistral/engine/tasks.py

823 lines
26 KiB
Python

# Copyright 2016 - Nokia Networks.
# Copyright 2016 - Brocade Communications Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import copy
from oslo_log import log as logging
from osprofiler import profiler
import six
from mistral.db.v2 import api as db_api
from mistral.engine import actions
from mistral.engine import dispatcher
from mistral.engine import policies
from mistral import exceptions as exc
from mistral import expressions as expr
from mistral import utils
from mistral.utils import wf_trace
from mistral.workflow import base as wf_base
from mistral.workflow import data_flow
from mistral.workflow import states
# Module-level logger; used below to emit debug messages when tasks start.
LOG = logging.getLogger(__name__)
@six.add_metaclass(abc.ABCMeta)
class Task(object):
    """Task.

    Represents a workflow task and defines interface that can be used by
    Mistral engine or its components in order to manipulate with tasks.
    """

    def __init__(self, wf_ex, wf_spec, task_spec, ctx, task_ex=None,
                 unique_key=None, waiting=False, triggered_by=None):
        """Initializes the task wrapper.

        :param wf_ex: Workflow execution the task belongs to.
        :param wf_spec: Workflow specification.
        :param task_spec: Task specification.
        :param ctx: Inbound data context of the task.
        :param task_ex: Existing task execution object, if any.
        :param unique_key: Key used to look up an existing task execution
            and to name the lock taken when the task is deferred.
        :param waiting: True if the task should be put into WAITING state
            rather than started.
        :param triggered_by: Value stored under the 'triggered_by' key of
            the task execution runtime context.
        """
        self.wf_ex = wf_ex
        self.task_spec = task_spec
        self.ctx = ctx
        self.task_ex = task_ex
        self.wf_spec = wf_spec
        self.unique_key = unique_key
        self.waiting = waiting
        self.triggered_by = triggered_by

        # Flags describing what happened to the task during this call.
        self.reset_flag = False
        self.created = False
        self.state_changed = False

    def is_completed(self):
        """Tells whether a task execution exists and is completed.

        :return: True if there is a task execution and its state is
            a completed one, False otherwise.
        """
        # Always return a real boolean instead of leaking None or the
        # task execution object itself when there's no task execution.
        return bool(
            self.task_ex and states.is_completed(self.task_ex.state)
        )

    def is_waiting(self):
        """Returns the 'waiting' flag the task was created with."""
        return self.waiting

    def is_created(self):
        """Tells whether a task execution was created by this object."""
        return self.created

    def is_state_changed(self):
        """Tells whether set_state() changed the task execution state."""
        return self.state_changed

    @abc.abstractmethod
    def on_action_complete(self, action_ex):
        """Handle action completion.

        :param action_ex: Action execution.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def on_action_update(self, action_ex):
        """Handle action update.

        :param action_ex: Action execution.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def run(self):
        """Runs task."""
        raise NotImplementedError

    @profiler.trace('task-defer')
    def defer(self):
        """Defers task.

        This method puts task to a waiting state. If there's no task
        execution yet then it is looked up by the unique key and, if
        still not found, created in WAITING state.
        """
        with db_api.named_lock(self.unique_key):
            if not self.task_ex:
                t_execs = db_api.get_task_executions(
                    workflow_execution_id=self.wf_ex.id,
                    unique_key=self.unique_key
                )

                self.task_ex = t_execs[0] if t_execs else None

            msg = 'Task is waiting.'

            if not self.task_ex:
                self._create_task_execution(
                    state=states.WAITING,
                    state_info=msg
                )
            elif self.task_ex.state != states.WAITING:
                self.set_state(states.WAITING, msg)

    def reset(self):
        """Marks the task so that all its executions get reset on rerun."""
        self.reset_flag = True

    @profiler.trace('task-set-state')
    def set_state(self, state, state_info, processed=None):
        """Sets task state without executing post completion logic.

        :param state: New task state.
        :param state_info: New state information (i.e. error message).
        :param processed: New "processed" flag value.
        :return: False if an update was attempted but the DB update
            query did not change anything, True otherwise (including
            the case when neither state nor state info differ from
            the current values and nothing had to be done).
        """
        assert self.task_ex

        cur_state = self.task_ex.state

        if cur_state != state or self.task_ex.state_info != state_info:
            task_ex = db_api.update_task_execution_state(
                id=self.task_ex.id,
                cur_state=cur_state,
                state=state
            )

            if task_ex is None:
                # Do nothing because the update query did not change the DB.
                return False

            self.task_ex = task_ex
            self.task_ex.state_info = state_info
            self.state_changed = True

            if processed is not None:
                self.task_ex.processed = processed

            wf_trace.info(
                self.task_ex.workflow_execution,
                "Task '%s' (%s) [%s -> %s, msg=%s]" %
                (self.task_ex.name,
                 self.task_ex.id,
                 cur_state,
                 state,
                 state_info)
            )

        return True

    @profiler.trace('task-complete')
    def complete(self, state, state_info=None):
        """Complete task and set specified state.

        Method sets specified task state and runs all necessary post
        completion logic such as publishing workflow variables and
        scheduling new workflow commands.

        :param state: New task state.
        :param state_info: New state information (i.e. error message).
        """
        assert self.task_ex

        # Ignore if task already completed.
        if self.is_completed():
            return

        # If we were unable to change the task state it means that it was
        # already changed by a concurrent process. In this case we need to
        # skip all regular completion logic like scheduling new tasks,
        # running engine commands and publishing.
        if not self.set_state(state, state_info):
            return

        data_flow.publish_variables(self.task_ex, self.task_spec)

        if not self.task_spec.get_keep_result():
            # Destroy task result.
            for ex in self.task_ex.action_executions:
                if hasattr(ex, 'output'):
                    ex.output = {}

        self._after_task_complete()

        # Ignore DELAYED state.
        if self.task_ex.state == states.RUNNING_DELAYED:
            return

        # If workflow is paused we shouldn't schedule new commands
        # and mark task as processed.
        if states.is_paused(self.wf_ex.state):
            return

        wf_ctrl = wf_base.get_controller(self.wf_ex, self.wf_spec)

        # Calculate commands to process next.
        cmds = wf_ctrl.continue_workflow(task_ex=self.task_ex)

        # Mark task as processed after all decisions have been made
        # upon its completion.
        self.task_ex.processed = True

        dispatcher.dispatch_workflow_commands(self.wf_ex, cmds)

    @profiler.trace('task-update')
    def update(self, state, state_info=None):
        """Update task and set specified state.

        Method sets specified task state. The call is silently ignored
        if the task is already completed, if the transition is not valid
        or if RUNNING is requested while a child execution is paused.

        :param state: New task state.
        :param state_info: New state information (i.e. error message).
        """
        assert self.task_ex

        # Ignore if task already completed.
        if states.is_completed(self.task_ex.state):
            return

        # Update only if state transition is valid.
        if not states.is_valid_transition(self.task_ex.state, state):
            return

        # We can't set the task state to RUNNING if some other
        # child executions are paused.
        child_states = [a_ex.state for a_ex in self.task_ex.executions]

        if state == states.RUNNING and states.PAUSED in child_states:
            return

        self.set_state(state, state_info)

    def _before_task_start(self):
        """Applies 'before task start' hooks of all task policies."""
        policies_spec = self.task_spec.get_policies()

        for p in policies.build_policies(policies_spec, self.wf_spec):
            p.before_task_start(self.task_ex, self.task_spec)

    def _after_task_complete(self):
        """Applies 'after task complete' hooks of all task policies."""
        policies_spec = self.task_spec.get_policies()

        for p in policies.build_policies(policies_spec, self.wf_spec):
            p.after_task_complete(self.task_ex, self.task_spec)

    @profiler.trace('task-create-task-execution')
    def _create_task_execution(self, state=states.RUNNING, state_info=None):
        """Creates a task execution in DB and remembers it in the object.

        :param state: Initial task state.
        :param state_info: Initial state information.
        """
        task_id = utils.generate_unicode_uuid()
        task_name = self.task_spec.get_name()
        task_type = self.task_spec.get_type()

        values = {
            'id': task_id,
            'name': task_name,
            'workflow_execution_id': self.wf_ex.id,
            'workflow_name': self.wf_ex.workflow_name,
            'workflow_namespace': self.wf_ex.workflow_namespace,
            'workflow_id': self.wf_ex.workflow_id,
            'state': state,
            'state_info': state_info,
            'spec': self.task_spec.to_dict(),
            'unique_key': self.unique_key,
            'in_context': self.ctx,
            'published': {},
            'runtime_context': {},
            'project_id': self.wf_ex.project_id,
            'type': task_type
        }

        if self.triggered_by:
            values['runtime_context']['triggered_by'] = self.triggered_by

        self.task_ex = db_api.create_task_execution(values)

        self.created = True

    def _get_action_defaults(self):
        """Returns default action input taken from workflow environment.

        Defaults are read from the '__actions' section of the workflow
        environment using the task action name as a key.

        :return: Dictionary with default input values, empty if the task
            has no action name or nothing is configured for it.
        """
        action_name = self.task_spec.get_action_name()

        if not action_name:
            return {}

        # Workflow parameters may have no environment at all. In this
        # case there are simply no defaults rather than an error.
        params = self.wf_ex.params or {}
        env = params.get('env') or {}

        return env.get('__actions', {}).get(action_name, {})
class RegularTask(Task):
    """Regular task.

    Takes care of processing regular tasks with one action.
    """

    @profiler.trace('regular-task-on-action-complete', hide_args=True)
    def on_action_complete(self, action_ex):
        """Completes the task with the state of the completed action.

        :param action_ex: Action execution.
        """
        state = action_ex.state

        # TODO(rakhmerov): Here we can define more informative messages
        # cases when action is successful and when it's not. For example,
        # in state_info we can specify the cause action.
        if state == states.SUCCESS:
            state_info = None
        else:
            # Be tolerant to an action execution that has no output
            # so that the task still can be completed.
            state_info = (action_ex.output or {}).get('result')

        self.complete(state, state_info)

    @profiler.trace('regular-task-on-action-update', hide_args=True)
    def on_action_update(self, action_ex):
        """Updates the task with the state of the updated action.

        :param action_ex: Action execution.
        """
        self.update(action_ex.state)

    @profiler.trace('task-run')
    def run(self):
        """Runs task.

        Starts a new task if there's no task execution yet, otherwise
        reruns the existing one.
        """
        if not self.task_ex:
            self._run_new()
        else:
            self._run_existing()

    @profiler.trace('task-run-new')
    def _run_new(self):
        """Creates a task execution and schedules its actions.

        If the task is marked as waiting it's only deferred.
        """
        if self.waiting:
            self.defer()

            return

        self._create_task_execution()

        LOG.debug(
            'Starting task [workflow=%s, task=%s, init_state=%s]',
            self.wf_ex.name,
            self.task_spec.get_name(),
            self.task_ex.state
        )

        self._before_task_start()

        # Policies could possibly change task state.
        if self.task_ex.state != states.RUNNING:
            return

        self._schedule_actions()

    @profiler.trace('task-run-existing')
    def _run_existing(self):
        """Reruns the task using the existing task execution.

        :raise MistralError: If the existing task already succeeded.
        """
        if self.waiting:
            return

        # Explicitly change task state to RUNNING.
        # Throw exception if the existing task already succeeded.
        if self.task_ex.state == states.SUCCESS:
            raise exc.MistralError(
                'Rerunning succeeded tasks is not supported.'
            )

        self.set_state(states.RUNNING, None, processed=False)

        self._update_inbound_context()
        self._update_triggered_by()
        self._reset_actions()
        self._schedule_actions()

    def _update_inbound_context(self):
        """Recalculates inbound context and merges it into task execution."""
        assert self.task_ex

        wf_ctrl = wf_base.get_controller(self.wf_ex, self.wf_spec)

        self.ctx = wf_ctrl.get_task_inbound_context(self.task_spec)

        utils.update_dict(self.task_ex.in_context, self.ctx)

    def _update_triggered_by(self):
        """Stores 'triggered_by' info, if any, in task runtime context."""
        assert self.task_ex

        if not self.triggered_by:
            return

        self.task_ex.runtime_context['triggered_by'] = self.triggered_by

    def _reset_actions(self):
        """Resets 'accepted' flag of task child executions.

        If the task was explicitly reset then all its executions become
        not accepted. Otherwise, only accepted executions in ERROR or
        CANCELLED state are affected.
        """
        all_execs = self.task_ex.executions

        if self.reset_flag:
            execs = all_execs
        else:
            failed_states = [states.ERROR, states.CANCELLED]

            execs = [
                e for e in all_execs
                if e.accepted and e.state in failed_states
            ]

        for ex in execs:
            ex.accepted = False

    def _schedule_actions(self):
        """Validates input and schedules the only action of the task."""
        # Regular task schedules just one action.
        input_dict = self._get_action_input()
        target = self._get_target(input_dict)

        action = self._build_action()

        action.validate_input(input_dict)

        action.schedule(
            input_dict,
            target,
            safe_rerun=self.task_spec.get_safe_rerun(),
            timeout=self._get_timeout()
        )

    @profiler.trace('regular-task-get-target', hide_args=True)
    def _get_target(self, input_dict):
        """Evaluates task 'target' against action input and contexts.

        :param input_dict: Action input dictionary.
        :return: Evaluated target.
        """
        ctx_view = data_flow.ContextView(
            input_dict,
            self.ctx,
            data_flow.get_workflow_environment_dict(self.wf_ex),
            self.wf_ex.context,
            self.wf_ex.input
        )

        return expr.evaluate_recursively(
            self.task_spec.get_target(),
            ctx_view
        )

    @profiler.trace('regular-task-get-action-input', hide_args=True)
    def _get_action_input(self, ctx=None):
        """Calculates action input.

        Evaluated task input is complemented with action defaults
        without overwriting the values that are already present.

        :param ctx: Data context to use instead of the task context.
        :return: Action input dictionary.
        :raise InputException: If evaluated input is not a dictionary.
        """
        input_dict = self._evaluate_expression(self.task_spec.get_input(), ctx)

        if not isinstance(input_dict, dict):
            raise exc.InputException(
                "Wrong dynamic input for task: %s. Dict type is expected. "
                "Actual type: %s. Actual value: %s" %
                (self.task_spec.get_name(), type(input_dict), str(input_dict))
            )

        return utils.merge_dicts(
            input_dict,
            self._get_action_defaults(),
            overwrite=False
        )

    def _evaluate_expression(self, expression, ctx=None):
        """Evaluates the given expression against task related contexts.

        :param expression: Expression (or a structure with expressions).
        :param ctx: Data context to use instead of the task context.
        :return: Evaluation result.
        """
        ctx_view = data_flow.ContextView(
            data_flow.get_current_task_dict(self.task_ex),
            data_flow.get_workflow_environment_dict(self.wf_ex),
            ctx or self.ctx,
            self.wf_ex.context,
            self.wf_ex.input
        )

        return expr.evaluate_recursively(
            expression,
            ctx_view
        )

    def _build_action(self):
        """Builds an action object corresponding to the task.

        :return: Workflow action if the task refers to a workflow,
            ad-hoc action if resolved action definition has a spec,
            Python action otherwise.
        """
        action_name = self.task_spec.get_action_name()
        wf_name = self.task_spec.get_workflow_name()

        # For dynamic workflow evaluation we regenerate the action.
        if wf_name:
            return actions.WorkflowAction(
                wf_name=self._evaluate_expression(wf_name),
                task_ex=self.task_ex
            )

        # For dynamic action evaluation we just regenerate the name.
        if action_name:
            action_name = self._evaluate_expression(action_name)

        # Neither action nor workflow is specified (or the name was
        # evaluated into an empty value) so fall back to no-op.
        if not action_name:
            action_name = 'std.noop'

        action_def = actions.resolve_action_definition(
            action_name,
            self.wf_ex.name,
            self.wf_spec.get_name()
        )

        if action_def.spec:
            return actions.AdHocAction(action_def, task_ex=self.task_ex,
                                       task_ctx=self.ctx,
                                       wf_ctx=self.wf_ex.context)

        return actions.PythonAction(action_def, task_ex=self.task_ex)

    def _get_timeout(self):
        """Returns action timeout taken from task 'timeout' policy.

        If the policy value is not a number it is evaluated as
        an expression against task and workflow contexts.

        :return: Positive timeout value or None if there's no timeout.
        """
        timeout = self.task_spec.get_policies().get_timeout()

        if not isinstance(timeout, (int, float)):
            wf_ex = self.task_ex.workflow_execution

            ctx_view = data_flow.ContextView(
                self.task_ex.in_context,
                wf_ex.context,
                wf_ex.input
            )

            timeout = expr.evaluate_recursively(data=timeout, context=ctx_view)

        # An empty value (e.g. None) means "no timeout". It must not be
        # compared with a number since that fails on Python 3.
        if not timeout:
            return None

        return timeout if timeout > 0 else None
class WithItemsTask(RegularTask):
    """With-items task.

    Takes care of processing "with-items" tasks.
    """

    # Keys used in task execution runtime context.
    _CONCURRENCY = 'concurrency'
    _CAPACITY = 'capacity'
    _COUNT = 'count'
    _WITH_ITEMS = 'with_items'

    # Template of "with-items" context used when the task runtime context
    # doesn't have one yet. Must never be modified in place because it's
    # shared by all tasks.
    _DEFAULT_WITH_ITEMS = {
        _COUNT: 0,
        _CONCURRENCY: 0,
        _CAPACITY: 0
    }

    @profiler.trace('with-items-task-on-action-complete', hide_args=True)
    def on_action_complete(self, action_ex):
        """Handles completion of one of the task actions.

        Releases capacity taken by the action and then either completes
        the task or schedules the next portion of actions.

        :param action_ex: Action execution.
        """
        assert self.task_ex

        with db_api.named_lock('with-items-%s' % self.task_ex.id):
            # NOTE: We need to refresh task execution object right
            # after the lock is acquired to make sure that we're
            # working with a fresh state of its runtime context.
            # Otherwise, SQLAlchemy session can contain a stale
            # cached version of it so that we don't modify actual
            # values (i.e. capacity).
            db_api.refresh(self.task_ex)

            if self.is_completed():
                return

            self._increase_capacity()

            if self.is_with_items_completed():
                state = self._get_final_state()

                # TODO(rakhmerov): Here we can define more informative messages
                # in cases when action is successful and when it's not.
                # For example, in state_info we can specify the cause action.
                # The use of action_ex.output.get('result') for state_info is
                # not accurate because there could be action executions that
                # had failed or was cancelled prior to this action execution.
                state_info = {
                    states.SUCCESS: None,
                    states.ERROR: 'One or more actions had failed.',
                    states.CANCELLED: 'One or more actions was cancelled.'
                }

                self.complete(state, state_info[state])

                return

            if self._has_more_iterations() and self._get_concurrency():
                self._schedule_actions()

    def _schedule_actions(self):
        """Schedules the next portion of actions.

        On the first call "with-items" values are validated and the task
        runtime context is prepared. If there's nothing to schedule
        the task is completed with SUCCESS state.
        """
        with_items_values = self._get_with_items_values()

        if self._is_new():
            self._validate_values(with_items_values)

            # Validation guarantees that all lists have the same length
            # so any of them gives the number of actions.
            action_count = len(six.next(iter(with_items_values.values())))

            self._prepare_runtime_context(action_count)

        input_dicts = self._get_input_dicts(with_items_values)

        if not input_dicts:
            self.complete(states.SUCCESS)

            return

        for i, input_dict in input_dicts:
            target = self._get_target(input_dict)

            action = self._build_action()

            action.validate_input(input_dict)

            action.schedule(
                input_dict,
                target,
                index=i,
                safe_rerun=self.task_spec.get_safe_rerun(),
                timeout=self._get_timeout()
            )

            # Every scheduled action takes one unit of capacity.
            self._decrease_capacity(1)

    def _get_with_items_values(self):
        """Returns all values evaluated from 'with-items' expression.

        Example:

        DSL:
            with-items:
                - var1 in <% $.arrayI %>
                - var2 in <% $.arrayJ %>

        where arrayI = [1,2,3] and arrayJ = [a,b,c]

        The result of the method in this case will be:
            {
                'var1': [1,2,3],
                'var2': [a,b,c]
            }

        :return: Evaluated 'with-items' expression values.
        """
        ctx_view = data_flow.ContextView(
            self.ctx,
            self.wf_ex.context,
            self.wf_ex.input
        )

        return expr.evaluate_recursively(
            self.task_spec.get_with_items(),
            ctx_view
        )

    def _validate_values(self, with_items_values):
        """Validates evaluated 'with-items' values.

        :param with_items_values: Dictionary of evaluated values.
        :raise InputException: If there are no values at all, if some
            value is not a list or if lists have different lengths.
        """
        # Take only mapped values and check them.
        values = list(with_items_values.values())

        if not values:
            # Fail with a meaningful error instead of IndexError below.
            raise exc.InputException(
                "Wrong input format for: %s. At least one list of"
                " items is expected." % with_items_values
            )

        if not all(isinstance(v, list) for v in values):
            raise exc.InputException(
                "Wrong input format for: %s. List type is"
                " expected for each value." % with_items_values
            )

        required_len = len(values[0])

        if not all(len(v) == required_len for v in values):
            raise exc.InputException(
                "Wrong input format for: %s. All arrays must"
                " have the same length." % with_items_values
            )

    def _get_input_dicts(self, with_items_values):
        """Calculate input dictionaries for another portion of actions.

        :param with_items_values: Dictionary of evaluated values.
        :return: a list of tuples containing indexes and
            corresponding input dicts.
        """
        result = []

        for i in self._get_next_indexes():
            # Values of all "with-items" variables for the i-th iteration.
            ctx = {k: v[i] for k, v in with_items_values.items()}

            ctx = utils.merge_dicts(ctx, self.ctx)

            result.append((i, self._get_action_input(ctx)))

        return result

    def _get_with_items_context(self):
        """Returns "with-items" context of the task.

        :return: Context stored in task runtime context or a fresh copy
            of the default one if there's nothing stored yet.
        """
        ctx = self.task_ex.runtime_context.get(self._WITH_ITEMS)

        if ctx is None:
            # Callers modify the returned dictionary in place so the
            # shared class level default must never be handed out.
            return dict(self._DEFAULT_WITH_ITEMS)

        return ctx

    def _get_with_items_count(self):
        """Returns the total number of iterations."""
        return self._get_with_items_context()[self._COUNT]

    def _get_with_items_capacity(self):
        """Returns the number of actions that still can be scheduled."""
        return self._get_with_items_context()[self._CAPACITY]

    def _get_concurrency(self):
        """Returns 'concurrency' value from task runtime context, if any."""
        return self.task_ex.runtime_context.get(self._CONCURRENCY)

    def is_with_items_completed(self):
        """Tells whether all iterations of the task are done.

        :return: True if there's an accepted cancelled execution or if
            all iterations are accepted and capacity is fully restored.
        """
        accepted = [e for e in self.task_ex.executions if e.accepted]

        if any(e.state == states.CANCELLED for e in accepted):
            return True

        count = self._get_with_items_count() or 1

        # We need to make sure that method on_action_complete() has been
        # called for every action. Just looking at number of actions and
        # their 'accepted' flag is not enough because action gets accepted
        # before on_action_complete() is called for it. This call is
        # mandatory in order to do all needed processing from task
        # perspective. So we can simply check if capacity is fully reset
        # to its initial state.
        full_capacity = (
            not self._get_concurrency() or
            self._get_with_items_capacity() == self._get_concurrency()
        )

        return count == len(accepted) and full_capacity

    def _get_final_state(self):
        """Calculates final task state based on accepted executions.

        :return: CANCELLED if any accepted execution is cancelled,
            ERROR if any accepted execution failed, SUCCESS otherwise.
        """
        accepted_states = [
            e.state for e in self.task_ex.executions if e.accepted
        ]

        if states.CANCELLED in accepted_states:
            return states.CANCELLED

        if states.ERROR in accepted_states:
            return states.ERROR

        return states.SUCCESS

    def _get_accepted_executions(self):
        """Returns executions that are both accepted and completed."""
        return [
            x for x in self.task_ex.executions
            if x.accepted and states.is_completed(x.state)
        ]

    def _get_unaccepted_executions(self):
        """Returns executions that are completed but not accepted."""
        return [
            x for x in self.task_ex.executions
            if not x.accepted and states.is_completed(x.state)
        ]

    def _get_next_start_index(self):
        """Returns the index to start scheduling new iterations from.

        It's the number of executions that are accepted, running or idle.
        """
        return len([
            x for x in self.task_ex.executions
            if (x.accepted or
                states.is_running(x.state) or
                states.is_idle(x.state))
        ])

    def _get_next_indexes(self):
        """Calculates indexes of iterations to schedule next.

        Iterations that completed without being accepted (and have no
        accepted counterpart) go first, followed by iterations that
        have not been started yet. The result is limited by capacity.

        :return: List of iteration indexes.
        """
        capacity = self._get_with_items_capacity()
        count = self._get_with_items_count()

        def _get_indexes(exs):
            return sorted({ex.runtime_context['index'] for ex in exs})

        accepted = _get_indexes(self._get_accepted_executions())
        unaccepted = _get_indexes(self._get_unaccepted_executions())

        candidates = sorted(set(unaccepted) - set(accepted))

        if candidates:
            indices = copy.copy(candidates)

            if max(candidates) < count - 1:
                indices += list(six.moves.range(max(candidates) + 1, count))
        else:
            i = self._get_next_start_index()

            indices = list(six.moves.range(i, count))

        # If capacity is None (no concurrency limit) the slice keeps
        # all indexes.
        return indices[:capacity]

    def _increase_capacity(self):
        """Releases one unit of capacity if concurrency is limited."""
        ctx = self._get_with_items_context()
        concurrency = self._get_concurrency()

        if concurrency and ctx[self._CAPACITY] < concurrency:
            ctx[self._CAPACITY] += 1

            self.task_ex.runtime_context.update({self._WITH_ITEMS: ctx})

    def _decrease_capacity(self, count):
        """Takes the given number of capacity units.

        :param count: Number of units to take.
        :raise RuntimeError: If there's not enough capacity.
        """
        ctx = self._get_with_items_context()

        capacity = ctx[self._CAPACITY]

        # Capacity equal to None means there's no concurrency limit.
        if capacity is not None:
            if capacity >= count:
                ctx[self._CAPACITY] -= count
            else:
                raise RuntimeError(
                    "Can't decrease with-items capacity"
                    " [capacity=%s, count=%s]" % (capacity, count)
                )

        self.task_ex.runtime_context.update({self._WITH_ITEMS: ctx})

    def _is_new(self):
        """Tells whether "with-items" context hasn't been prepared yet."""
        return not self.task_ex.runtime_context.get(self._WITH_ITEMS)

    def _prepare_runtime_context(self, action_count):
        """Initializes "with-items" context in task runtime context.

        :param action_count: Total number of iterations.
        """
        runtime_ctx = self.task_ex.runtime_context

        if not runtime_ctx.get(self._WITH_ITEMS):
            # Prepare current indexes and parallel limitation.
            runtime_ctx[self._WITH_ITEMS] = {
                self._CAPACITY: self._get_concurrency(),
                self._COUNT: action_count
            }

    def _has_more_iterations(self):
        """Tells whether there are iterations that haven't started yet."""
        # See action executions which have been already
        # accepted or are still running.
        action_exs = [
            x for x in self.task_ex.executions
            if x.accepted or x.state == states.RUNNING
        ]

        return self._get_with_items_count() > len(action_exs)