# Copyright 2016 - Nokia Networks.
# Copyright 2016 - Brocade Communications Systems, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.

import abc
import copy
from oslo_log import log as logging
from osprofiler import profiler
import six

from mistral.db.v2 import api as db_api
from mistral.engine import actions
from mistral.engine import dispatcher
from mistral.engine import policies
from mistral import exceptions as exc
from mistral import expressions as expr
from mistral import utils
from mistral.utils import wf_trace
from mistral.workflow import base as wf_base
from mistral.workflow import data_flow
from mistral.workflow import states


LOG = logging.getLogger(__name__)


@six.add_metaclass(abc.ABCMeta)
class Task(object):
    """Task.

    Represents a workflow task and defines interface that can be used by
    Mistral engine or its components in order to manipulate with tasks.
    """

    def __init__(self, wf_ex, wf_spec, task_spec, ctx, task_ex=None,
                 unique_key=None, waiting=False, triggered_by=None):
        """Initializes the task wrapper.

        :param wf_ex: Workflow execution the task belongs to.
        :param wf_spec: Workflow specification.
        :param task_spec: Task specification.
        :param ctx: Task context. It is stored as 'in_context' of the
            task execution when the latter gets created.
        :param task_ex: Existing task execution, if any.
        :param unique_key: Key used by defer() both as the name of the
            lock and as the task execution lookup criterion.
        :param waiting: If True, running a new task only defers it.
        :param triggered_by: Value saved under the 'triggered_by' key of
            the task execution runtime context, if not empty.
        """
        self.wf_ex = wf_ex
        self.task_spec = task_spec
        self.ctx = ctx
        self.task_ex = task_ex
        self.wf_spec = wf_spec
        self.unique_key = unique_key
        self.waiting = waiting
        self.triggered_by = triggered_by
        self.reset_flag = False
        self.created = False
        self.state_changed = False

    def is_completed(self):
        """Tells if task execution exists and is in a completed state."""
        return self.task_ex and states.is_completed(self.task_ex.state)

    def is_waiting(self):
        """Returns the 'waiting' flag passed to the constructor."""
        return self.waiting

    def is_created(self):
        """Tells if this object created a new task execution."""
        return self.created

    def is_state_changed(self):
        """Tells if this object changed the task execution state."""
        return self.state_changed

    @abc.abstractmethod
    def on_action_complete(self, action_ex):
        """Handle action completion.

        :param action_ex: Action execution.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def on_action_update(self, action_ex):
        """Handle action update.

        :param action_ex: Action execution.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def run(self):
        """Runs task."""
        raise NotImplementedError

    @profiler.trace('task-defer')
    def defer(self):
        """Defers task.

        This method puts task to a waiting state. If there is no task
        execution yet, it is looked up by the unique key and created in
        WAITING state when not found.
        """
        with db_api.named_lock(self.unique_key):
            if not self.task_ex:
                t_execs = db_api.get_task_executions(
                    workflow_execution_id=self.wf_ex.id,
                    unique_key=self.unique_key
                )

                self.task_ex = t_execs[0] if t_execs else None

            msg = 'Task is waiting.'

            if not self.task_ex:
                self._create_task_execution(
                    state=states.WAITING,
                    state_info=msg
                )
            elif self.task_ex.state != states.WAITING:
                self.set_state(states.WAITING, msg)

    def reset(self):
        """Raises the flag making _reset_actions() reset all executions."""
        self.reset_flag = True

    @profiler.trace('task-set-state')
    def set_state(self, state, state_info, processed=None):
        """Sets task state without executing post completion logic.

        :param state: New task state.
        :param state_info: New state information (i.e. error message).
        :param processed: New "processed" flag value.
        :return: False if the state update query did not change the DB
            (the DB API returned None), True otherwise, including the
            case when neither the state nor the state info differs from
            the current values and nothing had to be updated.
        """
        assert self.task_ex

        cur_state = self.task_ex.state

        if cur_state != state or self.task_ex.state_info != state_info:
            task_ex = db_api.update_task_execution_state(
                id=self.task_ex.id,
                cur_state=cur_state,
                state=state
            )

            if task_ex is None:
                # Do nothing because the update query did not change the DB.
                return False

            self.task_ex = task_ex
            self.task_ex.state_info = state_info
            self.state_changed = True

            if processed is not None:
                self.task_ex.processed = processed

            wf_trace.info(
                self.task_ex.workflow_execution,
                "Task '%s' (%s) [%s -> %s, msg=%s]" %
                (self.task_ex.name,
                 self.task_ex.id,
                 cur_state,
                 state,
                 state_info)
            )

        return True

    @profiler.trace('task-complete')
    def complete(self, state, state_info=None):
        """Complete task and set specified state.

        Method sets specified task state and runs all necessary post
        completion logic such as publishing workflow variables and
        scheduling new workflow commands.

        :param state: New task state.
        :param state_info: New state information (i.e. error message).
        """
        assert self.task_ex

        # Ignore if task already completed.
        if self.is_completed():
            return

        # If we were unable to change the task state it means that it was
        # already changed by a concurrent process. In this case we need to
        # skip all regular completion logic like scheduling new tasks,
        # running engine commands and publishing.
        if not self.set_state(state, state_info):
            return

        data_flow.publish_variables(self.task_ex, self.task_spec)

        if not self.task_spec.get_keep_result():
            # Destroy task result.
            for ex in self.task_ex.action_executions:
                if hasattr(ex, 'output'):
                    ex.output = {}

        self._after_task_complete()

        # Ignore DELAYED state.
        if self.task_ex.state == states.RUNNING_DELAYED:
            return

        # If workflow is paused we shouldn't schedule new commands
        # and mark task as processed.
        if states.is_paused(self.wf_ex.state):
            return

        wf_ctrl = wf_base.get_controller(self.wf_ex, self.wf_spec)

        # Calculate commands to process next.
        cmds = wf_ctrl.continue_workflow(task_ex=self.task_ex)

        # Mark task as processed after all decisions have been made
        # upon its completion.
        self.task_ex.processed = True

        dispatcher.dispatch_workflow_commands(self.wf_ex, cmds)

    @profiler.trace('task-update')
    def update(self, state, state_info=None):
        """Update task and set specified state.

        Method sets specified task state.

        :param state: New task state.
        :param state_info: New state information (i.e. error message).
        """
        assert self.task_ex

        # Ignore if task already completed.
        if states.is_completed(self.task_ex.state):
            return

        # Update only if state transition is valid.
        if not states.is_valid_transition(self.task_ex.state, state):
            return

        # We can't set the task state to RUNNING if some other
        # child executions are paused.
        child_states = [a_ex.state for a_ex in self.task_ex.executions]

        if state == states.RUNNING and states.PAUSED in child_states:
            return

        self.set_state(state, state_info)

    def _before_task_start(self):
        """Applies 'before task start' hooks of all task policies."""
        policies_spec = self.task_spec.get_policies()

        for p in policies.build_policies(policies_spec, self.wf_spec):
            p.before_task_start(self.task_ex, self.task_spec)

    def _after_task_complete(self):
        """Applies 'after task complete' hooks of all task policies."""
        policies_spec = self.task_spec.get_policies()

        for p in policies.build_policies(policies_spec, self.wf_spec):
            p.after_task_complete(self.task_ex, self.task_spec)

    @profiler.trace('task-create-task-execution')
    def _create_task_execution(self, state=states.RUNNING, state_info=None):
        """Creates a task execution in DB and remembers it.

        :param state: Initial task state.
        :param state_info: Initial state information.
        """
        task_id = utils.generate_unicode_uuid()
        task_name = self.task_spec.get_name()
        task_type = self.task_spec.get_type()

        values = {
            'id': task_id,
            'name': task_name,
            'workflow_execution_id': self.wf_ex.id,
            'workflow_name': self.wf_ex.workflow_name,
            'workflow_namespace': self.wf_ex.workflow_namespace,
            'workflow_id': self.wf_ex.workflow_id,
            'state': state,
            'state_info': state_info,
            'spec': self.task_spec.to_dict(),
            'unique_key': self.unique_key,
            'in_context': self.ctx,
            'published': {},
            'runtime_context': {},
            'project_id': self.wf_ex.project_id,
            'type': task_type
        }

        if self.triggered_by:
            values['runtime_context']['triggered_by'] = self.triggered_by

        self.task_ex = db_api.create_task_execution(values)

        self.created = True

    def _get_action_defaults(self):
        """Returns defaults for the task action taken from the env.

        The defaults are looked up under the '__actions' key of the
        workflow execution 'env' parameter by the action name.

        :return: Dictionary with defaults or an empty dictionary if the
            task has no action name or there are no defaults for it.
        """
        action_name = self.task_spec.get_action_name()

        if not action_name:
            return {}

        env = self.wf_ex.params['env']

        return env.get('__actions', {}).get(action_name, {})


class RegularTask(Task):
    """Regular task.

    Takes care of processing regular tasks with one action.
    """

    @profiler.trace('regular-task-on-action-complete', hide_args=True)
    def on_action_complete(self, action_ex):
        """Completes the task with the state of the given action.

        :param action_ex: Action execution.
        """
        state = action_ex.state

        # TODO(rakhmerov): Here we can define more informative messages
        # cases when action is successful and when it's not. For example,
        # in state_info we can specify the cause action.
        state_info = (None if state == states.SUCCESS
                      else action_ex.output.get('result'))

        self.complete(state, state_info)

    @profiler.trace('regular-task-on-action-update', hide_args=True)
    def on_action_update(self, action_ex):
        """Updates the task with the state of the given action.

        :param action_ex: Action execution.
        """
        self.update(action_ex.state)

    @profiler.trace('task-run')
    def run(self):
        """Runs the task as new or existing one."""
        if not self.task_ex:
            self._run_new()
        else:
            self._run_existing()

    @profiler.trace('task-run-new')
    def _run_new(self):
        """Creates a task execution and schedules its actions.

        If the task is marked as waiting it only gets deferred.
        """
        if self.waiting:
            self.defer()

            return

        self._create_task_execution()

        LOG.debug(
            'Starting task [workflow=%s, task=%s, init_state=%s]',
            self.wf_ex.name,
            self.task_spec.get_name(),
            self.task_ex.state
        )

        self._before_task_start()

        # Policies could possibly change task state.
        if self.task_ex.state != states.RUNNING:
            return

        self._schedule_actions()

    @profiler.trace('task-run-existing')
    def _run_existing(self):
        """Moves an existing task back to RUNNING and reschedules it.

        :raises MistralError: If the existing task already succeeded.
        """
        if self.waiting:
            return

        # Explicitly change task state to RUNNING.
        # Throw exception if the existing task already succeeded.
        if self.task_ex.state == states.SUCCESS:
            raise exc.MistralError(
                'Rerunning succeeded tasks is not supported.'
            )

        self.set_state(states.RUNNING, None, processed=False)

        self._update_inbound_context()
        self._update_triggered_by()
        self._reset_actions()
        self._schedule_actions()

    def _update_inbound_context(self):
        """Recalculates inbound context and merges it into 'in_context'."""
        assert self.task_ex

        wf_ctrl = wf_base.get_controller(self.wf_ex, self.wf_spec)

        self.ctx = wf_ctrl.get_task_inbound_context(self.task_spec)

        utils.update_dict(self.task_ex.in_context, self.ctx)

    def _update_triggered_by(self):
        """Saves 'triggered_by' into runtime context if it is set."""
        assert self.task_ex

        if not self.triggered_by:
            return

        self.task_ex.runtime_context['triggered_by'] = self.triggered_by

    def _reset_actions(self):
        """Resets task state.

        Depending on task type this method may reset task state. For
        example, delete all task actions etc.
        """
        # Reset state of processed task and related action executions.
        if self.reset_flag:
            execs = self.task_ex.executions
        else:
            execs = [
                e for e in self.task_ex.executions
                if e.accepted and
                e.state in [states.ERROR, states.CANCELLED]
            ]

        for ex in execs:
            ex.accepted = False

    def _schedule_actions(self):
        """Builds, validates and schedules the task action."""
        # Regular task schedules just one action.
        input_dict = self._get_action_input()
        target = self._get_target(input_dict)

        action = self._build_action()

        action.validate_input(input_dict)

        action.schedule(
            input_dict,
            target,
            safe_rerun=self.task_spec.get_safe_rerun(),
            timeout=self._get_timeout()
        )

    @profiler.trace('regular-task-get-target', hide_args=True)
    def _get_target(self, input_dict):
        """Evaluates the task target.

        :param input_dict: Action input. It is the first layer of the
            context view used for evaluation.
        :return: Evaluated target.
        """
        ctx_view = data_flow.ContextView(
            input_dict,
            self.ctx,
            data_flow.get_workflow_environment_dict(self.wf_ex),
            self.wf_ex.context,
            self.wf_ex.input
        )

        return expr.evaluate_recursively(
            self.task_spec.get_target(),
            ctx_view
        )

    @profiler.trace('regular-task-get-action-input', hide_args=True)
    def _get_action_input(self, ctx=None):
        """Evaluates action input and adds action defaults to it.

        :param ctx: Context to evaluate the input against. If empty,
            the task context is used.
        :return: Input dictionary merged with action defaults (values
            of the evaluated input are not overwritten).
        :raises InputException: If evaluated input is not a dictionary.
        """
        input_dict = self._evaluate_expression(self.task_spec.get_input(), ctx)

        if not isinstance(input_dict, dict):
            raise exc.InputException(
                "Wrong dynamic input for task: %s. Dict type is expected. "
                "Actual type: %s. Actual value: %s" %
                (self.task_spec.get_name(), type(input_dict), str(input_dict))
            )

        return utils.merge_dicts(
            input_dict,
            self._get_action_defaults(),
            overwrite=False
        )

    def _evaluate_expression(self, expression, ctx=None):
        """Evaluates the given expression against the task data.

        :param expression: Expression (or structure) to evaluate.
        :param ctx: Context to use instead of the task context, if not
            empty.
        :return: Evaluation result.
        """
        ctx_view = data_flow.ContextView(
            data_flow.get_current_task_dict(self.task_ex),
            data_flow.get_workflow_environment_dict(self.wf_ex),
            ctx or self.ctx,
            self.wf_ex.context,
            self.wf_ex.input
        )

        return expr.evaluate_recursively(
            expression,
            ctx_view
        )

    def _build_action(self):
        """Builds an action object for the task.

        :return: WorkflowAction if the task refers to a workflow,
            AdHocAction if the resolved action definition has a spec,
            PythonAction otherwise.
        """
        action_name = self.task_spec.get_action_name()
        wf_name = self.task_spec.get_workflow_name()

        # For dynamic workflow evaluation we regenerate the action.
        if wf_name:
            return actions.WorkflowAction(
                wf_name=self._evaluate_expression(wf_name),
                task_ex=self.task_ex
            )

        # For dynamic action evaluation we just regenerate the name.
        if action_name:
            action_name = self._evaluate_expression(action_name)

        if not action_name:
            action_name = 'std.noop'

        action_def = actions.resolve_action_definition(
            action_name,
            self.wf_ex.name,
            self.wf_spec.get_name()
        )

        if action_def.spec:
            return actions.AdHocAction(action_def, task_ex=self.task_ex,
                                       task_ctx=self.ctx,
                                       wf_ctx=self.wf_ex.context)

        return actions.PythonAction(action_def, task_ex=self.task_ex)

    def _get_timeout(self):
        """Returns the task timeout taken from the 'timeout' policy.

        If the policy value is not a number it is evaluated against the
        task inbound context, workflow context and workflow input.

        :return: Timeout if it is positive, None otherwise.
        """
        timeout = self.task_spec.get_policies().get_timeout()

        if not isinstance(timeout, (int, float)):
            wf_ex = self.task_ex.workflow_execution

            ctx_view = data_flow.ContextView(
                self.task_ex.in_context,
                wf_ex.context,
                wf_ex.input
            )

            timeout = expr.evaluate_recursively(data=timeout, context=ctx_view)

        # The truthiness check makes a None timeout yield None instead of
        # failing on Python 3, which refuses to order None against a number
        # (Python 2 evaluated such comparison to False).
        return timeout if timeout and timeout > 0 else None


class WithItemsTask(RegularTask):
    """With-items task.

    Takes care of processing "with-items" tasks.
    """

    _CONCURRENCY = 'concurrency'
    _CAPACITY = 'capacity'
    _COUNT = 'count'
    _WITH_ITEMS = 'with_items'

    _DEFAULT_WITH_ITEMS = {
        _COUNT: 0,
        _CONCURRENCY: 0,
        _CAPACITY: 0
    }

    @profiler.trace('with-items-task-on-action-complete', hide_args=True)
    def on_action_complete(self, action_ex):
        """Handles completion of one of the task actions.

        Completes the task if all iterations are done, otherwise
        schedules more actions if there are iterations left and
        concurrency is set.

        :param action_ex: Action execution.
        """
        assert self.task_ex

        with db_api.named_lock('with-items-%s' % self.task_ex.id):
            # NOTE: We need to refresh task execution object right
            # after the lock is acquired to make sure that we're
            # working with a fresh state of its runtime context.
            # Otherwise, SQLAlchemy session can contain a stale
            # cached version of it so that we don't modify actual
            # values (i.e. capacity).
            db_api.refresh(self.task_ex)

            if self.is_completed():
                return

            self._increase_capacity()

            if self.is_with_items_completed():
                state = self._get_final_state()

                # TODO(rakhmerov): Here we can define more informative messages
                # in cases when action is successful and when it's not.
                # For example, in state_info we can specify the cause action.
                # The use of action_ex.output.get('result') for state_info is
                # not accurate because there could be action executions that
                # had failed or was cancelled prior to this action execution.
                state_info = {
                    states.SUCCESS: None,
                    states.ERROR: 'One or more actions had failed.',
                    states.CANCELLED: 'One or more actions was cancelled.'
                }

                self.complete(state, state_info[state])

                return

            if self._has_more_iterations() and self._get_concurrency():
                self._schedule_actions()

    def _schedule_actions(self):
        """Schedules actions for the next portion of iterations.

        On the first call it also validates 'with-items' values and
        initializes the runtime context. If there is nothing to
        schedule the task is completed with SUCCESS.
        """
        with_items_values = self._get_with_items_values()

        if self._is_new():
            self._validate_values(with_items_values)

            action_count = len(six.next(iter(with_items_values.values())))

            self._prepare_runtime_context(action_count)

        input_dicts = self._get_input_dicts(with_items_values)

        if not input_dicts:
            self.complete(states.SUCCESS)

            return

        for i, input_dict in input_dicts:
            target = self._get_target(input_dict)

            action = self._build_action()

            action.validate_input(input_dict)

            action.schedule(
                input_dict,
                target,
                index=i,
                safe_rerun=self.task_spec.get_safe_rerun(),
                timeout=self._get_timeout()
            )

            self._decrease_capacity(1)

    def _get_with_items_values(self):
        """Returns all values evaluated from 'with-items' expression.

        Example:

        DSL:
            with-items:
                - var1 in <% $.arrayI %>
                - var2 in <% $.arrayJ %>

        where arrayI = [1,2,3] and arrayJ = [a,b,c]

        The result of the method in this case will be:
            {
                'var1': [1,2,3],
                'var2': [a,b,c]
            }

        :return: Evaluated 'with-items' expression values.
        """
        ctx_view = data_flow.ContextView(
            self.ctx,
            self.wf_ex.context,
            self.wf_ex.input
        )

        return expr.evaluate_recursively(
            self.task_spec.get_with_items(),
            ctx_view
        )

    def _validate_values(self, with_items_values):
        """Checks that all 'with-items' values are lists of equal length.

        :param with_items_values: Evaluated 'with-items' values.
        :raises InputException: If some value is not a list or lists
            have different lengths.
        """
        # Take only mapped values and check them.
        values = list(with_items_values.values())

        if not all(isinstance(v, list) for v in values):
            raise exc.InputException(
                "Wrong input format for: %s. List type is"
                " expected for each value." % with_items_values
            )

        required_len = len(values[0])

        if not all(len(v) == required_len for v in values):
            raise exc.InputException(
                "Wrong input format for: %s. All arrays must"
                " have the same length." % with_items_values
            )

    def _get_input_dicts(self, with_items_values):
        """Calculate input dictionaries for another portion of actions.

        :param with_items_values: Evaluated 'with-items' values.
        :return: a list of tuples containing indexes and
            corresponding input dicts.
        """
        result = []

        for i in self._get_next_indexes():
            # Pick the i-th element of every 'with-items' variable.
            ctx = {k: v[i] for k, v in with_items_values.items()}

            ctx = utils.merge_dicts(ctx, self.ctx)

            result.append((i, self._get_action_input(ctx)))

        return result

    def _get_with_items_context(self):
        """Returns the 'with-items' part of the task runtime context.

        :return: Dictionary stored in the runtime context or, if there
            is none, a fresh copy of the defaults.
        """
        runtime_ctx = self.task_ex.runtime_context

        if self._WITH_ITEMS in runtime_ctx:
            return runtime_ctx[self._WITH_ITEMS]

        # Callers modify the returned dictionary in place and then store
        # it in the runtime context, so the class level defaults must
        # never be handed out directly. Otherwise, they would get changed
        # for all the tasks at once.
        return dict(self._DEFAULT_WITH_ITEMS)

    def _get_with_items_count(self):
        """Returns 'count' stored in the 'with-items' context."""
        return self._get_with_items_context()[self._COUNT]

    def _get_with_items_capacity(self):
        """Returns 'capacity' stored in the 'with-items' context."""
        return self._get_with_items_context()[self._CAPACITY]

    def _get_concurrency(self):
        """Returns 'concurrency' stored in the task runtime context."""
        return self.task_ex.runtime_context.get(self._CONCURRENCY)

    def is_with_items_completed(self):
        """Tells if processing of all iterations is finished.

        :return: True if there is an accepted cancelled action or if
            the number of accepted actions equals to the iteration
            count and capacity is fully restored.
        """
        executions = self.task_ex.executions

        if any(e.accepted and e.state == states.CANCELLED
               for e in executions):
            return True

        accepted = [e for e in executions if e.accepted]
        count = self._get_with_items_count() or 1

        # We need to make sure that method on_action_complete() has been
        # called for every action. Just looking at number of actions and
        # their 'accepted' flag is not enough because action gets accepted
        # before on_action_complete() is called for it. This call is
        # mandatory in order to do all needed processing from task
        # perspective. So we can simply check if capacity is fully reset
        # to its initial state.
        full_capacity = (
            not self._get_concurrency() or
            self._get_with_items_capacity() == self._get_concurrency()
        )

        return count == len(accepted) and full_capacity

    def _get_final_state(self):
        """Calculates the final task state by accepted action states.

        :return: CANCELLED if there is an accepted cancelled action,
            ERROR if there is an accepted failed action, SUCCESS
            otherwise.
        """
        accepted_states = [
            e.state for e in self.task_ex.executions if e.accepted
        ]

        if states.CANCELLED in accepted_states:
            return states.CANCELLED

        if states.ERROR in accepted_states:
            return states.ERROR

        return states.SUCCESS

    def _get_accepted_executions(self):
        """Returns executions that are both accepted and completed."""
        return [
            x for x in self.task_ex.executions
            if x.accepted and states.is_completed(x.state)
        ]

    def _get_unaccepted_executions(self):
        """Returns executions that are completed but not accepted."""
        return [
            x for x in self.task_ex.executions
            if not x.accepted and states.is_completed(x.state)
        ]

    def _get_next_start_index(self):
        """Returns number of accepted, running or idle executions.

        It is used as the first index to schedule when there are no
        completed unaccepted executions to reschedule.
        """
        return len([
            x for x in self.task_ex.executions
            if (x.accepted or
                states.is_running(x.state) or
                states.is_idle(x.state))
        ])

    def _get_next_indexes(self):
        """Returns indexes of iterations to schedule next.

        Indexes of completed but not accepted executions go first (if
        any) followed by the indexes that come after the biggest of
        them. The result is limited by the current capacity.
        """
        capacity = self._get_with_items_capacity()
        count = self._get_with_items_count()

        def _get_indexes(exs):
            return {ex.runtime_context['index'] for ex in exs}

        accepted = _get_indexes(self._get_accepted_executions())
        unaccepted = _get_indexes(self._get_unaccepted_executions())

        candidates = sorted(unaccepted - accepted)

        if candidates:
            indices = copy.copy(candidates)

            if max(candidates) < count - 1:
                indices += list(six.moves.range(max(candidates) + 1, count))
        else:
            i = self._get_next_start_index()

            indices = list(six.moves.range(i, count))

        # NOTE: If capacity is None the slice returns all the indexes.
        return indices[:capacity]

    def _increase_capacity(self):
        """Gives one capacity unit back, up to the concurrency value."""
        ctx = self._get_with_items_context()
        concurrency = self._get_concurrency()

        if concurrency and ctx[self._CAPACITY] < concurrency:
            ctx[self._CAPACITY] += 1

            self.task_ex.runtime_context.update({self._WITH_ITEMS: ctx})

    def _decrease_capacity(self, count):
        """Takes the given number of capacity units.

        Nothing is subtracted if capacity is None.

        :param count: Number of units to take.
        :raises RuntimeError: If capacity is less than the count.
        """
        ctx = self._get_with_items_context()

        capacity = ctx[self._CAPACITY]

        if capacity is not None:
            if capacity >= count:
                ctx[self._CAPACITY] -= count
            else:
                raise RuntimeError(
                    "Can't decrease with-items capacity"
                    " [capacity=%s, count=%s]" % (capacity, count)
                )

        self.task_ex.runtime_context.update({self._WITH_ITEMS: ctx})

    def _is_new(self):
        """Tells if the 'with-items' context is not initialized yet."""
        return not self.task_ex.runtime_context.get(self._WITH_ITEMS)

    def _prepare_runtime_context(self, action_count):
        """Initializes the 'with-items' context if it is not set yet.

        :param action_count: Total number of iterations.
        """
        runtime_ctx = self.task_ex.runtime_context

        if not runtime_ctx.get(self._WITH_ITEMS):
            # Prepare current indexes and parallel limitation.
            runtime_ctx[self._WITH_ITEMS] = {
                self._CAPACITY: self._get_concurrency(),
                self._COUNT: action_count
            }

    def _has_more_iterations(self):
        """Tells if there are iterations that have not been started."""
        # See action executions which have been already
        # accepted or are still running.
        action_exs = [
            x for x in self.task_ex.executions
            if x.accepted or x.state == states.RUNNING
        ]

        return self._get_with_items_count() > len(action_exs)