mistral/mistral/engine/default_engine.py

# Copyright 2013 - Mirantis, Inc.
# Copyright 2015 - StackStorm, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.

import traceback

from oslo_log import log as logging

from mistral import coordination
from mistral.db.v2 import api as db_api
from mistral.db.v2.sqlalchemy import models as db_models
from mistral.engine import action_handler
from mistral.engine import base
from mistral.engine import task_handler
from mistral.engine import workflow_handler as wf_handler
from mistral import exceptions as exc
from mistral.services import action_manager as a_m
from mistral.services import executions as wf_ex_service
from mistral.services import workflows as wf_service
from mistral import utils as u
from mistral.utils import wf_trace
from mistral.workbook import parser as spec_parser
from mistral.workflow import base as wf_base
from mistral.workflow import commands
from mistral.workflow import states
from mistral.workflow import utils as wf_utils

LOG = logging.getLogger(__name__)


# Submodules of mistral.engine will throw NoSuchOptError if configuration
# options required at top level of this  __init__.py are not imported before
# the submodules are referenced.


class DefaultEngine(base.Engine, coordination.Service):
    def __init__(self, engine_client):
        self._engine_client = engine_client

        coordination.Service.__init__(self, 'engine_group')

    @u.log_exec(LOG)
    def start_workflow(self, wf_identifier, wf_input, description='',
                       **params):
        wf_ex_id = None

        try:
            # Create a persistent workflow execution in a separate transaction
            # so that we can return it even in case of unexpected errors that
            # lead to transaction rollback.
            with db_api.transaction():
                # The new workflow execution will be in an IDLE
                # state on initial record creation.
                wf_ex_id, wf_spec = wf_ex_service.create_workflow_execution(
                    wf_identifier,
                    wf_input,
                    description,
                    params
                )

            with db_api.transaction():
                wf_ex = db_api.get_workflow_execution(wf_ex_id)
                wf_handler.set_execution_state(wf_ex, states.RUNNING)

                wf_ctrl = wf_base.get_controller(wf_ex, wf_spec)

                self._dispatch_workflow_commands(
                    wf_ex,
                    wf_ctrl.continue_workflow(),
                    wf_spec
                )

                return wf_ex.get_clone()
        except Exception as e:
            LOG.error(
                "Failed to start workflow '%s' id=%s: %s\n%s",
                wf_identifier, wf_ex_id, e, traceback.format_exc()
            )

            wf_ex = self._fail_workflow(wf_ex_id, e)

            if wf_ex:
                return wf_ex.get_clone()

            raise e

    @u.log_exec(LOG)
    def start_action(self, action_name, action_input,
                     description=None, **params):
        with db_api.transaction():
            action_def = action_handler.resolve_definition(action_name)
            resolved_action_input = action_handler.get_action_input(
                action_name,
                action_input
            )
            action = a_m.get_action_class(action_def.name)(
                **resolved_action_input
            )

            # If we see action is asynchronous, then we enforce 'save_result'.
            if params.get('save_result') or not action.is_sync():
                action_ex = action_handler.create_action_execution(
                    action_def,
                    resolved_action_input,
                    description=description
                )

                action_handler.run_action(
                    action_def,
                    resolved_action_input,
                    action_ex.id,
                    params.get('target')
                )

                return action_ex.get_clone()
            else:
                output = action_handler.run_action(
                    action_def,
                    resolved_action_input,
                    target=params.get('target'),
                    async=False
                )

                return db_models.ActionExecution(
                    name=action_name,
                    description=description,
                    input=action_input,
                    output=output
                )

    def on_task_state_change(self, task_ex_id, state, state_info=None):
        with db_api.transaction():
            task_ex = db_api.get_task_execution(task_ex_id)
            # TODO(rakhmerov): The method is mostly needed for policy and
            # we are supposed to get the same action execution as when the
            # policy worked.

            wf_ex_id = task_ex.workflow_execution_id
            wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)
            wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

            wf_trace.info(
                task_ex,
                "Task '%s' [%s -> %s] state_info : %s"
                % (task_ex.name, task_ex.state, state, state_info)
            )

            task_ex.state = state
            task_ex.state_info = state_info

            self._on_task_state_change(task_ex, wf_ex, wf_spec)

    def _on_task_state_change(self, task_ex, wf_ex, wf_spec):
        task_spec = wf_spec.get_tasks()[task_ex.name]

        if task_handler.is_task_completed(task_ex, task_spec):
            task_handler.after_task_complete(task_ex, task_spec, wf_spec)

            # Ignore DELAYED state.
            if task_ex.state == states.RUNNING_DELAYED:
                return

            wf_ctrl = wf_base.get_controller(wf_ex, wf_spec)

            # Calculate commands to process next.
            try:
                cmds = wf_ctrl.continue_workflow()
            except exc.YaqlEvaluationException as e:
                LOG.error(
                    'YAQL error occurred while calculating next workflow '
                    'commands [wf_ex_id=%s, task_ex_id=%s]: %s',
                    wf_ex.id, task_ex.id, e
                )

                wf_handler.fail_workflow(wf_ex, str(e))

                return

            # Mark task as processed after all decisions have been made
            # upon its completion.
            task_ex.processed = True

            self._dispatch_workflow_commands(wf_ex, cmds, wf_spec)

            self._check_workflow_completion(wf_ex, wf_ctrl, wf_spec)
        elif task_handler.need_to_continue(task_ex, task_spec):
            # Re-run existing task.
            cmds = [commands.RunExistingTask(task_ex, reset=False)]

            self._dispatch_workflow_commands(wf_ex, cmds, wf_spec)

    @staticmethod
    def _check_workflow_completion(wf_ex, wf_ctrl, wf_spec):
        if states.is_paused_or_completed(wf_ex.state):
            return

        # Workflow is not completed if there are any incomplete task
        # executions that are not in WAITING state. If all incomplete
        # tasks are waiting and there are unhandled errors, then these
        # tasks will not reach completion. In this case, mark the
        # workflow complete.
        incomplete_tasks = wf_utils.find_incomplete_task_executions(wf_ex)

        if any(not states.is_waiting(t.state) for t in incomplete_tasks):
            return

        if wf_ctrl.all_errors_handled():
            wf_handler.succeed_workflow(
                wf_ex,
                wf_ctrl.evaluate_workflow_final_context(),
                wf_spec
            )
        else:
            state_info = wf_utils.construct_fail_info_message(wf_ctrl, wf_ex)

            wf_handler.fail_workflow(wf_ex, state_info)

    @u.log_exec(LOG)
    def on_action_complete(self, action_ex_id, result):
        wf_ex_id = None

        try:
            with db_api.transaction():
                action_ex = db_api.get_action_execution(action_ex_id)

                # In case of single action execution there is no
                # assigned task execution.
                if not action_ex.task_execution:
                    return action_handler.store_action_result(
                        action_ex,
                        result
                    ).get_clone()

                wf_ex_id = action_ex.task_execution.workflow_execution_id
                wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)

                wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

                task_ex = task_handler.on_action_complete(
                    action_ex,
                    wf_spec,
                    result
                )

                # If workflow is on pause or completed then there's no
                # need to continue workflow.
                if states.is_paused_or_completed(wf_ex.state):
                    return action_ex.get_clone()

                self._on_task_state_change(task_ex, wf_ex, wf_spec)

                return action_ex.get_clone()
        except Exception as e:
            # TODO(rakhmerov): Need to refactor logging in a more elegant way.
            LOG.error(
                'Failed to handle action execution result [id=%s]: %s\n%s',
                action_ex_id, e, traceback.format_exc()
            )

            # If an exception was thrown after we got the wf_ex_id
            if wf_ex_id:
                self._fail_workflow(wf_ex_id, e)

            raise e

    @u.log_exec(LOG)
    def pause_workflow(self, execution_id):
        with db_api.transaction():
            wf_ex = wf_handler.lock_workflow_execution(execution_id)

            wf_handler.set_execution_state(wf_ex, states.PAUSED)

        return wf_ex

    def _continue_workflow(self, wf_ex, task_ex=None, reset=True, env=None):
        wf_ex = wf_service.update_workflow_execution_env(wf_ex, env)

        wf_handler.set_execution_state(
            wf_ex,
            states.RUNNING,
            set_upstream=True
        )

        wf_ctrl = wf_base.get_controller(wf_ex)

        # TODO(rakhmerov): Add YAQL error handling.
        # Calculate commands to process next.
        cmds = wf_ctrl.continue_workflow(task_ex=task_ex, reset=reset, env=env)

        # When resuming a workflow we need to ignore all 'pause'
        # commands because workflow controller takes tasks that
        # completed within the period when the workflow was paused.
        cmds = list(
            filter(
                lambda c: not isinstance(c, commands.PauseWorkflow),
                cmds
            )
        )

        # Since there's no explicit task causing the operation
        # we need to mark all not processed tasks as processed
        # because workflow controller takes only completed tasks
        # with flag 'processed' equal to False.
        for t_ex in wf_ex.task_executions:
            if states.is_completed(t_ex.state) and not t_ex.processed:
                t_ex.processed = True

        wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

        self._dispatch_workflow_commands(wf_ex, cmds, wf_spec)

        if not cmds:
            if not wf_utils.find_incomplete_task_executions(wf_ex):
                wf_handler.succeed_workflow(
                    wf_ex,
                    wf_ctrl.evaluate_workflow_final_context(),
                    wf_spec
                )

        return wf_ex.get_clone()

    @u.log_exec(LOG)
    def rerun_workflow(self, wf_ex_id, task_ex_id, reset=True, env=None):
        try:
            with db_api.transaction():
                wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)

                task_ex = db_api.get_task_execution(task_ex_id)

                if task_ex.workflow_execution.id != wf_ex_id:
                    raise ValueError('Workflow execution ID does not match.')

                if wf_ex.state == states.PAUSED:
                    return wf_ex.get_clone()

                return self._continue_workflow(wf_ex, task_ex, reset, env=env)
        except Exception as e:
            LOG.error(
                "Failed to rerun execution id=%s at task=%s: %s\n%s",
                wf_ex_id, task_ex_id, e, traceback.format_exc()
            )
            self._fail_workflow(wf_ex_id, e)
            raise e

    @u.log_exec(LOG)
    def resume_workflow(self, wf_ex_id, env=None):
        try:
            with db_api.transaction():
                wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)

                if (not states.is_paused(wf_ex.state) and
                        not states.is_idle(wf_ex.state)):
                    return wf_ex.get_clone()

                return self._continue_workflow(wf_ex, env=env)
        except Exception as e:
            LOG.error(
                "Failed to resume execution id=%s: %s\n%s",
                wf_ex_id, e, traceback.format_exc()
            )
            self._fail_workflow(wf_ex_id, e)
            raise e

    @u.log_exec(LOG)
    def stop_workflow(self, wf_ex_id, state, message=None):
        with db_api.transaction():
            wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)

            return self._stop_workflow(wf_ex, state, message)

    @staticmethod
    def _stop_workflow(wf_ex, state, message=None):
        if state == states.SUCCESS:
            wf_ctrl = wf_base.get_controller(wf_ex)

            final_context = {}

            try:
                final_context = wf_ctrl.evaluate_workflow_final_context()
            except Exception as e:
                LOG.warning(
                    'Failed to get final context for %s: %s' % (wf_ex, e)
                )

            wf_spec = spec_parser.get_workflow_spec(wf_ex.spec)

            return wf_handler.succeed_workflow(
                wf_ex,
                final_context,
                wf_spec,
                message
            )
        elif state == states.ERROR:
            return wf_handler.fail_workflow(wf_ex, message)

        return wf_ex

    @u.log_exec(LOG)
    def rollback_workflow(self, wf_ex_id):
        # TODO(rakhmerov): Implement.
        raise NotImplementedError

    def _dispatch_workflow_commands(self, wf_ex, wf_cmds, wf_spec):
        if not wf_cmds:
            return

        for cmd in wf_cmds:
            if isinstance(cmd, commands.RunTask) and cmd.is_waiting():
                task_handler.defer_task(cmd)
            elif isinstance(cmd, commands.RunTask):
                task_ex = task_handler.run_new_task(cmd, wf_spec)

                if task_ex.state == states.ERROR:
                    wf_handler.fail_workflow(
                        wf_ex,
                        'Failed to start task [task_ex=%s]: %s' %
                        (task_ex, task_ex.state_info)
                    )
            elif isinstance(cmd, commands.RunExistingTask):
                task_ex = task_handler.run_existing_task(
                    cmd.task_ex.id,
                    reset=cmd.reset
                )

                if task_ex.state == states.ERROR:
                    wf_handler.fail_workflow(
                        wf_ex,
                        'Failed to start task [task_ex=%s]: %s' %
                        (task_ex, task_ex.state_info)
                    )
            elif isinstance(cmd, commands.SetWorkflowState):
                if states.is_completed(cmd.new_state):
                    self._stop_workflow(cmd.wf_ex, cmd.new_state, cmd.msg)
                else:
                    wf_handler.set_execution_state(wf_ex, cmd.new_state)
            elif isinstance(cmd, commands.Noop):
                # Do nothing.
                pass
            else:
                raise RuntimeError('Unsupported workflow command: %s' % cmd)

            if wf_ex.state != states.RUNNING:
                break

    # TODO(rakhmerov): This method may not be needed at all because error
    # handling is now implemented too roughly w/o distinguishing different
    # errors. On most errors (like YAQLException) we shouldn't rollback
    # transactions, we just need to fail corresponding execution objects
    # where a problem happened (action, task or workflow).
    @staticmethod
    def _fail_workflow(wf_ex_id, exc):
        """Private helper to fail workflow on exceptions."""

        with db_api.transaction():
            wf_ex = db_api.load_workflow_execution(wf_ex_id)

            if wf_ex is None:
                LOG.error(
                    "Can't fail workflow execution with id='%s': not found.",
                    wf_ex_id
                )
                return None

            wf_ex = wf_handler.lock_workflow_execution(wf_ex_id)

            if not states.is_paused_or_completed(wf_ex.state):
                wf_handler.set_execution_state(wf_ex, states.ERROR, str(exc))

            return wf_ex