Merge "Add batch size for integrity checker"

This commit is contained in:
Zuul 2018-11-10 15:09:42 +00:00 committed by Gerrit Code Review
commit c48efec6db
5 changed files with 26 additions and 4 deletions

View File

@ -184,6 +184,14 @@ engine_opts = [
' set to a negative value Mistral will never be doing '
' this check.')
),
cfg.IntOpt(
'execution_integrity_check_batch_size',
default=5,
min=1,
help=_('A number of task executions in RUNNING state that the'
' execution integrity checker can process in a single'
' iteration.')
),
cfg.IntOpt(
'action_definition_cache_time',
default=60,

View File

@ -131,7 +131,8 @@ def _check_and_fix_integrity(wf_ex_id):
running_task_execs = db_api.get_task_executions(
workflow_execution_id=wf_ex.id,
state=states.RUNNING
state=states.RUNNING,
limit=CONF.engine.execution_integrity_check_batch_size
)
for t_ex in running_task_execs:

View File

@ -271,8 +271,8 @@ class EngineTestCase(base.DbTestCase):
lambda: self.is_workflow_in_state(ex_id, state),
delay,
timeout,
fail_message="Execution {ex_id} to reach {state} "
"state but is in {current}",
fail_message="Execution {ex_id} must have reached state {state} "
"state but it is in {current}",
fail_message_formatter=lambda m: m.format(
ex_id=ex_id,
state=state,

View File

@ -1046,7 +1046,7 @@ class DirectWorkflowEngineTest(base.EngineTestCase):
"""
# Generate the workflow text.
task_cnt = 200
task_cnt = 50
on_success_list_str = ''

View File

@ -0,0 +1,13 @@
---
fixes:
- |
Workflow execution integrity checker mechanism was too aggressive in case
of big workflows that have many task executions in RUNNING state at the
same time. The mechanism was selecting them all in one query and calling
"on_action_complete" for each of them within a single DB transaction.
That could lead to situations when this mechanism would totally block
all normal workflow processing whereas it should only be a "last chance"
aid in case of real infrastructure failures (e.g. MQ outage).
This issue has been fixed by adding a configurable batch size, so that
the checker can't select more than this number of task executions in
RUNNING state at once.