Add "convert_output_data" config property for YAQL

* Adding "convert_output_data" config property gives an opportunity
  to increase overal performance. If YAQL always converts an expression
  result, it often takes significant CPU time and overall workflow
  execution time increases. It is especially important when a workflow
  publishes lots of data into the context and uses big workflow
  environments. It's been tested on a very big workflow (~7k tasks)
  with a big workflow environment (~2.5mb) that often uses the YAQL
  function "<% env() %>". This function basically just returns the
  workflow environment.
* Created all necessary unit tests.
* Other style fixes.

Closes-Bug: #1863857
Change-Id: Ie3169ec884ec9a0e7e50327dd03cd78dcda0a39b
Cherry picked from Ie3169ec884ec9a0e7e50327dd03cd78dcda0a39b
This commit is contained in:
Renat Akhmerov 2020-02-11 11:18:50 +07:00
parent 3f61f80c61
commit d22676ef39
7 changed files with 199 additions and 31 deletions

View File

@ -597,7 +597,7 @@ yaql_opts = [
'convert_input_data',
default=True,
help=_('Enables input data conversion for YAQL expressions. If set '
'to True then YAQL will convert mutable data structures '
'to True, YAQL will convert mutable data structures '
'(lists, dicts, sets) into their immutable versions. That '
'will allow them to work with some constructs that require '
'hashable types even if elements are not hashable. For '
@ -610,23 +610,38 @@ yaql_opts = [
'performance boost if the input data for an expression is '
'large.')
),
cfg.BoolOpt(
'convert_output_data',
default=True,
help=_('Enables output data conversion for YAQL expressions.'
'If set to False, it is possible that YAQL will generate '
'an output that will be not JSON-serializable. For example, '
'if an expression has ".toSet()" in the end to convert a list '
'into a set. It does not mean though that such functions '
'cannot be used, they can still be used in expressions but '
'user has to keep in mind of what type a result will be, '
'whereas if the value of ths property is True YAQL will '
'convert the result to a JSON-compatible type.')
),
cfg.BoolOpt(
'convert_tuples_to_lists',
default=True,
help=_('When set to true, yaql converts all tuples in the expression '
'result to lists.')
help=_('When set to True, yaql converts all tuples in the expression '
'result to lists. It works only if "convert_output_data" is '
'set to True.')
),
cfg.BoolOpt(
'convert_sets_to_lists',
default=False,
help=_('When set to true, yaql converts all sets in the expression '
help=_('When set to True, yaql converts all sets in the expression '
'result to lists. Otherwise the produced result may contain '
'sets that are not JSON-serializable.')
'sets that are not JSON-serializable. It works only if '
'"convert_output_data" is set to True.')
),
cfg.BoolOpt(
'iterable_dicts',
default=False,
help=_('When set to true, dictionaries are considered to be iterable '
help=_('When set to True, dictionaries are considered to be iterable '
'and iteration over dictionaries produces their keys (as in '
'Python and yaql 0.2).')
),

View File

@ -17,6 +17,7 @@ from oslo_log import log as logging
from mistral import config as cfg
from mistral.db.v2 import api as db_api
from mistral.engine import default_engine
from mistral import exceptions as exc
from mistral.rpc import base as rpc
from mistral.scheduler import base as sched_base
from mistral.service import base as service_base
@ -28,6 +29,16 @@ from mistral_lib import utils
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
def _validate_config():
if not CONF.yaql.convert_output_data and CONF.yaql.convert_input_data:
raise exc.MistralError(
"The config property 'yaql.convert_output_data' is set to False "
"so 'yaql.convert_input_data' must also be set to False."
)
class EngineServer(service_base.MistralService):
"""Engine server.
@ -48,6 +59,8 @@ class EngineServer(service_base.MistralService):
def start(self):
super(EngineServer, self).start()
_validate_config()
db_api.setup_db()
self._scheduler = sched_base.get_system_scheduler()

View File

@ -15,6 +15,7 @@
# limitations under the License.
import abc
import collections
import copy
import json
from oslo_config import cfg
@ -777,8 +778,6 @@ class WithItemsTask(RegularTask):
with_items_values = self._get_with_items_values()
if self._is_new():
self._validate_values(with_items_values)
action_count = len(six.next(iter(with_items_values.values())))
self._prepare_runtime_context(action_count)
@ -827,25 +826,36 @@ class WithItemsTask(RegularTask):
:return: Evaluated 'with-items' expression values.
"""
return self._evaluate_expression(self.task_spec.get_with_items())
exp_res = self._evaluate_expression(self.task_spec.get_with_items())
def _validate_values(self, with_items_values):
# Take only mapped values and check them.
values = list(with_items_values.values())
# Expression result may contain iterables instead of lists in the
# dictionary values. So we need to convert them into lists and
# perform all needed checks.
if not all([isinstance(v, list) for v in values]):
raise exc.InputException(
"Wrong input format for: %s. List type is"
" expected for each value." % with_items_values
)
result = {}
required_len = len(values[0])
required_len = -1
if not all(len(v) == required_len for v in values):
raise exc.InputException(
"Wrong input format for: %s. All arrays must"
" have the same length." % with_items_values
)
for var, items in exp_res.items():
if not isinstance(items, collections.Iterable):
raise exc.InputException(
"Wrong input format for: %s. Iterable type is"
" expected for each value." % result
)
items_list = list(items)
result[var] = items_list
if required_len < 0:
required_len = len(items_list)
elif len(items_list) != required_len:
raise exc.InputException(
"Wrong input format for: %s. All arrays must"
" have the same length." % exp_res
)
return result
def _get_input_dicts(self, with_items_values):
"""Calculate input dictionaries for another portion of actions.

View File

@ -22,6 +22,7 @@ from oslo_log import log as logging
import six
from yaql.language import exceptions as yaql_exc
from yaql.language import factory
from yaql.language import utils as yaql_utils
from mistral.config import cfg
from mistral import exceptions as exc
@ -41,7 +42,7 @@ def get_yaql_engine_options():
"yaql.convertTuplesToLists": _YAQL_CONF.convert_tuples_to_lists,
"yaql.convertSetsToLists": _YAQL_CONF.convert_sets_to_lists,
"yaql.iterableDicts": _YAQL_CONF.iterable_dicts,
"yaql.convertOutputData": True
"yaql.convertOutputData": _YAQL_CONF.convert_output_data
}
@ -73,6 +74,19 @@ LOG.info(
INLINE_YAQL_REGEXP = '<%.*?%>'
def _sanitize_yaql_result(result):
# Expression output conversion can be disabled but we can still
# do some basic unboxing if we got an internal YAQL type.
# TODO(rakhmerov): FrozenDict doesn't provide any public method
# or property to access a regular dict that it wraps so ideally
# we need to add it to YAQL. Once it's there we need to make a
# fix here.
if isinstance(result, yaql_utils.FrozenDict):
return result._d
return result if not inspect.isgenerator(result) else list(result)
class YAQLEvaluator(Evaluator):
@classmethod
def validate(cls, expression):
@ -113,7 +127,7 @@ class YAQLEvaluator(Evaluator):
", data=%s]" % (expression, str(e), data_context)
)
return result if not inspect.isgenerator(result) else list(result)
return _sanitize_yaql_result(result)
@classmethod
def is_expression(cls, s):

View File

@ -111,7 +111,7 @@ class TaskSpec(base.BaseSpec):
self._action = data.get('action')
self._workflow = data.get('workflow')
self._input = data.get('input', {})
self._with_items = self._transform_with_items()
self._with_items = self._get_with_items_as_dict()
self._publish = data.get('publish', {})
self._publish_on_error = data.get('publish-on-error', {})
self._policies = self._group_spec(
@ -157,8 +157,9 @@ class TaskSpec(base.BaseSpec):
"The length of a '{0}' task name must not exceed {1}"
" symbols".format(task_name, MAX_LENGTH_TASK_NAME))
def _transform_with_items(self):
def _get_with_items_as_dict(self):
raw = self._data.get('with-items', [])
with_items = {}
if isinstance(raw, six.string_types):
@ -173,10 +174,11 @@ class TaskSpec(base.BaseSpec):
match = re.match(WITH_ITEMS_PTRN, item)
if not match:
msg = ("Wrong format of 'with-items' property. Please use "
"format 'var in {[some, list] | <%% $.array %%> }: "
"%s" % self._data)
raise exc.InvalidModelException(msg)
raise exc.InvalidModelException(
"Wrong format of 'with-items' property. Please use "
"format 'var in {[some, list] | <%% $.array %%> }: "
"%s" % self._data
)
match_groups = match.groups()
var_name = match_groups[0]

View File

@ -0,0 +1,111 @@
# Copyright 2014 - Mirantis, Inc.
# Copyright 2015 - StackStorm, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from mistral.db.v2 import api as db_api
from mistral.engine import engine_server
from mistral import exceptions as exc
from mistral.services import workflows as wf_service
from mistral.tests.unit.engine import base as engine_test_base
class DisabledYAQLConversionTest(engine_test_base.EngineTestCase):
def setUp(self):
super(DisabledYAQLConversionTest, self).setUp()
self.override_config('auth_enable', False, 'pecan')
def test_disabled_yaql_output_conversion(self):
"""Test YAQL expressions with disabled data conversion.
The test is needed to make sure that if we disable YAQL data
conversion (for both input and output), then Mistral will handle
YAQL internal data types properly if they sneak into the Mistral
logic as part of an expression result. Particularly, we need to
make sure that the ORM framework (SQLAlchemy) will also be able
to save data properly if it comes across such a type.
NOTE:
- set() and toSet() functions produce "frozenset" type
internally within YAQL and it should be handled properly
everywhere in the code including SQLAlchemy.
- dict() produces "FrozenDict" internally but we unwrap the
top most dict after evaluating an expression on the Mistral
side.
"""
# Both input and output data conversion in YAQL need to be disabled
# so that we're sure that there won't be any surprises from YAQL
# like some YAQL internal types included in expression results.
self.override_config('convert_input_data', False, 'yaql')
self.override_config('convert_output_data', False, 'yaql')
wf_text = """---
version: '2.0'
wf:
tasks:
task1:
publish:
var1: <% range(0,10) %>
var2: <% set(15) %>
var3: <% [4, 5, 6].toSet() %>
var4: <% {k1 => v1, k2 => v2} %>
var5: <% dict([['a', 2], ['b', 4]]) %>
var6: <% [1, dict(k3 => v3, k4 => v4), 3] %>
"""
wf_service.create_workflows(wf_text)
# Start workflow.
wf_ex = self.engine.start_workflow('wf')
self.await_workflow_success(wf_ex.id)
with db_api.transaction():
# Note: We need to reread execution to access related tasks.
wf_ex = db_api.get_workflow_execution(wf_ex.id)
tasks = wf_ex.task_executions
t_ex = self._assert_single_item(tasks, name='task1')
self.assertDictEqual(
{
'var1': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
'var2': [15],
'var3': [4, 5, 6],
'var4': {'k1': 'v1', 'k2': 'v2'},
'var5': {'a': 2, 'b': 4},
'var6': [1, {'k3': 'v3', 'k4': 'v4'}, 3],
},
t_ex.published
)
def test_configuration_check(self):
# Kill all the threads started by default and try to start an
# instance of engine server again with the wrong configuration.
self.kill_threads()
self.override_config('convert_input_data', True, 'yaql')
self.override_config('convert_output_data', False, 'yaql')
eng_svc = engine_server.get_oslo_service(setup_profiler=False)
self.assertRaisesWithMessage(
exc.MistralError,
"The config property 'yaql.convert_output_data' is set to False "
"so 'yaql.convert_input_data' must also be set to False.",
eng_svc.start
)

View File

@ -302,6 +302,9 @@ class InlineYAQLEvaluatorTest(base.BaseTest):
{'a': 1})
def test_set_of_dicts(self):
# This test makes sense only if YAQL expression output conversion
# is enabled.
self.override_config('convert_output_data', True, 'yaql')
self.override_config('convert_sets_to_lists', True, 'yaql')
def _restore_engine(old_engine):