From c9d71a65099ce5ec982c3a844c291596c6590a83 Mon Sep 17 00:00:00 2001 From: Felipe Monteiro Date: Sat, 18 Aug 2018 03:54:59 +0100 Subject: [PATCH] substitution: Recursive pattern replacement Patterns may now be replaced recursively. This can be achieved by specifying a ``pattern`` value and ``recurse`` (with a required ``depth`` argument). Example: substitutions: - dest: path: . pattern: REGEX recurse: depth: -1 src: schema: deckhand/Passphrase/v1 name: example-password path: . NOTE: Recursive selection of patterns will only consider matching patterns. Non-matching patterns will be ignored. Thus, even if recursion can "pass over" non-matching patterns, they will be silently ignored. This is useful for reducing the number of substitution stanzas that are required for the purposes of performing all the string pattern replacements that are required. Best practice is to limit the scope of the recursion as much as possible: e.g. avoid passing in "$" as the ``jsonpath``, but rather a JSON path that lives closer to the nested strings in question. Included in this patch set: * recursive implementation for pattern replacement * unit tests for most scenarios and edge cases * docstring updates * documentation updates * schema updates for validation Change-Id: I85048349097ed696667fae80f1180808d264bbcf --- deckhand/common/utils.py | 138 ++++++++++++------ .../engine/schemas/metadata_document.yaml | 11 ++ deckhand/engine/secrets_manager.py | 5 +- deckhand/tests/unit/common/test_utils.py | 129 ++++++++++++++++ doc/source/substitution.rst | 82 ++++++++++- 5 files changed, 320 insertions(+), 45 deletions(-) diff --git a/deckhand/common/utils.py b/deckhand/common/utils.py index c0bdf3a3..8b6be04f 100644 --- a/deckhand/common/utils.py +++ b/deckhand/common/utils.py @@ -87,6 +87,7 @@ def jsonpath_parse(data, jsonpath, match_all=False): :param data: The `data` section of a document. :param jsonpath: A multi-part key that references a nested path in ``data``. 
+ :param match_all: Whether to return all matches or just the first one. :returns: Entry that corresponds to ``data[jsonpath]`` if present, else None. @@ -107,7 +108,78 @@ def jsonpath_parse(data, jsonpath, match_all=False): return result if match_all else result[0] -def _execute_data_expansion(jsonpath, data): +def _execute_replace(data, value, jsonpath, pattern=None, recurse=None): + # These are O(1) reference copies to avoid accidentally modifying source + # data. We only want to update destination data. + data_copy = copy.copy(data) + value_copy = copy.copy(value) + + path = _jsonpath_parse(jsonpath) + path_to_change = path.find(data_copy) + recurse = recurse or {} + + def _try_replace_pattern(to_replace): + try: + # A pattern requires us to look up the data located at + # to_replace[jsonpath] and then figure out what + # re.match(to_replace[jsonpath], pattern) is (in pseudocode). + # Raise an exception in case the path isn't present in the + # to_replace and a pattern has been provided since it is + # otherwise impossible to do the look-up. + replacement = re.sub(pattern, + six.text_type(value_copy), + to_replace) + except TypeError as e: + LOG.error('Failed to substitute the value %s into %s ' + 'using pattern %s. Details: %s', + six.text_type(value_copy), to_replace, pattern, + six.text_type(e)) + raise errors.MissingDocumentPattern(jsonpath=jsonpath, + pattern=pattern) + return replacement + + def _replace_pattern_recursively(curr_data, depth, max_depth=-1): + # If max_depth is -1 (meaning no depth limit), then recursion will be + # performed over all of ``curr_data`` as depth starts out at 0. 
+ if depth == max_depth: + return + + if isinstance(curr_data, dict): + for k, v in curr_data.items(): + if isinstance(v, six.string_types) and re.search(pattern, v): + replacement = _try_replace_pattern(v) + curr_data[k] = replacement + else: + _replace_pattern_recursively(v, depth + 1, max_depth) + elif isinstance(curr_data, list): + for idx, v in enumerate(curr_data): + if isinstance(v, six.string_types) and re.search(pattern, v): + replacement = _try_replace_pattern(v) + curr_data[idx] = replacement + else: + _replace_pattern_recursively(v, depth + 1, max_depth) + + to_replace = path_to_change[0].value + if pattern: + if recurse: + max_depth = recurse.get('depth', -1) + # Recursion is only possible for lists/dicts. + if isinstance(to_replace, (dict, list)): + _replace_pattern_recursively(to_replace, 0, max_depth) + return data_copy + else: + # Edge case to handle a path that leads to a string value + # (not a list or dict). Even though no recursion is + # technically possible, gracefully handle this by + # performing non-recursive pattern replacement on the str. + return path.update(data_copy, _try_replace_pattern(to_replace)) + else: + return path.update(data_copy, _try_replace_pattern(to_replace)) + else: + return path.update(data_copy, value_copy) + + +def _execute_data_expansion(data, jsonpath): # Expand ``data`` with any path specified in ``jsonpath``. For example, # if jsonpath is ".foo[0].bar.baz" then for each subpath -- foo[0], bar, # and baz -- that key will be added to ``data`` if missing. @@ -137,25 +209,13 @@ def _execute_data_expansion(jsonpath, data): d = d.get(path) -def jsonpath_replace(data, value, jsonpath, pattern=None): +def jsonpath_replace(data, value, jsonpath, pattern=None, recurse=None): """Update value in ``data`` at the path specified by ``jsonpath``. - If the nested path corresponding to ``jsonpath`` isn't found in ``data``, the path is created as an empty ``{}`` for each sub-path along the ``jsonpath``. 
- :param data: The `data` section of a document. - :param value: The new value for ``data[jsonpath]``. - :param jsonpath: A multi-part key that references a nested path in - ``data``. Must begin with "." (without quotes). - :param pattern: A regular expression pattern. - :returns: Updated value at ``data[jsonpath]``. - :raises: MissingDocumentPattern if ``pattern`` is not None and - ``data[jsonpath]`` doesn't exist. - :raises ValueError: If ``jsonpath`` doesn't begin with "." - Example:: - doc = { 'data': { 'some_url': http://admin:INSERT_PASSWORD_HERE@svc-name:8080/v1 @@ -169,6 +229,24 @@ def jsonpath_replace(data, value, jsonpath, pattern=None): # The returned URL will look like: # http://admin:super-duper-secret@svc-name:8080/v1 doc['data'].update(replaced_data) + + :param data: The ``data`` section of a document. + :param value: The new value for ``data[jsonpath]``. + :param jsonpath: A multi-part key that references a nested path in + ``data``. Must begin with "." or "$" (without quotes). + :param pattern: A regular expression pattern. + :param recurse: Dictionary containing a single key called "depth" which + specifies the recursion depth. If provided, indicates that recursive + pattern substitution should be performed, beginning at ``jsonpath``. + Best practice is to limit the scope of the recursion as much as + possible: e.g. avoid passing in "$" as the ``jsonpath``, but rather + a JSON path that lives closer to the nested strings in question. + Optimize performance by choosing an ideal ``depth`` value; -1 will + cause recursion depth to be infinite. + :returns: Updated value at ``data[jsonpath]``. + :raises: MissingDocumentPattern if ``pattern`` is not None and + ``data[jsonpath]`` doesn't exist. + :raises ValueError: If ``jsonpath`` doesn't begin with "." 
""" # These are O(1) reference copies to avoid accidentally modifying source @@ -177,45 +255,23 @@ def jsonpath_replace(data, value, jsonpath, pattern=None): value_copy = copy.copy(value) jsonpath = _normalize_jsonpath(jsonpath) + recurse = recurse or {} if not jsonpath == '$' and not jsonpath.startswith('$.'): LOG.error('The provided jsonpath %s does not begin with "." or "$"', jsonpath) + # TODO(felipemonteiro): Use a custom internal exception for this. raise ValueError('The provided jsonpath %s does not begin with "." ' 'or "$"' % jsonpath) - def _execute_replace(path, path_to_change): - if path_to_change: - new_value = value_copy - if pattern: - to_replace = path_to_change[0].value - # `new_value` represents the value to inject into `to_replace` - # that matches the `pattern`. - try: - # A pattern requires us to look up the data located at - # data[jsonpath] and then figure out what - # re.match(data[jsonpath], pattern) is (in pseudocode). - # Raise an exception in case the path isn't present in the - # data and a pattern has been provided since it is - # otherwise impossible to do the look-up. - new_value = re.sub(pattern, str(value_copy), to_replace) - except TypeError as e: - LOG.error('Failed to substitute the value %s into %s ' - 'using pattern %s. Details: %s', str(value_copy), - to_replace, pattern, six.text_type(e)) - raise errors.MissingDocumentPattern(jsonpath=jsonpath, - pattern=pattern) - - return path.update(data_copy, new_value) - # Deckhand should be smart enough to create the nested keys in the # data if they don't exist and a pattern isn't required. 
path = _jsonpath_parse(jsonpath) path_to_change = path.find(data_copy) if not path_to_change: - _execute_data_expansion(jsonpath, data_copy) - path_to_change = path.find(data_copy) - return _execute_replace(path, path_to_change) + _execute_data_expansion(data_copy, jsonpath) + return _execute_replace(data_copy, value_copy, jsonpath, pattern=pattern, + recurse=recurse) def multisort(data, sort_by=None, order_by=None): diff --git a/deckhand/engine/schemas/metadata_document.yaml b/deckhand/engine/schemas/metadata_document.yaml index 57379b12..a64067b4 100644 --- a/deckhand/engine/schemas/metadata_document.yaml +++ b/deckhand/engine/schemas/metadata_document.yaml @@ -27,6 +27,17 @@ data: type: string pattern: type: string + recurse: + type: object + properties: + depth: + type: integer + minimum: -1 + # -1 indicates that the recursion depth is infinite. Refinements + # to this value should be specified by the caller. + default: -1 + required: + - depth additionalProperties: false required: - path diff --git a/deckhand/engine/secrets_manager.py b/deckhand/engine/secrets_manager.py index ba86b0b0..0ef1081f 100644 --- a/deckhand/engine/secrets_manager.py +++ b/deckhand/engine/secrets_manager.py @@ -320,6 +320,7 @@ class SecretsSubstitution(object): for each_dest_path in dest_array: dest_path = each_dest_path['path'] dest_pattern = each_dest_path.get('pattern', None) + dest_recurse = each_dest_path.get('recurse', {}) LOG.debug('Substituting from schema=%s layer=%s name=%s ' 'src_path=%s into dest_path=%s, dest_pattern=%s', @@ -329,8 +330,8 @@ class SecretsSubstitution(object): try: exc_message = '' substituted_data = utils.jsonpath_replace( - document['data'], src_secret, - dest_path, dest_pattern) + document['data'], src_secret, dest_path, + pattern=dest_pattern, recurse=dest_recurse) if (isinstance(document['data'], dict) and isinstance(substituted_data, dict)): document['data'].update(substituted_data) diff --git a/deckhand/tests/unit/common/test_utils.py 
b/deckhand/tests/unit/common/test_utils.py index ac55c3b6..42928de4 100644 --- a/deckhand/tests/unit/common/test_utils.py +++ b/deckhand/tests/unit/common/test_utils.py @@ -57,6 +57,135 @@ class TestJSONPathReplace(test_base.DeckhandTestCase): pattern="REGEX") self.assertEqual(expected, result) + def test_jsonpath_replace_with_pattern_and_array_index(self): + path = ".values.endpoints.admin[1]" + body = {"values": {"endpoints": {"admin": [None, "REGEX_FRESH"]}}} + expected = {"values": {"endpoints": {"admin": [None, "EAT_FRESH"]}}} + result = utils.jsonpath_replace(body, "EAT", jsonpath=path, + pattern="REGEX") + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_dict(self): + path = ".values" + body = {"values": {"re1": "REGEX_ONE", "re2": "REGEX_TWO"}} + expected = {"values": {"re1": "YES_ONE", "re2": "YES_TWO"}} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_list(self): + path = ".values" + + # String entries inside list. + body = {"values": ["REGEX_ONE", "REGEX_TWO"]} + expected = {"values": ["YES_ONE", "YES_TWO"]} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + # Dictionary entries inside list. + body = {"values": [{"re1": "REGEX_ONE", "re2": "REGEX_TWO"}]} + expected = {"values": [{"re1": "YES_ONE", "re2": "YES_TWO"}]} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_str(self): + """Edge case to validate that passing in a path that leads to a string + value itself (not a list or dict) still results in pattern replacement + gracefully passing, even though no recursion is technically possible. 
+ """ + path = ".values.endpoints.admin" + body = {"values": {"endpoints": {"admin": "REGEX_FRESH"}}} + expected = {"values": {"endpoints": {"admin": "EAT_FRESH"}}} + result = utils.jsonpath_replace(body, "EAT", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_dict_nested(self): + path = ".values" + body = {"values": {"re1": "REGEX_ONE", "nested": {"re2": "REGEX_TWO"}}} + expected = {"values": {"re1": "YES_ONE", "nested": {"re2": "YES_TWO"}}} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_list_nested(self): + path = ".values" + + # String entry inside nested list. + body = {"values": [{"re1": "REGEX_ONE", "nested": ["REGEX_TWO"]}]} + expected = {"values": [{"re1": "YES_ONE", "nested": ["YES_TWO"]}]} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + # Dictionary entry inside nested list. 
+ body = {"values": [{"nested": [{"re2": "REGEX_TWO"}]}]} + expected = {"values": [{"nested": [{"re2": "YES_TWO"}]}]} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_root_path(self): + """Validate that recursion happens even from root path.""" + path = "$" + body = {"values": {"re1": "REGEX_ONE", "nested": {"re2": "REGEX_TWO"}}} + expected = {"values": {"re1": "YES_ONE", "nested": {"re2": "YES_TWO"}}} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_different_patterns_recursive(self): + """Edge case to validate that different regexes that live recursively + under the same parent path are handled gracefully. Note that + non-matching regexes are obviously skipped over. + """ + path = ".values" + + # Only the first string's pattern will be replaced since it'll match + # REGEX. The second one won't as its pattern is XEGER. + body = {"values": [{"re1": "REGEX_ONE", "nested": ["XEGER_TWO"]}]} + expected = {"values": [{"re1": "YES_ONE", "nested": ["XEGER_TWO"]}]} + result1 = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", + recurse={'depth': -1}) + self.assertEqual(expected, result1) + + # Now replace the second one by passing in pattern="XEGER". + expected = {"values": [{"re1": "YES_ONE", "nested": ["NO_TWO"]}]} + result2 = utils.jsonpath_replace(result1, "NO", jsonpath=path, + pattern="XEGER", + recurse={'depth': -1}) + self.assertEqual(expected, result2) + + def test_jsonpath_replace_with_recursion_depth_specified(self): + # Only the first string's pattern will be replaced since it'll + # only recurse 1 level. 
+ body = {"re1": "REGEX_ONE", "values": {"re2": "REGEX_TWO"}} + expected = {"re1": "YES_ONE", "values": {"re2": "REGEX_TWO"}} + result = utils.jsonpath_replace(body, "YES", jsonpath="$", + pattern="REGEX", + recurse={'depth': 1}) + self.assertEqual(expected, result) + + # Depth of 2 should cover both. + body = {"re1": "REGEX_ONE", "values": {"re2": "REGEX_TWO"}} + expected = {"re1": "YES_ONE", "values": {"re2": "YES_TWO"}} + result = utils.jsonpath_replace(body, "YES", jsonpath="$", + pattern="REGEX", + recurse={'depth': 2}) + self.assertEqual(expected, result) + + # Depth of 3 is required as the list around "REGEX_TWO" results in + # another layer of recursion. + body = {"re1": "REGEX_ONE", "values": {"re2": ["REGEX_TWO"]}} + expected = {"re1": "YES_ONE", "values": {"re2": ["YES_TWO"]}} + result = utils.jsonpath_replace(body, "YES", jsonpath="$", + pattern="REGEX", + recurse={'depth': 3}) + self.assertEqual(expected, result) + class TestJSONPathReplaceNegative(test_base.DeckhandTestCase): """Validate JSONPath replace negative scenarios.""" diff --git a/doc/source/substitution.rst b/doc/source/substitution.rst index d9bacbe2..00020325 100644 --- a/doc/source/substitution.rst +++ b/doc/source/substitution.rst @@ -255,6 +255,49 @@ document) will be: --- schema: armada/Chart/v1 + metadata: + name: example-chart-01 + schema: metadata/Document/v1 + [...] + data: + chart: + details: + data: here + values: + # Notice string replacement occurs at exact location specified by + # ``dest.pattern``. + some_url: http://admin:my-secret-password@service-name:8080/v1 + +Recursive Replacement of Patterns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Patterns may also be replaced recursively. This can be achieved by specifying +a ``pattern`` value together with a ``recurse`` mapping that contains the +required ``depth`` key. Best practice is to limit the scope of the recursion +as much as possible: e.g. 
avoid passing in "$" as the ``jsonpath``, but rather +a JSON path that lives closer to the nested strings in question. + +.. note:: + + Recursive selection of patterns will only consider matching patterns. + Non-matching patterns will be ignored. Thus, even if recursion can "pass + over" non-matching patterns, they will be silently ignored. + +.. code-block:: yaml + + --- + # Source document. + schema: deckhand/Passphrase/v1 + metadata: + name: example-password + schema: metadata/Document/v1 + layeringDefinition: + layer: site + storagePolicy: cleartext + data: my-secret-password + --- + # Destination document. + schema: armada/Chart/v1 metadata: name: example-chart-01 schema: metadata/Document/v1 @@ -262,12 +305,40 @@ document) will be: layer: region substitutions: - dest: - path: .chart.values.some_url + # Note that the path encapsulates all 3 entries that require pattern + # replacement. + path: .chart.values pattern: INSERT_[A-Z]+_HERE + recurse: + # Note that specifying the depth is mandatory. -1 means that all + # layers are recursed through. + depth: -1 src: schema: deckhand/Passphrase/v1 name: example-password path: . + data: + chart: + details: + data: here + values: + # Notice string replacement occurs for all paths recursively captured + # by dest.path, since all their patterns match dest.pattern. + admin_url: http://admin:INSERT_PASSWORD_HERE@service-name:35357/v1 + internal_url: http://internal:INSERT_PASSWORD_HERE@service-name:5000/v1 + public_url: http://public:INSERT_PASSWORD_HERE@service-name:5000/v1 + +After document rendering, the output for ``example-chart-01`` (the destination +document) will be: + +.. code-block:: yaml + + --- + schema: armada/Chart/v1 + metadata: + name: example-chart-01 + schema: metadata/Document/v1 + [...] data: chart: details: @@ -275,7 +346,14 @@ document) will be: values: # Notice how the data from the source document is injected into the # exact location specified by ``dest.pattern``. 
- some_url: http://admin:my-secret-password@service-name:8080/v1 + admin_url: http://admin:my-secret-password@service-name:35357/v1 + internal_url: http://internal:my-secret-password@service-name:5000/v1 + public_url: http://public:my-secret-password@service-name:5000/v1 + +Note that the recursion depth must be specified. -1 effectively ignores the +depth. Any positive integer will specify how many levels deep to recurse +in order to optimize recursive pattern replacement. Take care to specify the +required recursion depth or else too-deep patterns won't be replaced. Substitution of Encrypted Data ------------------------------