diff --git a/src/bin/shipyard_airflow/shipyard_airflow/control/util/redactor.py b/src/bin/shipyard_airflow/shipyard_airflow/control/util/redactor.py
index cf9c4054..d54480f3 100644
--- a/src/bin/shipyard_airflow/shipyard_airflow/control/util/redactor.py
+++ b/src/bin/shipyard_airflow/shipyard_airflow/control/util/redactor.py
@@ -50,6 +50,10 @@ class Redactor():
         patterns
     :param double_patterns: list of additional double capture group regex
         patterns
+    :param nonkey_patterns: list of additional non-key-based double capture
+        group regex patterns. These are always applied to messages.
+    Note: Regex patterns are processed using re.DOTALL, so newline characters
+        are included in the '.' pattern.
     """
     # Start with the values defined in strutils
     _KEYS = list(_SANITIZE_KEYS)
@@ -64,17 +68,25 @@ class Redactor():
     # More two capture group patterns
     _DOUBLE_CG_PATTERNS.extend([])
 
+    # Set up the non-key patterns. These can be precompiled.
+    _NONKEY_CG_PATTERNS = [
+        re.compile(r'([^\s]+:\/\/[^:]+:)[^@\s]+(@[^\s]+)', re.DOTALL)
+    ]
+
     def __init__(self,
                  redaction='***',
                  keys=None,
                  single_patterns=None,
-                 double_patterns=None):
+                 double_patterns=None,
+                 nonkey_patterns=None):
         if keys is None:
             keys = []
         if single_patterns is None:
             single_patterns = []
         if double_patterns is None:
             double_patterns = []
+        if nonkey_patterns is None:
+            nonkey_patterns = []
 
         self.redaction = redaction
 
@@ -89,7 +101,12 @@ class Redactor():
 
         self._single_cg_patterns = self._gen_patterns(patterns=singles)
         self._double_cg_patterns = self._gen_patterns(patterns=doubles)
-        # the two capture group patterns
+        # Copy the class-level defaults: appending to the shared list would
+        # leak instance-specific patterns into every other Redactor.
+        self._non_key_patterns = list(Redactor._NONKEY_CG_PATTERNS)
+        for p in nonkey_patterns:
+            rx = re.compile(p, re.DOTALL)
+            self._non_key_patterns.append(rx)
 
     def _gen_patterns(self, patterns):
         """Initialize the redaction patterns"""
@@ -116,4 +133,8 @@ class Redactor():
                     message = re.sub(pattern, substitute2, message)
                 for pattern in self._single_cg_patterns[key]:
                     message = re.sub(pattern, substitute1, message)
+        # Apply the nonkey patterns
+        for pattern in self._non_key_patterns:
+            message = re.sub(pattern, substitute2, message)
+
         return message
diff --git a/src/bin/shipyard_airflow/tests/unit/control/test_redaction.py b/src/bin/shipyard_airflow/tests/unit/control/test_redaction.py
index c3a4679d..8e0f21bc 100644
--- a/src/bin/shipyard_airflow/tests/unit/control/test_redaction.py
+++ b/src/bin/shipyard_airflow/tests/unit/control/test_redaction.py
@@ -73,6 +73,19 @@ password:
 """
         assert redactor.redact(to_redact) == expected
 
+        # Covering database connection urls, for example
+        to_redact = "This is a basic auth url something+x://shipy:passw@postg"
+        expected = "This is a basic auth url something+x://shipy:***@postg"
+        assert redactor.redact(to_redact) == expected
+
+        to_redact = """
+a+http://usr:pssw@fakeurl.fake -37!!! with b+http://usr2:sword@fakeurl.fake
+"""
+        expected = """
+a+http://usr:***@fakeurl.fake -37!!! with b+http://usr2:***@fakeurl.fake
+"""
+        assert redactor.redact(to_redact) == expected
+
     def test_extended_keys_redactor(self):
         redactor = Redactor(redaction="++++", keys=['trains'])
         to_redact = """
@@ -89,6 +102,35 @@ password:
 """
         assert redactor.redact(to_redact) == expected
 
+    def test_extra_nonkey_patterns(self):
+        redactor = Redactor(nonkey_patterns=[r'(toes).*(nose)'])
+        to_redact = 'do not put toes by your nose'
+        expected = 'do not put toes***nose'
+        assert redactor.redact(to_redact) == expected
+
+        to_redact = """
+do not put toes
+ by
+your nose
+"""
+        expected = """
+do not put toes***nose
+"""
+        assert redactor.redact(to_redact) == expected
+
+    def test_nonkey_patterns_not_shared(self):
+        # Extra non-key patterns must stay local to the instance given them
+        Redactor(nonkey_patterns=[r'(toes).*(nose)'])
+        to_redact = 'do not put toes by your nose'
+        assert Redactor().redact(to_redact) == to_redact
+
+    def test_extra_keyed_patterns(self):
+        redactor = Redactor(
+            single_patterns=[r'(%(key)s\s*[#]\s*)[^\s^\'^\"]+'],
+            double_patterns=[r'(%(key)s\s*[#]\s*[\"\'])[^\"\']*([\"\'])'])
+        to_redact = 'password# hi and password # "hi"'
+        expected = 'password# *** and password # "***"'
+        assert redactor.redact(to_redact) == expected
+
     def test_redaction_formatter(self, caplog):
         # since root logging is setup by prior tests need to remove all
         # handlers to simulate a clean environment of setting up this