From 871d2df290d829b9cafb0dc383dc3d65810c2dac Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Mon, 27 Aug 2018 17:44:40 +0000 Subject: [PATCH] Filter args dict in JSONFormatter In most formatters, any unused keys in the args dict will just be discarded. Because JSONFormatter logged the entire dict in addition to the message, some values were included in the output that may not have been intended. This could include sensitive data, so we should stop that. In the interest of maintaining compatibility with any tools that are reading the args dict, we leave the dict but filter out any unused keys. Change-Id: Ib64837c1ae93a27bef3d30a776320a373f18dd1c Closes-Bug: 1571714 Closes-Bug: 1787214 (cherry picked from commit a93c6ef98c8aeddc5a4ae87083689225fbc728bb) --- oslo_log/formatters.py | 17 ++++++++++++++++- oslo_log/tests/unit/test_log.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/oslo_log/formatters.py b/oslo_log/formatters.py index 25450354..8386ccc1 100644 --- a/oslo_log/formatters.py +++ b/oslo_log/formatters.py @@ -17,6 +17,7 @@ import itertools import logging import logging.config import logging.handlers +import re import socket import sys import traceback @@ -180,6 +181,9 @@ class _ReplaceFalseValue(dict): return dict.get(self, key, None) or '-' +_MSG_KEY_REGEX = re.compile('(%+)\((\w+)\)') + + class JSONFormatter(logging.Formatter): def __init__(self, fmt=None, datefmt=None, style='%'): # NOTE(sfinucan) we ignore the fmt and style arguments, but they're @@ -206,11 +210,22 @@ class JSONFormatter(logging.Formatter): return lines def format(self, record): + args = record.args + if isinstance(args, dict): + msg_keys = _MSG_KEY_REGEX.findall(record.msg) + # NOTE(bnemec): The logic around skipping escaped placeholders is + # tricky and error-prone to include in the regex. Much easier to + # just grab them all and filter after the fact. + msg_keys = [m[1] for m in msg_keys if len(m[0]) == 1] + # If no named keys were found, then the entire dict must have been + # the value to be formatted. Don't filter anything. + if msg_keys: + args = {k: v for k, v in args.items() if k in msg_keys} message = {'message': record.getMessage(), 'asctime': self.formatTime(record, self.datefmt), 'name': record.name, 'msg': record.msg, - 'args': record.args, + 'args': args, 'levelname': record.levelname, 'levelno': record.levelno, 'pathname': record.pathname, diff --git a/oslo_log/tests/unit/test_log.py b/oslo_log/tests/unit/test_log.py index 78785250..84950a4e 100644 --- a/oslo_log/tests/unit/test_log.py +++ b/oslo_log/tests/unit/test_log.py @@ -591,6 +591,22 @@ class JSONFormatterTestCase(LogTestBase): # convert it using repr() to prevent serialization error on logging. self.assertEqual(['repr'], data['args']) + def test_extra_args_filtered(self): + test_msg = 'This is a %(test)s line %%(unused)' + test_data = {'test': 'log', 'unused': 'removeme'} + self.log.debug(test_msg, test_data) + + data = jsonutils.loads(self.stream.getvalue()) + self.assertNotIn('unused', data['args']) + + def test_entire_dict(self): + test_msg = 'This is a %s dict' + test_data = {'test': 'log', 'other': 'value'} + self.log.debug(test_msg, test_data) + + data = jsonutils.loads(self.stream.getvalue()) + self.assertEqual(test_data, data['args']) + def get_fake_datetime(retval): class FakeDateTime(datetime.datetime):