From 56b2c89c39e45bc2307113ea12ae584d4f1e11a1 Mon Sep 17 00:00:00 2001
From: Monty Taylor <mordred@inaugust.com>
Date: Wed, 20 Sep 2017 12:11:40 -0500
Subject: [PATCH] Split request logging into four different loggers

Python logging is pretty amazingly flexible, and allows us to emit
to arbitrary logging domains so that a consumer can direct log output
with specificity.

Turning on HTTP debug logging currently produces an avalanche of output,
when sometimes just seeing that the requests were made and responded to
is perfectly fine.

Split the loggers used into four - one for request ids, one for request
commands, one for response headers and one for response body content.
Make them subloggers of keystoneauth.session so that if a user does nothing,
their existing logging config will be unchanged.

If someone passes in a logger, behave as before logging all things to
the provided logger.

While we're at it, document this in the using-sessions document, so that
people know that the loggers exist and what they do.

NOTE:
The tox (>=1.7.0) by default sets a random python hash seed which
causes ordering of dicts and sets to be different between tests runs.
Disabled the random python hash seed by setting PYTHONHASHSEED=0 to
fix the random failure of below test:

keystoneauth1.tests.unit.test_session.SessionAuthTests.
test_split_loggers

The PYTHONHASHSEED=0 is removed in the followup patch so that we can
separate the tracking down of ordering issues in tests from this patch.

Change-Id: Ide7dac8adf5c76c9019c35867cda632aff39770f
---
 doc/source/using-sessions.rst            | 161 ++++++++++++++++++++++-
 keystoneauth1/session.py                 | 123 ++++++++++++-----
 keystoneauth1/tests/unit/test_session.py |  54 ++++++++
 tox.ini                                  |   1 +
 4 files changed, 307 insertions(+), 32 deletions(-)

diff --git a/doc/source/using-sessions.rst b/doc/source/using-sessions.rst
index ace130b0..a66cfff2 100644
--- a/doc/source/using-sessions.rst
+++ b/doc/source/using-sessions.rst
@@ -39,6 +39,10 @@ Features
   plugins. Discovery information is automatically cached in memory, so the user
   need not worry about excessive use of discovery metadata.
 
+- Safe logging of HTTP interactions
+
+  Clients need to be able to enable logging of the HTTP interactions, but some
+  things, such as the token or secrets, need to be ommitted.
 
 Sessions for Users
 ==================
@@ -433,8 +437,163 @@ expects ``volume``.
                                         'min_version': '3',
                                         'max_version': 'latest'})
 
+Logging
+=======
+
+The logging system uses standard `python logging`_ rooted on the
+``keystoneauth`` namespace as would be expected. There are two possibilities
+of where log messages about HTTP interactions will go.
+
+By default, all messages will go to the ``keystoneauth.session`` logger.
+
+If the ``split_loggers`` option on the :class:`keystoneauth1.session.Session`
+constructor is set to ``True``, the HTTP content will be split across four
+subloggers to allow for fine-grained control of what is logged and how:
+
+keystoneauth.session.request-id
+  Emits a log entry at the ``DEBUG`` level for every http request
+  including information about the URL, ``service-type`` and ``request-id``.
+
+keystoneauth.session.request
+  Emits a log entry at the ``DEBUG`` level for every http request including a
+  curl formatted string of the request.
+
+keystoneauth.session.response
+  Emits a log entry at the ``DEBUG`` level for every http response received,
+  including the status code, and the headers received.
+
+keystoneauth.session.body
+  Emits a log entry at the ``DEBUG`` level containing the contents of the
+  response body if the ``content-type`` is either ``text`` or ``json``.
+
+Using loggers
+-------------
+
+A full description of how to consume `python logging`_ is out of scope of this
+document, but a few simple examples are provided.
+
+If you would like to configure logging to log keystoneuath at the ``INFO``
+level with no ``DEBUG`` messages:
+
+.. code-block:: python
+
+  import keystoneauth1
+  import logging
+
+  logger = logging.getLogger('keystoneauth')
+  logger.addHandler(logging.StreamHandler())
+  logger.setLevel(logging.INFO)
+
+If you would like to get a full HTTP debug trace including bodies:
+
+.. code-block:: python
+
+  import keystoneauth1
+  import logging
+
+  logger = logging.getLogger('keystoneauth')
+  logger.addHandler(logging.StreamHandler())
+  logger.setLevel(logging.DEBUG)
+
+If you would like to get a full HTTP debug trace bug with no bodies:
+
+.. code-block:: python
+
+  import keystoneauth1
+  import keystoneauth1.session
+  import logging
+
+  logger = logging.getLogger('keystoneauth')
+  logger.addHandler(logging.StreamHandler())
+  logger.setLevel(logging.DEBUG)
+  body_logger = logging.getLogger('keystoneauth.session.body')
+  body_logger.setLevel(logging.WARN)
+  session = keystoneauth1.session.Session(split_loggers=True)
+
+Finally, if you would like to log request-ids and response headers to one file,
+request commands, response headers and response bodies to a different file,
+and everything else to the console:
+
+.. code-block:: python
+
+  import keystoneauth1
+  import keystoneauth1.session
+  import logging
+
+  # Create a handler that outputs only outputs INFO level messages to stdout
+  stream_handler = logging.StreamHandler()
+  stream_handler.setLevel(logging.INFO)
+
+  # Configure the default behavior of all keystoneauth logging to log at the
+  # INFO level.
+  logger = logging.getLogger('keystoneauth')
+  logger.setLevel(logging.INFO)
+
+  # Emit INFO messages from all keystoneauth loggers to stdout
+  logger.addHandler(stream_handler)
+
+  # Create an output formatter that includes logger name and timestamp.
+  formatter = logging.Formatter('%(asctime)s %(name)s %(message)s')
+
+  # Create a file output for request ids and response headers
+  request_handler = logging.FileHandler('request.log')
+  request_handler.setFormatter(formatter)
+
+  # Create a file output for request commands, response headers and bodies
+  body_handler = logging.FileHandler('response-body.log')
+  body_handler.setFormatter(formatter)
+
+  # Log all HTTP interactions at the DEBUG level
+  session_logger = logging.getLogger('keystoneauth.session')
+  session_logger.setLevel(logging.DEBUG)
+
+  # Emit request ids to the request log
+  request_id_logger = logging.getLogger('keystoneauth.session.request-id')
+  request_id_logger.addHandler(request_handler)
+
+  # Emit response headers to both the request log and the body log
+  header_logger = logging.getLogger('keystoneauth.session.response')
+  header_logger.addHandler(request_handler)
+  header_logger.addHandler(body_handler)
+
+  # Emit request commands to the body log
+  request_logger = logging.getLogger('keystoneauth.session.request')
+  request_logger.addHandler(body_handler)
+
+  # Emit bodies only to the body log
+  body_logger = logging.getLogger('keystoneauth.session.body')
+  body_logger.addHandler(body_handler)
+
+  session = keystoneauth1.session.Session(split_loggers=True)
+
+The above will produce messages like the following in request.log:
+
+::
+
+  2017-09-19 22:10:09,466 keystoneauth.session.request-id  GET call to volumev2 for http://cloud.example.com/volume/v2/137155c35fb34172a284a3c2540c92ab/volumes/detail used request id req-f4f2058a-9308-4c4a-94e6-5ee1cd6c78bd
+  2017-09-19 22:10:09,751 keystoneauth.session.response    [200] Date: Tue, 19 Sep 2017 22:10:09 GMT Server: Apache/2.4.18 (Ubuntu) x-compute-request-id: req-2e9181d2-9f3e-404e-a12f-1f1566736ab3 Content-Type: application/json Content-Length: 15 x-openstack-request-id: req-2e9181d2-9f3e-404e-a12f-1f1566736ab3 Connection: close
+
+And content like the following into response-body.log:
+
+::
+
+  2017-09-19 22:10:09,490 keystoneauth.session.request     curl -g -i -X GET http://cloud.example.com/volume/v2/137155c35fb34172a284a3c2540c92ab/volumes/detail?marker=34cd00cf-bf67-4667-a900-5ce233e383d5 -H "User-Agent: os-client-config/1.28.0 shade/1.23.1 keystoneauth1/3.2.0 python-requests/2.18.4 CPython/2.7.12" -H "X-Auth-Token: {SHA1}a1d03d2a4cbee590a55f1786d452e1027d5fd781"
+  2017-09-19 22:10:09,751 keystoneauth.session.response    [200] Date: Tue, 19 Sep 2017 22:10:09 GMT Server: Apache/2.4.18 (Ubuntu) x-compute-request-id: req-2e9181d2-9f3e-404e-a12f-1f1566736ab3 Content-Type: application/json Content-Length: 15 x-openstack-request-id: req-2e9181d2-9f3e-404e-a12f-1f1566736ab3 Connection: close
+  2017-09-19 22:10:09,751 keystoneauth.session.body        {"volumes": []}
+
+User Provided Loggers
+---------------------
+
+The HTTP methods (request, get, post, put, etc) on
+`keystoneauth1.session.Session` and `keystoneauth1.adapter.Adapter` all support
+a ``logger`` parameter. A user can provide their own `logger`_ which will
+override the session loggers mentioned above. If a single logger is provided
+in this manner, request, response and body content will all be logged to that
+logger at the ``DEBUG`` level, and the strings ``REQ:``, ``RESP:`` and
+``RESP BODY:`` will be pre-pended as appropriate.
 
 .. _API-WG Specs: http://specs.openstack.org/openstack/api-wg/
 .. _Consuming the Catalog: http://specs.openstack.org/openstack/api-wg/guidelines/consuming-catalog.html
 .. _Microversions: http://specs.openstack.org/openstack/api-wg/guidelines/microversion_specification.html#version-discovery
-
+.. _python logging: https://docs.python.org/3/library/logging.html
+.. _logger: https://docs.python.org/3/library/logging.html#logging.Logger
diff --git a/keystoneauth1/session.py b/keystoneauth1/session.py
index d0794e93..53ea6e4f 100644
--- a/keystoneauth1/session.py
+++ b/keystoneauth1/session.py
@@ -50,8 +50,6 @@ DEFAULT_USER_AGENT = 'keystoneauth1/%s %s %s/%s' % (
 # here and we'll add it to the list as required.
 _LOG_CONTENT_TYPES = set(['application/json'])
 
-_logger = utils.get_logger(__name__)
-
 
 def _construct_session(session_obj=None):
     # NOTE(morganfainberg): if the logic in this function changes be sure to
@@ -244,6 +242,8 @@ class Session(object):
                                  that do not share an auth plugin, it can
                                  be provided here. (optional, defaults to
                                  None which means automatically manage)
+    :param bool split_loggers: Split the logging of requests across multiple
+                               loggers instead of just one. Defaults to False.
     """
 
     user_agent = None
@@ -256,7 +256,7 @@ class Session(object):
                  cert=None, timeout=None, user_agent=None,
                  redirect=_DEFAULT_REDIRECT_LIMIT, additional_headers=None,
                  app_name=None, app_version=None, additional_user_agent=None,
-                 discovery_cache=None):
+                 discovery_cache=None, split_loggers=None):
 
         self.auth = auth
         self.session = _construct_session(session)
@@ -273,6 +273,7 @@ class Session(object):
         if discovery_cache is None:
             discovery_cache = {}
         self._discovery_cache = discovery_cache
+        self._split_loggers = split_loggers
 
         if timeout is not None:
             self.timeout = float(timeout)
@@ -324,16 +325,33 @@ class Session(object):
             return (header[0], '{SHA1}%s' % token_hash)
         return header
 
+    def _get_split_loggers(self, split_loggers):
+        if split_loggers is None:
+            split_loggers = self._split_loggers
+        if split_loggers is None:
+            split_loggers = False
+        return split_loggers
+
     def _http_log_request(self, url, method=None, data=None,
                           json=None, headers=None, query_params=None,
-                          logger=_logger):
+                          logger=None, split_loggers=None):
+        string_parts = []
+
+        if self._get_split_loggers(split_loggers):
+            logger = utils.get_logger(__name__ + '.request')
+        else:
+            # Only a single logger was passed in, use string prefixing.
+            string_parts.append('REQ:')
+            if not logger:
+                logger = utils.get_logger(__name__)
+
         if not logger.isEnabledFor(logging.DEBUG):
             # NOTE(morganfainberg): This whole debug section is expensive,
             # there is no need to do the work if we're not going to emit a
             # debug log.
             return
 
-        string_parts = ['REQ: curl -g -i']
+        string_parts.append('curl -g -i')
 
         # NOTE(jamielennox): None means let requests do its default validation
         # so we need to actually check that this is False.
@@ -356,7 +374,8 @@ class Session(object):
             string_parts.append(url)
 
         if headers:
-            for header in headers.items():
+            # Sort headers so that testing can work consistently.
+            for header in sorted(headers.items()):
                 string_parts.append('-H "%s: %s"'
                                     % self._process_header(header))
         if json:
@@ -373,7 +392,18 @@ class Session(object):
 
     def _http_log_response(self, response=None, json=None,
                            status_code=None, headers=None, text=None,
-                           logger=_logger):
+                           logger=None, split_loggers=True):
+        string_parts = []
+        body_parts = []
+        if self._get_split_loggers(split_loggers):
+            logger = utils.get_logger(__name__ + '.response')
+            body_logger = utils.get_logger(__name__ + '.body')
+        else:
+            # Only a single logger was passed in, use string prefixing.
+            string_parts.append('RESP:')
+            body_parts.append('RESP BODY:')
+            body_logger = logger
+
         if not logger.isEnabledFor(logging.DEBUG):
             return
 
@@ -382,6 +412,19 @@ class Session(object):
                 status_code = response.status_code
             if not headers:
                 headers = response.headers
+
+        if status_code:
+            string_parts.append('[%s]' % status_code)
+        if headers:
+            # Sort headers so that testing can work consistently.
+            for header in sorted(headers.items()):
+                string_parts.append('%s: %s' % self._process_header(header))
+        logger.debug(' '.join(string_parts))
+
+        if not body_logger.isEnabledFor(logging.DEBUG):
+            return
+
+        if response is not None:
             if not text:
                 # NOTE(samueldmq): If the response does not provide enough info
                 # about the content type to decide whether it is useful and
@@ -406,17 +449,9 @@ class Session(object):
         if json:
             text = self._json.encode(json)
 
-        string_parts = ['RESP:']
-
-        if status_code:
-            string_parts.append('[%s]' % status_code)
-        if headers:
-            for header in headers.items():
-                string_parts.append('%s: %s' % self._process_header(header))
         if text:
-            string_parts.append('\nRESP BODY: %s\n' % text)
-
-        logger.debug(' '.join(string_parts))
+            body_parts.append(text)
+            body_logger.debug(' '.join(body_parts))
 
     @staticmethod
     def _set_microversion_headers(
@@ -465,7 +500,7 @@ class Session(object):
                 user_agent=None, redirect=None, authenticated=None,
                 endpoint_filter=None, auth=None, requests_auth=None,
                 raise_exc=True, allow_reauth=True, log=True,
-                endpoint_override=None, connect_retries=0, logger=_logger,
+                endpoint_override=None, connect_retries=0, logger=None,
                 allow=None, client_name=None, client_version=None,
                 microversion=None, microversion_service_type=None,
                 **kwargs):
@@ -560,6 +595,14 @@ class Session(object):
 
         :returns: The response to the request.
         """
+        # If a logger is passed in, use it and do not log requests, responses
+        # and bodies separately.
+        if logger:
+            split_loggers = False
+        else:
+            split_loggers = None
+        logger = logger or utils.get_logger(__name__)
+
         headers = kwargs.setdefault('headers', dict())
         if microversion:
             self._set_microversion_headers(
@@ -671,7 +714,7 @@ class Session(object):
                                    data=kwargs.get('data'),
                                    headers=headers,
                                    query_params=query_params,
-                                   logger=logger)
+                                   logger=logger, split_loggers=split_loggers)
 
         # Force disable requests redirect handling. We will manage this below.
         kwargs['allow_redirects'] = False
@@ -681,7 +724,7 @@ class Session(object):
 
         send = functools.partial(self._send_request,
                                  url, method, redirect, log, logger,
-                                 connect_retries)
+                                 split_loggers, connect_retries)
 
         try:
             connection_params = self.get_auth_connection_params(auth=auth)
@@ -713,13 +756,29 @@ class Session(object):
             request_id = (resp.headers.get('x-openstack-request-id') or
                           resp.headers.get('x-compute-request-id'))
             if request_id:
-                logger.debug('%(method)s call to %(service_name)s for '
-                             '%(url)s used request id '
-                             '%(response_request_id)s',
-                             {'method': resp.request.method,
-                              'service_name': service_name,
-                              'url': resp.url,
-                              'response_request_id': request_id})
+                if self._get_split_loggers(split_loggers):
+                    id_logger = utils.get_logger(__name__ + '.request-id')
+                else:
+                    id_logger = logger
+                if service_name:
+                    id_logger.debug(
+                        '%(method)s call to %(service_name)s for '
+                        '%(url)s used request id '
+                        '%(response_request_id)s', {
+                            'method': resp.request.method,
+                            'service_name': service_name,
+                            'url': resp.url,
+                            'response_request_id': request_id
+                        })
+                else:
+                    id_logger.debug(
+                        '%(method)s call to '
+                        '%(url)s used request id '
+                        '%(response_request_id)s', {
+                            'method': resp.request.method,
+                            'url': resp.url,
+                            'response_request_id': request_id
+                        })
 
         # handle getting a 401 Unauthorized response by invalidating the plugin
         # and then retrying the request. This is only tried once.
@@ -738,7 +797,7 @@ class Session(object):
 
         return resp
 
-    def _send_request(self, url, method, redirect, log, logger,
+    def _send_request(self, url, method, redirect, log, logger, split_loggers,
                       connect_retries, connect_retry_delay=0.5, **kwargs):
         # NOTE(jamielennox): We handle redirection manually because the
         # requests lib follows some browser patterns where it will redirect
@@ -784,13 +843,15 @@ class Session(object):
             time.sleep(connect_retry_delay)
 
             return self._send_request(
-                url, method, redirect, log, logger,
+                url, method, redirect, log, logger, split_loggers,
                 connect_retries=connect_retries - 1,
                 connect_retry_delay=connect_retry_delay * 2,
                 **kwargs)
 
         if log:
-            self._http_log_response(response=resp, logger=logger)
+            self._http_log_response(
+                response=resp, logger=logger,
+                split_loggers=split_loggers)
 
         if resp.status_code in self._REDIRECT_STATUSES:
             # be careful here in python True == 1 and False == 0
@@ -812,7 +873,7 @@ class Session(object):
                 # NOTE(jamielennox): We don't pass through connect_retry_delay.
                 # This request actually worked so we can reset the delay count.
                 new_resp = self._send_request(
-                    location, method, redirect, log, logger,
+                    location, method, redirect, log, logger, split_loggers,
                     connect_retries=connect_retries,
                     **kwargs)
 
diff --git a/keystoneauth1/tests/unit/test_session.py b/keystoneauth1/tests/unit/test_session.py
index 5aa0ccba..8d18949a 100644
--- a/keystoneauth1/tests/unit/test_session.py
+++ b/keystoneauth1/tests/unit/test_session.py
@@ -956,6 +956,60 @@ class SessionAuthTests(utils.TestCase):
         self.assertNotIn(list(response.keys())[0], self.logger.output)
         self.assertNotIn(list(response.values())[0], self.logger.output)
 
+    def test_split_loggers(self):
+
+        def get_logger_io(name):
+            logger_name = 'keystoneauth.session.{name}'.format(name=name)
+            logger = logging.getLogger(logger_name)
+            logger.setLevel(logging.DEBUG)
+
+            io = six.StringIO()
+            handler = logging.StreamHandler(io)
+            logger.addHandler(handler)
+            return io
+
+        io = {}
+        for name in ('request', 'body', 'response', 'request-id'):
+            io[name] = get_logger_io(name)
+
+        auth = AuthPlugin()
+        sess = client_session.Session(auth=auth, split_loggers=True)
+        response_key = uuid.uuid4().hex
+        response_val = uuid.uuid4().hex
+        response = {response_key: response_val}
+        request_id = uuid.uuid4().hex
+
+        self.stub_url(
+            'GET',
+            json=response,
+            headers={
+                'Content-Type': 'application/json',
+                'X-OpenStack-Request-ID': request_id,
+            })
+
+        resp = sess.get(self.TEST_URL)
+
+        self.assertEqual(response, resp.json())
+
+        request_output = io['request'].getvalue().strip()
+        response_output = io['response'].getvalue().strip()
+        body_output = io['body'].getvalue().strip()
+        id_output = io['request-id'].getvalue().strip()
+
+        self.assertIn('curl -g -i -X GET {url}'.format(url=self.TEST_URL),
+                      request_output)
+        self.assertEqual('[200] Content-Type: application/json '
+                         'X-OpenStack-Request-ID: '
+                         '{id}'.format(id=request_id), response_output)
+        self.assertEqual(
+            'GET call to {url} used request id {id}'.format(
+                url=self.TEST_URL, id=request_id),
+            id_output)
+        self.assertEqual(
+            '{{"{key}": "{val}"}}'.format(
+                key=response_key, val=response_val),
+            body_output)
+
 
 class AdapterTest(utils.TestCase):
 
diff --git a/tox.ini b/tox.ini
index 22d0dc9d..bae02a2c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,6 +7,7 @@ envlist = py35,py27,pep8,releasenotes
 usedevelop = True
 install_command = {toxinidir}/tools/tox_install.sh {env:UPPER_CONSTRAINTS_FILE:https://git.openstack.org/cgit/openstack/requirements/plain/upper-constraints.txt} {opts} {packages}
 setenv = VIRTUAL_ENV={envdir}
+         PYTHONHASHSEED=0
          BRANCH_NAME=master
          CLIENT_NAME=keystoneauth1
          OS_STDOUT_NOCAPTURE=False