From 50d67088eabfde69db1d8d86d7f45f094611cf26 Mon Sep 17 00:00:00 2001 From: John Dennis Date: Fri, 15 Jul 2016 09:01:45 -0400 Subject: [PATCH] Avoid ASCII encoding errors when output is redirected Unicode must be encoded/decoded for text I/O streams, the correct encoding for the stream must be selected and it must be capable of handling the set of characters in the stream or Python will raise a codec error. The correct codec is selected based on the locale. Python2 uses the locale's encoding but only when the I/O stream is attached to a terminal (TTY) otherwise it uses the default ASCII encoding. The effect is internationalized text written to the terminal works as expected but if command line output is redirected (file or pipe) the ASCII codec is used and the program aborts with a codec error. The default I/O streams stdin, stdout and stderr can be wrapped in a codec based on the locale thus assuring the user's desired encoding is always used no matter the I/O destination. Python3 does this by default. If the caller supplies an I/O stream we use it unmodified on the assumption the caller has taken all responsibility for the stream. But with Python2 if the caller allows us to default the I/O streams to sys.stdin, sys.stdout and sys.stderr we apply the locale's encoding just as Python3 would do. We also check to make sure the main Python program has not already wrapped sys.stdin, sys.stdout and sys.stderr as this is a common recommendation. Note, the unit test enhancement included in this patch is limited to assuring the I/O streams passed to the App constructor are wrapped or not wrapped as expected. No attempt is made to assure codec errors will not occur in a terminal because of the difficulty of constructing such a test. Properly testing this probably would require spawning a subshell but one can't use Python's subprocess module because the I/O streams are not attached to a TTY. 
One could set up a pty (pseudo-terminal) but experience has shown ptys to be non-portable across operating systems and environments; getting a pty to work correctly in all testing environments would be more work and hassle than is justified. Change-Id: I8e85936dff0d5db29a66337c9ce60c70a8dee10a Closes-Bug: 1603210 Signed-off-by: John Dennis --- cliff/app.py | 41 ++++++++++++++++++++ cliff/tests/test_app.py | 83 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/cliff/app.py b/cliff/app.py index 9b123fe..fbe8df3 100644 --- a/cliff/app.py +++ b/cliff/app.py @@ -1,11 +1,13 @@ """Application base class. """ +import codecs import inspect import locale import logging import logging.handlers import os +import six import sys from cliff import argparse @@ -70,6 +72,45 @@ class App(object): locale.setlocale(locale.LC_ALL, '') except locale.Error: pass + + # Unicode must be encoded/decoded for text I/O streams, the + # correct encoding for the stream must be selected and it must + # be capable of handling the set of characters in the stream + # or Python will raise a codec error. The correct codec is + # selected based on the locale. Python2 uses the locales + # encoding but only when the I/O stream is attached to a + # terminal (TTY) otherwise it uses the default ASCII + # encoding. The effect is internationalized text written to + # the terminal works as expected but if command line output is + # redirected (file or pipe) the ASCII codec is used and the + # program aborts with a codec error. + # + # The default I/O streams stdin, stdout and stderr can be + # wrapped in a codec based on the locale thus assuring the + # users desired encoding is always used no matter the I/O + # destination. Python3 does this by default. + # + # If the caller supplies an I/O stream we use it unmodified on + # the assumption the caller has taken all responsibility for + # the stream. 
But with Python2 if the caller allows us to + # default the I/O streams to sys.stdin, sys.stdout and + # sys.stderr we apply the locales encoding just as Python3 + # would do. We also check to make sure the main Python program + # has not already already wrapped sys.stdin, sys.stdout and + # sys.stderr as this is a common recommendation. + + if six.PY2: + encoding = locale.getpreferredencoding() + if encoding: + if not (stdin or isinstance(sys.stdin, codecs.StreamReader)): + stdin = codecs.getreader(encoding)(sys.stdin) + + if not (stdout or isinstance(sys.stdout, codecs.StreamWriter)): + stdout = codecs.getwriter(encoding)(sys.stdout) + + if not (stderr or isinstance(sys.stderr, codecs.StreamWriter)): + stderr = codecs.getwriter(encoding)(sys.stderr) + self.stdin = stdin or sys.stdin self.stdout = stdout or sys.stdout self.stderr = stderr or sys.stderr diff --git a/cliff/tests/test_app.py b/cliff/tests/test_app.py index ae60269..42fe442 100644 --- a/cliff/tests/test_app.py +++ b/cliff/tests/test_app.py @@ -5,7 +5,11 @@ try: except ImportError: from io import StringIO +import codecs +import locale import mock +import six +import sys from cliff.app import App from cliff.command import Command @@ -398,3 +402,82 @@ def test_verbose(): pass else: raise Exception('Exception was not thrown') + + +def test_io_streams(): + cmd_mgr = CommandManager('cliff.tests') + io = mock.Mock() + + if six.PY2: + stdin_save = sys.stdin + stdout_save = sys.stdout + stderr_save = sys.stderr + encoding = locale.getpreferredencoding() or 'utf-8' + + app = App('no io streams', 1, cmd_mgr) + assert isinstance(app.stdin, codecs.StreamReader) + assert isinstance(app.stdout, codecs.StreamWriter) + assert isinstance(app.stderr, codecs.StreamWriter) + + app = App('with stdin io stream', 1, cmd_mgr, stdin=io) + assert app.stdin is io + assert isinstance(app.stdout, codecs.StreamWriter) + assert isinstance(app.stderr, codecs.StreamWriter) + + app = App('with stdout io stream', 1, cmd_mgr, stdout=io) 
+ assert isinstance(app.stdin, codecs.StreamReader) + assert app.stdout is io + assert isinstance(app.stderr, codecs.StreamWriter) + + app = App('with stderr io stream', 1, cmd_mgr, stderr=io) + assert isinstance(app.stdin, codecs.StreamReader) + assert isinstance(app.stdout, codecs.StreamWriter) + assert app.stderr is io + + try: + sys.stdin = codecs.getreader(encoding)(sys.stdin) + app = App('with wrapped sys.stdin io stream', 1, cmd_mgr) + assert app.stdin is sys.stdin + assert isinstance(app.stdout, codecs.StreamWriter) + assert isinstance(app.stderr, codecs.StreamWriter) + finally: + sys.stdin = stdin_save + + try: + sys.stdout = codecs.getwriter(encoding)(sys.stdout) + app = App('with wrapped stdout io stream', 1, cmd_mgr) + assert isinstance(app.stdin, codecs.StreamReader) + assert app.stdout is sys.stdout + assert isinstance(app.stderr, codecs.StreamWriter) + finally: + sys.stdout = stdout_save + + try: + sys.stderr = codecs.getwriter(encoding)(sys.stderr) + app = App('with wrapped stderr io stream', 1, cmd_mgr) + assert isinstance(app.stdin, codecs.StreamReader) + assert isinstance(app.stdout, codecs.StreamWriter) + assert app.stderr is sys.stderr + finally: + sys.stderr = stderr_save + + else: + app = App('no io streams', 1, cmd_mgr) + assert app.stdin is sys.stdin + assert app.stdout is sys.stdout + assert app.stderr is sys.stderr + + app = App('with stdin io stream', 1, cmd_mgr, stdin=io) + assert app.stdin is io + assert app.stdout is sys.stdout + assert app.stderr is sys.stderr + + app = App('with stdout io stream', 1, cmd_mgr, stdout=io) + assert app.stdin is sys.stdin + assert app.stdout is io + assert app.stderr is sys.stderr + + app = App('with stderr io stream', 1, cmd_mgr, stderr=io) + assert app.stdin is sys.stdin + assert app.stdout is sys.stdout + assert app.stderr is io