Avoid ASCII encoding errors when output is redirected

Unicode must be encoded/decoded for text I/O streams. The
correct encoding for the stream must be selected, and it must
be capable of handling the set of characters in the stream,
or Python will raise a codec error. The correct codec is
selected based on the locale. Python2 uses the locale's
encoding, but only when the I/O stream is attached to a
terminal (TTY); otherwise it uses the default ASCII
encoding. The effect is that internationalized text written
to the terminal works as expected, but if command line output
is redirected (file or pipe) the ASCII codec is used and the
program aborts with a codec error.

The default I/O streams stdin, stdout and stderr can be
wrapped in a codec based on the locale, thus assuring the
user's desired encoding is always used no matter the I/O
destination. Python3 does this by default.

If the caller supplies an I/O stream we use it unmodified on
the assumption the caller has taken all responsibility for
the stream.  But with Python2, if the caller allows us to
default the I/O streams to sys.stdin, sys.stdout and
sys.stderr, we apply the locale's encoding just as Python3
would do. We also check to make sure the main Python program
has not already wrapped sys.stdin, sys.stdout and
sys.stderr, as this is a common recommendation.

Note: the unit test enhancement included in this patch is limited to
assuring the I/O streams passed to the App constructor are wrapped or
not wrapped as expected. No attempt is made to assure codec errors
will not occur in a terminal because of the difficulty of constructing
such a test. Properly testing this would probably require spawning a
subshell, but one can't use Python's subprocess module because the I/O
streams are not attached to a TTY. One could set up a pty
(pseudo-terminal), but experience has shown ptys to be non-portable
across operating systems and environments; getting a pty to work
correctly in all testing environments would be more work and hassle
than is justified.

Change-Id: I8e85936dff0d5db29a66337c9ce60c70a8dee10a
Closes-Bug: 1603210
Signed-off-by: John Dennis <jdennis@redhat.com>
This commit is contained in:
John Dennis 2016-07-15 09:01:45 -04:00 committed by ayoung
parent 9f42088539
commit 50d67088ea
2 changed files with 124 additions and 0 deletions

View File

@ -1,11 +1,13 @@
"""Application base class.
"""
import codecs
import inspect
import locale
import logging
import logging.handlers
import os
import six
import sys
from cliff import argparse
@ -70,6 +72,45 @@ class App(object):
locale.setlocale(locale.LC_ALL, '')
except locale.Error:
pass
# Unicode must be encoded/decoded for text I/O streams. The
# correct encoding for the stream must be selected, and it must
# be capable of handling the set of characters in the stream,
# or Python will raise a codec error. The correct codec is
# selected based on the locale. Python2 uses the locale's
# encoding, but only when the I/O stream is attached to a
# terminal (TTY); otherwise it uses the default ASCII
# encoding. The effect is that internationalized text written
# to the terminal works as expected, but if command line output
# is redirected (file or pipe) the ASCII codec is used and the
# program aborts with a codec error.
#
# The default I/O streams stdin, stdout and stderr can be
# wrapped in a codec based on the locale, thus assuring the
# user's desired encoding is always used no matter the I/O
# destination. Python3 does this by default.
#
# If the caller supplies an I/O stream we use it unmodified on
# the assumption the caller has taken all responsibility for
# the stream. But with Python2, if the caller allows us to
# default the I/O streams to sys.stdin, sys.stdout and
# sys.stderr, we apply the locale's encoding just as Python3
# would do. We also check to make sure the main Python program
# has not already wrapped sys.stdin, sys.stdout and
# sys.stderr, as this is a common recommendation.
if six.PY2:
encoding = locale.getpreferredencoding()
if encoding:
if not (stdin or isinstance(sys.stdin, codecs.StreamReader)):
stdin = codecs.getreader(encoding)(sys.stdin)
if not (stdout or isinstance(sys.stdout, codecs.StreamWriter)):
stdout = codecs.getwriter(encoding)(sys.stdout)
if not (stderr or isinstance(sys.stderr, codecs.StreamWriter)):
stderr = codecs.getwriter(encoding)(sys.stderr)
self.stdin = stdin or sys.stdin
self.stdout = stdout or sys.stdout
self.stderr = stderr or sys.stderr

View File

@ -5,7 +5,11 @@ try:
except ImportError:
from io import StringIO
import codecs
import locale
import mock
import six
import sys
from cliff.app import App
from cliff.command import Command
@ -398,3 +402,82 @@ def test_verbose():
pass
else:
raise Exception('Exception was not thrown')
def test_io_streams():
    """Verify which I/O streams App wraps in a codec and which it keeps as is.

    On Python 2, a stream that is left to default to sys.stdin, sys.stdout
    or sys.stderr is expected to come back wrapped in a codecs.StreamReader
    or codecs.StreamWriter, whereas a caller-supplied stream, or a sys
    stream that is already codec-wrapped, must be used unchanged. On
    Python 3, every stream is expected to be passed through untouched.
    """
    manager = CommandManager('cliff.tests')
    fake_stream = mock.Mock()

    if not six.PY2:
        # Python 3: defaults are the sys streams themselves and any
        # caller-supplied stream is kept as is.
        app = App('no io streams', 1, manager)
        assert app.stdin is sys.stdin
        assert app.stdout is sys.stdout
        assert app.stderr is sys.stderr

        app = App('with stdin io stream', 1, manager, stdin=fake_stream)
        assert app.stdin is fake_stream
        assert app.stdout is sys.stdout
        assert app.stderr is sys.stderr

        app = App('with stdout io stream', 1, manager, stdout=fake_stream)
        assert app.stdin is sys.stdin
        assert app.stdout is fake_stream
        assert app.stderr is sys.stderr

        app = App('with stderr io stream', 1, manager, stderr=fake_stream)
        assert app.stdin is sys.stdin
        assert app.stdout is sys.stdout
        assert app.stderr is fake_stream
        return

    # Python 2 from here on. Remember the real sys streams so each
    # temporary replacement below can be undone.
    original_stdin = sys.stdin
    original_stdout = sys.stdout
    original_stderr = sys.stderr
    codec_name = locale.getpreferredencoding() or 'utf-8'

    # Nothing supplied: all three defaults get wrapped.
    app = App('no io streams', 1, manager)
    assert isinstance(app.stdin, codecs.StreamReader)
    assert isinstance(app.stdout, codecs.StreamWriter)
    assert isinstance(app.stderr, codecs.StreamWriter)

    # A caller-supplied stream is used unchanged; the others are wrapped.
    app = App('with stdin io stream', 1, manager, stdin=fake_stream)
    assert app.stdin is fake_stream
    assert isinstance(app.stdout, codecs.StreamWriter)
    assert isinstance(app.stderr, codecs.StreamWriter)

    app = App('with stdout io stream', 1, manager, stdout=fake_stream)
    assert isinstance(app.stdin, codecs.StreamReader)
    assert app.stdout is fake_stream
    assert isinstance(app.stderr, codecs.StreamWriter)

    app = App('with stderr io stream', 1, manager, stderr=fake_stream)
    assert isinstance(app.stdin, codecs.StreamReader)
    assert isinstance(app.stdout, codecs.StreamWriter)
    assert app.stderr is fake_stream

    # A sys stream that is already codec-wrapped must not be wrapped again.
    try:
        wrapped_stdin = codecs.getreader(codec_name)(sys.stdin)
        sys.stdin = wrapped_stdin
        app = App('with wrapped sys.stdin io stream', 1, manager)
        assert app.stdin is sys.stdin
        assert isinstance(app.stdout, codecs.StreamWriter)
        assert isinstance(app.stderr, codecs.StreamWriter)
    finally:
        sys.stdin = original_stdin

    try:
        wrapped_stdout = codecs.getwriter(codec_name)(sys.stdout)
        sys.stdout = wrapped_stdout
        app = App('with wrapped stdout io stream', 1, manager)
        assert isinstance(app.stdin, codecs.StreamReader)
        assert app.stdout is sys.stdout
        assert isinstance(app.stderr, codecs.StreamWriter)
    finally:
        sys.stdout = original_stdout

    try:
        wrapped_stderr = codecs.getwriter(codec_name)(sys.stderr)
        sys.stderr = wrapped_stderr
        app = App('with wrapped stderr io stream', 1, manager)
        assert isinstance(app.stdin, codecs.StreamReader)
        assert isinstance(app.stdout, codecs.StreamWriter)
        assert app.stderr is sys.stderr
    finally:
        sys.stderr = original_stderr