rally/rally/utils/encodeutils.py

95 lines
3.4 KiB
Python

# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sys
def _get_default_encoding():
return sys.stdin.encoding or sys.getdefaultencoding()
def safe_decode(text, incoming=None, errors="strict"):
"""Decodes incoming string using `incoming` if they're not already unicode.
:param text: text/bytes string to decode
:param incoming: Text's current encoding
:param errors: Errors handling policy. See here for valid
values http://docs.python.org/2/library/codecs.html
:returns: text or a unicode `incoming` encoded representation of it.
:raises TypeError: If text is not an instance of str
"""
if not isinstance(text, (str, bytes)):
raise TypeError("%s can't be decoded" % type(text))
if isinstance(text, str):
return text
if not incoming:
incoming = _get_default_encoding()
try:
return text.decode(incoming, errors)
except UnicodeDecodeError:
# Note(flaper87) If we get here, it means that
# sys.stdin.encoding / sys.getdefaultencoding
# didn't return a suitable encoding to decode
# text. This happens mostly when global LANG
# var is not set correctly and there's no
# default encoding. In this case, most likely
# python will use ASCII or ANSI encoders as
# default encodings but they won't be capable
# of decoding non-ASCII characters.
#
# Also, UTF-8 is being used since it's an ASCII
# extension.
return text.decode("utf-8", errors)
def safe_encode(text, incoming=None, encoding="utf-8", errors="strict"):
"""Encodes incoming text/bytes string using `encoding`.
If incoming is not specified, text is expected to be encoded with
current python's default encoding. (`sys.getdefaultencoding`)
:param text: Incoming text/bytes string
:param incoming: Text's current encoding
:param encoding: Expected encoding for text (Default UTF-8)
:param errors: Errors handling policy. See here for valid
values http://docs.python.org/2/library/codecs.html
:returns: text or a bytestring `encoding` encoded representation of it.
:raises TypeError: If text is not an instance of str
See also to_utf8() function which is simpler and don't depend on
the locale encoding.
"""
if not isinstance(text, (str, bytes)):
raise TypeError("%s can't be encoded" % type(text))
if not incoming:
incoming = _get_default_encoding()
# Avoid case issues in comparisons
if hasattr(incoming, "lower"):
incoming = incoming.lower()
if hasattr(encoding, "lower"):
encoding = encoding.lower()
if isinstance(text, str):
return text.encode(encoding, errors)
elif text and encoding != incoming:
# Decode text before encoding it with `encoding`
text = safe_decode(text, incoming, errors)
return text.encode(encoding, errors)
else:
return text