Add support for non-ascii string

Swift3 doesn't support non-ascii strings because the lxml
library returns element text as unicode objects, which Swift
doesn't expect. This patch introduces a wrapper Element class
which exposes its text property as a utf-8 encoded string.

Change-Id: Ie4add6ca3d678400306d2404e2e1ee447ad54755
This commit is contained in:
Kota Tsuyuzaki 2014-08-31 22:13:39 -07:00
parent 065bb5f034
commit 9bf2ff8482
4 changed files with 78 additions and 4 deletions

View File

@ -54,7 +54,7 @@ def cleanup_namespaces(elem):
def fromstring(text, root_tag=None):
try:
elem = lxml.etree.fromstring(text)
elem = lxml.etree.fromstring(text, parser)
except lxml.etree.XMLSyntaxError as e:
LOGGER.debug(e)
raise XMLSyntaxError(e)
@ -83,7 +83,7 @@ def tostring(tree, use_s3ns=True):
nsmap = tree.nsmap.copy()
nsmap[None] = XMLNS_S3
root = lxml.etree.Element(tree.tag, attrib=tree.attrib, nsmap=nsmap)
root = Element(tree.tag, attrib=tree.attrib, nsmap=nsmap)
root.text = tree.text
root.extend(deepcopy(tree.getchildren()))
tree = root
@ -91,5 +91,41 @@ def tostring(tree, use_s3ns=True):
return lxml.etree.tostring(tree, xml_declaration=True, encoding='UTF-8')
Element = lxml.etree.Element
class _Element(lxml.etree.ElementBase):
    """
    Wrapper Element class of lxml.etree.Element to support
    a utf-8 encoded non-ascii string as a text.

    Why do we need this?:
    The original lxml.etree.Element supports only unicode for the text.
    That hurts maintainability because we would have to call a lot of
    encode/decode methods to apply an account/container/object name
    (i.e. PATH_INFO) to each Element instance. With this class, that
    redundant code can be removed from the swift3 middleware.

    No __init__ is defined here on purpose: lxml's documentation warns
    against overriding __init__ in ElementBase subclasses, so the
    inherited constructor is used as is.
    """

    @property
    def text(self):
        """
        utf-8 wrapper property of lxml.etree.Element.text

        Returns the underlying text encoded as utf-8 when lxml hands back
        a unicode object; any other value is returned unchanged.
        """
        text = lxml.etree.ElementBase.text.__get__(self)
        if isinstance(text, unicode):
            text = text.encode('utf-8')
        return text

    @text.setter
    def text(self, value):
        # A byte string is decoded as utf-8 before it is handed to lxml;
        # any other value is passed through untouched.
        if isinstance(value, str):
            value = value.decode('utf-8')
        lxml.etree.ElementBase.text.__set__(self, value)
# Class lookup that selects _Element as the class used for element nodes.
parser_lookup = lxml.etree.ElementDefaultClassLookup(element=_Element)
# Module-level XML parser with the lookup above installed.
parser = lxml.etree.XMLParser()
parser.set_element_class_lookup(parser_lookup)
# Element factory exposed by this module; elements are created through the
# parser configured above rather than through lxml.etree.Element.
Element = parser.makeelement
# Exposed under the same name as in lxml.etree, without any wrapping.
SubElement = lxml.etree.SubElement

View File

@ -175,6 +175,26 @@ class TestSwift3Bucket(Swift3TestCase):
self.assertEquals(args['marker'], 'b')
self.assertEquals(args['prefix'], 'c')
def test_bucket_GET_with_nonascii_queries(self):
    """
    A bucket GET with non-ascii delimiter/marker/prefix query values
    reports them in the listing body and forwards them in the backend
    request's query string, all as utf-8 byte strings.
    """
    bucket_name = 'junk'
    environ = {
        'REQUEST_METHOD': 'GET',
        'QUERY_STRING':
        'delimiter=\xef\xbc\xa1&marker=\xef\xbc\xa2&'
        'prefix=\xef\xbc\xa3',
    }
    auth_headers = {'Authorization': 'AWS test:tester:hmac'}
    req = Request.blank('/%s' % bucket_name,
                        environ=environ,
                        headers=auth_headers)
    status, headers, body = self.call_swift3(req)

    # The response body carries the query values as element text.
    listing = fromstring(body, 'ListBucketResult')
    self.assertEquals(listing.find('./Prefix').text, '\xef\xbc\xa3')
    self.assertEquals(listing.find('./Marker').text, '\xef\xbc\xa2')
    self.assertEquals(listing.find('./Delimiter').text, '\xef\xbc\xa1')

    # The most recent recorded backend call carries the same values.
    _, backend_path = self.swift.calls[-1]
    _, backend_query = backend_path.split('?')
    forwarded = dict(cgi.parse_qsl(backend_query))
    self.assertEquals(forwarded['delimiter'], '\xef\xbc\xa1')
    self.assertEquals(forwarded['marker'], '\xef\xbc\xa2')
    self.assertEquals(forwarded['prefix'], '\xef\xbc\xa3')
def test_bucket_PUT_error(self):
code = self._test_method_error('PUT', '/bucket', swob.HTTPCreated,
headers={'Content-Length': 'a'})

View File

@ -52,5 +52,22 @@ class TestSwift3Etree(unittest.TestCase):
elem = etree.fromstring(xml)
self.assertEquals(elem.find('./B').text, 'C')
def test_tostring_with_nonascii_text(self):
    """
    Non-ascii utf-8 text assigned to an element reads back as str,
    and serializing the tree yields a str as well.
    """
    root = etree.Element('Test')
    child = etree.SubElement(root, 'FOO')
    child.text = '\xef\xbc\xa1'
    # The text property must hand back a byte string, not unicode.
    self.assertTrue(isinstance(child.text, str))

    serialized = etree.tostring(root)
    self.assertTrue(isinstance(serialized, str))
def test_fromstring_with_nonascii_text(self):
    """
    Parsing a utf-8 document with non-ascii element text yields that
    text as the same utf-8 byte string (str).
    """
    document = ('<?xml version="1.0" encoding="UTF-8"?>\n'
                '<Test><FOO>\xef\xbc\xa1</FOO></Test>')
    parsed = etree.fromstring(document)
    foo_text = parsed.find('FOO').text
    self.assertEquals(foo_text, '\xef\xbc\xa1')
    self.assertTrue(isinstance(foo_text, str))
if __name__ == '__main__':
unittest.main()

View File

@ -33,7 +33,8 @@ deps =
https://launchpad.net/keystone/icehouse/2014.1.1/+download/keystone-2014.1.1.tar.gz
[testenv:pylint]
commands = pylint -E swift3
# Avoid failures caused by pylint checking members of the dynamically loaded lxml.etree module
commands = pylint -E swift3 --ignored-modules=lxml.etree
[testenv:pep8]
commands = flake8