py3: fix copying unicode names

Turns out, unquote()ing WSGI strings is a great way to mangle them.

Change-Id: I42a08d84aa22a1a7ee7ccab97aaec55d845264f9
This commit is contained in:
Tim Burke 2019-03-01 13:04:58 -08:00
parent 96013436a1
commit 4ac81ebbd7
5 changed files with 83 additions and 20 deletions

View File

@ -114,12 +114,11 @@ greater than 5GB.
"""
from six.moves.urllib.parse import quote, unquote
from swift.common.utils import get_logger, config_true_value, FileLikeIter, \
close_if_possible
from swift.common.swob import Request, HTTPPreconditionFailed, \
HTTPRequestEntityTooLarge, HTTPBadRequest, HTTPException
HTTPRequestEntityTooLarge, HTTPBadRequest, HTTPException, \
wsgi_quote, wsgi_unquote
from swift.common.http import HTTP_MULTIPLE_CHOICES, is_success, HTTP_OK
from swift.common.constraints import check_account_format, MAX_FILE_SIZE
from swift.common.request_helpers import copy_header_subset, remove_items, \
@ -183,7 +182,7 @@ class ServerSideCopyWebContext(WSGIContext):
def get_source_resp(self, req):
sub_req = make_subrequest(
req.environ, path=quote(req.path_info), headers=req.headers,
req.environ, path=wsgi_quote(req.path_info), headers=req.headers,
swift_source='SSC')
return sub_req.get_response(self.app)
@ -257,9 +256,9 @@ class ServerSideCopyMiddleware(object):
)(req.environ, start_response)
dest_account = account
if 'Destination-Account' in req.headers:
dest_account = unquote(req.headers.get('Destination-Account'))
dest_account = wsgi_unquote(req.headers.get('Destination-Account'))
dest_account = check_account_format(req, dest_account)
req.headers['X-Copy-From-Account'] = quote(account)
req.headers['X-Copy-From-Account'] = wsgi_quote(account)
account = dest_account
del req.headers['Destination-Account']
dest_container, dest_object = _check_destination_header(req)
@ -275,7 +274,7 @@ class ServerSideCopyMiddleware(object):
req.path_info = '/%s/%s/%s/%s' % (
ver, dest_account, dest_container, dest_object)
req.headers['Content-Length'] = 0
req.headers['X-Copy-From'] = quote(source)
req.headers['X-Copy-From'] = wsgi_quote(source)
del req.headers['Destination']
return self.handle_PUT(req, start_response)
@ -312,8 +311,8 @@ class ServerSideCopyMiddleware(object):
def _create_response_headers(self, source_path, source_resp, sink_req):
resp_headers = dict()
acct, path = source_path.split('/', 3)[2:4]
resp_headers['X-Copied-From-Account'] = quote(acct)
resp_headers['X-Copied-From'] = quote(path)
resp_headers['X-Copied-From-Account'] = wsgi_quote(acct)
resp_headers['X-Copied-From'] = wsgi_quote(path)
if 'last-modified' in source_resp.headers:
resp_headers['X-Copied-From-Last-Modified'] = \
source_resp.headers['last-modified']
@ -334,7 +333,7 @@ class ServerSideCopyMiddleware(object):
src_account_name = req.headers.get('X-Copy-From-Account')
if src_account_name:
src_account_name = check_account_format(
req, unquote(src_account_name))
req, wsgi_unquote(src_account_name))
else:
src_account_name = acct
src_container_name, src_obj_name = _check_copy_from_header(req)

View File

@ -26,7 +26,6 @@ import sys
import time
import six
from six.moves.urllib.parse import unquote
from swift.common.header_key_dict import HeaderKeyDict
from swift import gettext_ as _
@ -35,7 +34,7 @@ from swift.common.exceptions import ListingIterError, SegmentError
from swift.common.http import is_success
from swift.common.swob import HTTPBadRequest, \
HTTPServiceUnavailable, Range, is_chunked, multi_range_iterator, \
HTTPPreconditionFailed, wsgi_to_bytes
HTTPPreconditionFailed, wsgi_to_bytes, wsgi_unquote, wsgi_to_str
from swift.common.utils import split_path, validate_device_partition, \
close_if_possible, maybe_multipart_byteranges_to_document_iters, \
multipart_byteranges_to_document_iters, parse_content_type, \
@ -115,14 +114,13 @@ def split_and_validate_path(request, minsegs=1, maxsegs=None,
Utility function to split and validate the request path.
:returns: result of :meth:`~swift.common.utils.split_path` if
everything's okay
everything's okay, as native strings
:raises HTTPBadRequest: if something's not okay
"""
try:
segs = split_path(unquote(request.path),
minsegs, maxsegs, rest_with_last)
segs = request.split_path(minsegs, maxsegs, rest_with_last)
validate_device_partition(segs[0], segs[1])
return segs
return [wsgi_to_str(seg) for seg in segs]
except ValueError as err:
raise HTTPBadRequest(body=str(err), request=request,
content_type='text/plain')
@ -308,7 +306,7 @@ def check_path_header(req, name, length, error_msg):
:raise: HTTPPreconditionFailed if header value
is not well formatted.
"""
hdr = unquote(req.headers.get(name))
hdr = wsgi_unquote(req.headers.get(name))
if not hdr.startswith('/'):
hdr = '/' + hdr
try:

View File

@ -309,6 +309,50 @@ def str_to_wsgi(native_str):
return bytes_to_wsgi(native_str.encode('utf8', errors='surrogateescape'))
def wsgi_quote(wsgi_str):
if six.PY2:
if not isinstance(wsgi_str, bytes):
raise TypeError('Expected a WSGI string; got %r' % wsgi_str)
return urllib.parse.quote(wsgi_str)
if not isinstance(wsgi_str, str) or any(ord(x) > 255 for x in wsgi_str):
raise TypeError('Expected a WSGI string; got %r' % wsgi_str)
return urllib.parse.quote(wsgi_str, encoding='latin-1')
def wsgi_unquote(wsgi_str):
if six.PY2:
if not isinstance(wsgi_str, bytes):
raise TypeError('Expected a WSGI string; got %r' % wsgi_str)
return urllib.parse.unquote(wsgi_str)
if not isinstance(wsgi_str, str) or any(ord(x) > 255 for x in wsgi_str):
raise TypeError('Expected a WSGI string; got %r' % wsgi_str)
return urllib.parse.unquote(wsgi_str, encoding='latin-1')
def wsgi_quote_plus(wsgi_str):
if six.PY2:
if not isinstance(wsgi_str, bytes):
raise TypeError('Expected a WSGI string; got %r' % wsgi_str)
return urllib.parse.quote_plus(wsgi_str)
if not isinstance(wsgi_str, str) or any(ord(x) > 255 for x in wsgi_str):
raise TypeError('Expected a WSGI string; got %r' % wsgi_str)
return urllib.parse.quote_plus(wsgi_str, encoding='latin-1')
def wsgi_unquote_plus(wsgi_str):
if six.PY2:
if not isinstance(wsgi_str, bytes):
raise TypeError('Expected a WSGI string; got %r' % wsgi_str)
return urllib.parse.unquote_plus(wsgi_str)
if not isinstance(wsgi_str, str) or any(ord(x) > 255 for x in wsgi_str):
raise TypeError('Expected a WSGI string; got %r' % wsgi_str)
return urllib.parse.unquote_plus(wsgi_str, encoding='latin-1')
def _resp_status_property():
"""
Set and retrieve the value of Response.status

View File

@ -32,13 +32,12 @@ from eventlet.green import socket, ssl, os as green_os
import six
from six import BytesIO
from six import StringIO
from six.moves.urllib.parse import unquote
if six.PY2:
import mimetools
from swift.common import utils, constraints
from swift.common.storage_policy import BindPortsCache
from swift.common.swob import Request
from swift.common.swob import Request, wsgi_unquote
from swift.common.utils import capture_stdio, disable_fallocate, \
drop_privileges, get_logger, NullLogger, config_true_value, \
validate_configuration, get_hub, config_auto_int_value, \
@ -1319,7 +1318,7 @@ def make_subrequest(env, method=None, path=None, body=None, headers=None,
path = path or ''
if path and '?' in path:
path, query_string = path.split('?', 1)
newenv = make_env(env, method, path=unquote(path), agent=agent,
newenv = make_env(env, method, path=wsgi_unquote(path), agent=agent,
query_string=query_string, swift_source=swift_source)
if not headers:
headers = {}

View File

@ -335,6 +335,29 @@ class TestServerSideCopyMiddleware(unittest.TestCase):
self.assertEqual('PUT', self.authorized[1].method)
self.assertEqual('/v1/a/c/o', self.authorized[1].path)
def test_copy_with_unicode(self):
self.app.register('GET', '/v1/a/c/\xF0\x9F\x8C\xB4', swob.HTTPOk,
{}, 'passed')
self.app.register('PUT', '/v1/a/c/\xE2\x98\x83', swob.HTTPCreated, {})
# Just for fun, let's have a mix of properly encoded and not
req = Request.blank('/v1/a/c/%F0\x9F%8C%B4',
environ={'REQUEST_METHOD': 'COPY'},
headers={'Content-Length': '0',
'Destination': 'c/%E2\x98%83'})
status, headers, body = self.call_ssc(req)
self.assertEqual(status, '201 Created')
calls = self.app.calls_with_headers
method, path, req_headers = calls[0]
self.assertEqual('GET', method)
self.assertEqual('/v1/a/c/\xF0\x9F\x8C\xB4', path)
self.assertIn(('X-Copied-From', 'c/%F0%9F%8C%B4'), headers)
self.assertEqual(len(self.authorized), 2)
self.assertEqual('GET', self.authorized[0].method)
self.assertEqual('/v1/a/c/%F0%9F%8C%B4', self.authorized[0].path)
self.assertEqual('PUT', self.authorized[1].method)
self.assertEqual('/v1/a/c/%E2%98%83', self.authorized[1].path)
def test_copy_with_spaces_in_x_copy_from_and_account(self):
self.app.register('GET', '/v1/a/c/o o2', swob.HTTPOk, {}, b'passed')
self.app.register('PUT', '/v1/a1/c1/o', swob.HTTPCreated, {})