From d03fc9bc5401ffc6beb9b6faf810577dcd9c4aac Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Tue, 26 Jun 2018 15:56:56 -0700 Subject: [PATCH] swob: Stop auto-encoding unicode bodies Instead, require that callers provide an encoding. Related-Change: I31408f525ba9836f634a35581d4aee6fa2c9428f Change-Id: I3e5ed9e4401eea76c375bb43ad4afc58b1d8006a --- swift/common/constraints.py | 53 ++++++++++--------- swift/common/middleware/domain_remap.py | 2 +- swift/common/middleware/slo.py | 50 ++++++++--------- swift/common/middleware/tempauth.py | 10 ++-- swift/common/swob.py | 18 +++++-- .../unit/common/middleware/test_gatekeeper.py | 4 +- .../common/middleware/test_healthcheck.py | 2 +- test/unit/common/middleware/test_tempauth.py | 14 +++-- test/unit/common/test_swob.py | 18 ++++--- 9 files changed, 99 insertions(+), 72 deletions(-) diff --git a/swift/common/constraints.py b/swift/common/constraints.py index 1f713d50d9..e1c4e7260e 100644 --- a/swift/common/constraints.py +++ b/swift/common/constraints.py @@ -24,7 +24,7 @@ from six.moves import urllib from swift.common import utils, exceptions from swift.common.swob import HTTPBadRequest, HTTPLengthRequired, \ HTTPRequestEntityTooLarge, HTTPPreconditionFailed, HTTPNotImplemented, \ - HTTPException, wsgi_to_str + HTTPException, wsgi_to_str, wsgi_to_bytes MAX_FILE_SIZE = 5368709122 MAX_META_NAME_LENGTH = 128 @@ -132,38 +132,39 @@ def check_metadata(req, target_type): if (isinstance(value, six.string_types) and len(value) > MAX_HEADER_SIZE): - return HTTPBadRequest(body='Header value too long: %s' % - key[:MAX_META_NAME_LENGTH], + return HTTPBadRequest(body=b'Header value too long: %s' % + wsgi_to_bytes(key[:MAX_META_NAME_LENGTH]), request=req, content_type='text/plain') if not key.lower().startswith(prefix): continue key = key[len(prefix):] if not key: - return HTTPBadRequest(body='Metadata name cannot be empty', + return HTTPBadRequest(body=b'Metadata name cannot be empty', request=req, content_type='text/plain') bad_key = not check_utf8(wsgi_to_str(key)) bad_value = value and not check_utf8(wsgi_to_str(value)) if target_type in ('account', 'container') and (bad_key or bad_value): - return HTTPBadRequest(body='Metadata must be valid UTF-8', + return HTTPBadRequest(body=b'Metadata must be valid UTF-8', request=req, content_type='text/plain') meta_count += 1 meta_size += len(key) + len(value) if len(key) > MAX_META_NAME_LENGTH: return HTTPBadRequest( - body='Metadata name too long: %s%s' % (prefix, key), + body=wsgi_to_bytes('Metadata name too long: %s%s' % ( + prefix, key)), request=req, content_type='text/plain') if len(value) > MAX_META_VALUE_LENGTH: return HTTPBadRequest( - body='Metadata value longer than %d: %s%s' % ( - MAX_META_VALUE_LENGTH, prefix, key), + body=wsgi_to_bytes('Metadata value longer than %d: %s%s' % ( + MAX_META_VALUE_LENGTH, prefix, key)), request=req, content_type='text/plain') if meta_count > MAX_META_COUNT: return HTTPBadRequest( - body='Too many metadata items; max %d' % MAX_META_COUNT, + body=b'Too many metadata items; max %d' % MAX_META_COUNT, request=req, content_type='text/plain') if meta_size > MAX_META_OVERALL_SIZE: return HTTPBadRequest( - body='Total metadata too large; max %d' + body=b'Total metadata too large; max %d' % MAX_META_OVERALL_SIZE, request=req, content_type='text/plain') return None @@ -186,28 +187,28 @@ def check_object_creation(req, object_name): ml = req.message_length() except ValueError as e: return HTTPBadRequest(request=req, content_type='text/plain', - body=str(e)) + body=str(e).encode('ascii')) except AttributeError as e: return HTTPNotImplemented(request=req, content_type='text/plain', - body=str(e)) + body=str(e).encode('ascii')) if ml is not None and ml > MAX_FILE_SIZE: - return HTTPRequestEntityTooLarge(body='Your request is too large.', + return HTTPRequestEntityTooLarge(body=b'Your request is too large.', request=req, content_type='text/plain') if req.content_length is None and \ req.headers.get('transfer-encoding') != 'chunked': - return HTTPLengthRequired(body='Missing Content-Length header.', + return HTTPLengthRequired(body=b'Missing Content-Length header.', request=req, content_type='text/plain') if len(object_name) > MAX_OBJECT_NAME_LENGTH: - return HTTPBadRequest(body='Object name length of %d longer than %d' % + return HTTPBadRequest(body=b'Object name length of %d longer than %d' % (len(object_name), MAX_OBJECT_NAME_LENGTH), request=req, content_type='text/plain') if 'Content-Type' not in req.headers: return HTTPBadRequest(request=req, content_type='text/plain', - body='No content type') + body=b'No content type') try: req = check_delete_headers(req) @@ -216,7 +217,7 @@ def check_object_creation(req, object_name): content_type='text/plain') if not check_utf8(wsgi_to_str(req.headers['Content-Type'])): - return HTTPBadRequest(request=req, body='Invalid Content-Type', + return HTTPBadRequest(request=req, body=b'Invalid Content-Type', content_type='text/plain') return check_metadata(req, 'object') @@ -299,7 +300,7 @@ def valid_timestamp(request): try: return request.timestamp except exceptions.InvalidTimestamp as e: - raise HTTPBadRequest(body=str(e), request=request, + raise HTTPBadRequest(body=str(e).encode('ascii'), request=request, content_type='text/plain') @@ -324,13 +325,13 @@ def check_delete_headers(request): except ValueError: raise HTTPBadRequest(request=request, content_type='text/plain', - body='Non-integer X-Delete-After') + body=b'Non-integer X-Delete-After') actual_del_time = utils.normalize_delete_at_timestamp( now + x_delete_after) if int(actual_del_time) <= now: raise HTTPBadRequest(request=request, content_type='text/plain', - body='X-Delete-After in past') + body=b'X-Delete-After in past') request.headers['x-delete-at'] = actual_del_time del request.headers['x-delete-after'] @@ -340,12 +341,12 @@ def check_delete_headers(request): int(request.headers['x-delete-at']))) except ValueError: raise HTTPBadRequest(request=request, content_type='text/plain', - body='Non-integer X-Delete-At') + body=b'Non-integer X-Delete-At') if x_delete_at <= now and not utils.config_true_value( request.headers.get('x-backend-replication', 'f')): raise HTTPBadRequest(request=request, content_type='text/plain', - body='X-Delete-At in past') + body=b'X-Delete-At in past') return request @@ -406,19 +407,19 @@ def check_name_format(req, name, target_type): if not name: raise HTTPPreconditionFailed( request=req, - body='%s name cannot be empty' % target_type) + body=b'%s name cannot be empty' % target_type) if isinstance(name, six.text_type): name = name.encode('utf-8') if b'/' in name: raise HTTPPreconditionFailed( request=req, - body='%s name cannot contain slashes' % target_type) + body=b'%s name cannot contain slashes' % target_type) return name check_account_format = functools.partial(check_name_format, - target_type='Account') + target_type=b'Account') check_container_format = functools.partial(check_name_format, - target_type='Container') + target_type=b'Container') def valid_api_version(version): diff --git a/swift/common/middleware/domain_remap.py b/swift/common/middleware/domain_remap.py index a8f47650b3..d79322ef7c 100644 --- a/swift/common/middleware/domain_remap.py +++ b/swift/common/middleware/domain_remap.py @@ -158,7 +158,7 @@ class DomainRemapMiddleware(object): container, account = None, parts_to_parse[0] else: resp = HTTPBadRequest(request=Request(env), - body='Bad domain in host header', + body=b'Bad domain in host header', content_type='text/plain') return resp(env, start_response) if len(self.reseller_prefixes) > 0: diff --git a/swift/common/middleware/slo.py b/swift/common/middleware/slo.py index f23ba8a3ce..106c9dae89 100644 --- a/swift/common/middleware/slo.py +++ b/swift/common/middleware/slo.py @@ -398,7 +398,7 @@ def parse_and_validate_input(req_body, req_path): errors = [] for seg_index, seg_dict in enumerate(parsed_data): if not isinstance(seg_dict, dict): - errors.append("Index %d: not a JSON object" % seg_index) + errors.append(b"Index %d: not a JSON object" % seg_index) continue for required in SLO_KEYS: @@ -407,36 +407,36 @@ def parse_and_validate_input(req_body, req_path): break else: errors.append( - "Index %d: expected keys to include one of %s" + b"Index %d: expected keys to include one of %s" % (seg_index, - " or ".join(repr(required) for required in SLO_KEYS))) + b" or ".join(repr(required) for required in SLO_KEYS))) continue allowed_keys = SLO_KEYS[segment_type].union([segment_type]) extraneous_keys = [k for k in seg_dict if k not in allowed_keys] if extraneous_keys: errors.append( - "Index %d: extraneous keys %s" + b"Index %d: extraneous keys %s" % (seg_index, - ", ".join('"%s"' % (ek,) - for ek in sorted(extraneous_keys)))) + b", ".join(json.dumps(ek).encode('ascii') + for ek in sorted(extraneous_keys)))) continue if segment_type == 'path': if not isinstance(seg_dict['path'], six.string_types): - errors.append("Index %d: \"path\" must be a string" % + errors.append(b"Index %d: \"path\" must be a string" % seg_index) continue if not (seg_dict.get('etag') is None or isinstance(seg_dict['etag'], six.string_types)): - errors.append('Index %d: "etag" must be a string or null ' - '(if provided)' % seg_index) + errors.append(b'Index %d: "etag" must be a string or null ' + b'(if provided)' % seg_index) continue if '/' not in seg_dict['path'].strip('/'): errors.append( - "Index %d: path does not refer to an object. Path must " - "be of the form /container/object." % seg_index) + b"Index %d: path does not refer to an object. Path must " + b"be of the form /container/object." % seg_index) continue seg_size = seg_dict.get('size_bytes') @@ -445,11 +445,11 @@ def parse_and_validate_input(req_body, req_path): seg_size = int(seg_size) seg_dict['size_bytes'] = seg_size except (TypeError, ValueError): - errors.append("Index %d: invalid size_bytes" % seg_index) + errors.append(b"Index %d: invalid size_bytes" % seg_index) continue if seg_size < 1 and seg_index != (len(parsed_data) - 1): - errors.append("Index %d: too small; each segment must be " - "at least 1 byte." + errors.append(b"Index %d: too small; each segment must be " + b"at least 1 byte." % (seg_index,)) continue @@ -457,7 +457,7 @@ def parse_and_validate_input(req_body, req_path): seg_dict['path'].lstrip('/')]) if req_path == quote(obj_path): errors.append( - "Index %d: manifest must not include itself as a segment" + b"Index %d: manifest must not include itself as a segment" % seg_index) continue @@ -465,12 +465,12 @@ def parse_and_validate_input(req_body, req_path): try: seg_dict['range'] = Range('bytes=%s' % seg_dict['range']) except ValueError: - errors.append("Index %d: invalid range" % seg_index) + errors.append(b"Index %d: invalid range" % seg_index) continue if len(seg_dict['range'].ranges) > 1: - errors.append("Index %d: multiple ranges " - "(only one allowed)" % seg_index) + errors.append(b"Index %d: multiple ranges " + b"(only one allowed)" % seg_index) continue # If the user *told* us the object's size, we can check range @@ -478,7 +478,7 @@ def parse_and_validate_input(req_body, req_path): # fail that validation later. if (seg_size is not None and 1 != len( seg_dict['range'].ranges_for_length(seg_size))): - errors.append("Index %d: unsatisfiable range" % seg_index) + errors.append(b"Index %d: unsatisfiable range" % seg_index) continue elif segment_type == 'data': @@ -487,22 +487,22 @@ def parse_and_validate_input(req_body, req_path): data = strict_b64decode(seg_dict['data']) except ValueError: errors.append( - "Index %d: data must be valid base64" % seg_index) + b"Index %d: data must be valid base64" % seg_index) continue if len(data) < 1: - errors.append("Index %d: too small; each segment must be " - "at least 1 byte." + errors.append(b"Index %d: too small; each segment must be " + b"at least 1 byte." % (seg_index,)) continue # re-encode to normalize padding seg_dict['data'] = base64.b64encode(data) if parsed_data and all('data' in d for d in parsed_data): - errors.append("Inline data segments require at least one " - "object-backed segment.") + errors.append(b"Inline data segments require at least one " + b"object-backed segment.") if errors: - error_message = "".join(e + "\n" for e in errors) + error_message = b"".join(e + b"\n" for e in errors) raise HTTPBadRequest(error_message, headers={"Content-Type": "text/plain"}) diff --git a/swift/common/middleware/tempauth.py b/swift/common/middleware/tempauth.py index 7f46b425e0..823a4ca29f 100644 --- a/swift/common/middleware/tempauth.py +++ b/swift/common/middleware/tempauth.py @@ -174,6 +174,7 @@ To generate a curl command line from the above:: from __future__ import print_function +import json from time import time from traceback import format_exc from uuid import uuid4 @@ -506,16 +507,19 @@ class TempAuth(object): # on ACLs, TempAuth is not such an auth system. At this point, # it thinks it is authoritative. if key not in tempauth_acl_keys: - return "Key '%s' not recognized" % key + return "Key %s not recognized" % json.dumps( + key).encode('ascii') for key in tempauth_acl_keys: if key not in result: continue if not isinstance(result[key], list): - return "Value for key '%s' must be a list" % key + return "Value for key %s must be a list" % json.dumps( + key).encode('ascii') for grantee in result[key]: if not isinstance(grantee, six.string_types): - return "Elements of '%s' list must be strings" % key + return "Elements of %s list must be strings" % json.dumps( + key).encode('ascii') # Everything looks fine, no errors found internal_hdr = get_sys_meta_prefix('account') + 'core-access-control' diff --git a/swift/common/swob.py b/swift/common/swob.py index 23289e2a38..caaefd4b80 100644 --- a/swift/common/swob.py +++ b/swift/common/swob.py @@ -281,16 +281,28 @@ class HeaderEnvironProxy(MutableMapping): return keys +def wsgi_to_bytes(wsgi_str): + if six.PY2: + return wsgi_str + return wsgi_str.encode('latin1') + + def wsgi_to_str(wsgi_str): if six.PY2: return wsgi_str - return wsgi_str.encode('latin1').decode('utf8', errors='surrogateescape') + return wsgi_to_bytes(wsgi_str).decode('utf8', errors='surrogateescape') + + +def bytes_to_wsgi(byte_str): + if six.PY2: + return byte_str + return byte_str.decode('latin1') def str_to_wsgi(native_str): if six.PY2: return native_str - return native_str.encode('utf8', errors='surrogateescape').decode('latin1') + return bytes_to_wsgi(native_str.encode('utf8', errors='surrogateescape')) def _resp_status_property(): @@ -334,7 +346,7 @@ def _resp_body_property(): def setter(self, value): if isinstance(value, six.text_type): - value = value.encode('utf-8') + raise TypeError('WSGI responses must be bytes') if isinstance(value, six.binary_type): self.content_length = len(value) self._app_iter = None diff --git a/test/unit/common/middleware/test_gatekeeper.py b/test/unit/common/middleware/test_gatekeeper.py index 37cbe04888..0fac175dbf 100644 --- a/test/unit/common/middleware/test_gatekeeper.py +++ b/test/unit/common/middleware/test_gatekeeper.py @@ -29,7 +29,7 @@ class FakeApp(object): def __call__(self, env, start_response): self.req = Request(env) - return Response(request=self.req, body='FAKE APP', + return Response(request=self.req, body=b'FAKE APP', headers=self.headers)(env, start_response) @@ -224,7 +224,7 @@ class TestGatekeeper(unittest.TestCase): class SelfishApp(FakeApp): def __call__(self, env, start_response): self.req = Request(env) - resp = Response(request=self.req, body='FAKE APP', + resp = Response(request=self.req, body=b'FAKE APP', headers=self.headers) # like webob, middlewares in the pipeline may rewrite # location header from relative to absolute diff --git a/test/unit/common/middleware/test_healthcheck.py b/test/unit/common/middleware/test_healthcheck.py index 34e29e424b..cc54c8ad1a 100644 --- a/test/unit/common/middleware/test_healthcheck.py +++ b/test/unit/common/middleware/test_healthcheck.py @@ -25,7 +25,7 @@ from swift.common.middleware import healthcheck class FakeApp(object): def __call__(self, env, start_response): req = Request(env) - return Response(request=req, body='FAKE APP')( + return Response(request=req, body=b'FAKE APP')( env, start_response) diff --git a/test/unit/common/middleware/test_tempauth.py b/test/unit/common/middleware/test_tempauth.py index 317da713f3..2e364ed8b1 100644 --- a/test/unit/common/middleware/test_tempauth.py +++ b/test/unit/common/middleware/test_tempauth.py @@ -1342,7 +1342,15 @@ class TestAccountAcls(unittest.TestCase): resp = req.get_response(test_auth) self.assertEqual(resp.status_int, 400) self.assertTrue(resp.body.startswith( - errmsg % "Key 'other-auth-system' not recognized"), resp.body) + errmsg % 'Key "other-auth-system" not recognized'), resp.body) + + # and do something sane with crazy data + update = {'x-account-access-control': u'{"\u1234": []}'.encode('utf8')} + req = self._make_request(target, headers=dict(good_headers, **update)) + resp = req.get_response(test_auth) + self.assertEqual(resp.status_int, 400) + self.assertTrue(resp.body.startswith( + errmsg % 'Key "\\u1234" not recognized'), resp.body) # acls with good keys but bad values also get a 400 update = {'x-account-access-control': bad_value_acl} @@ -1350,7 +1358,7 @@ class TestAccountAcls(unittest.TestCase): resp = req.get_response(test_auth) self.assertEqual(resp.status_int, 400) self.assertTrue(resp.body.startswith( - errmsg % "Value for key 'admin' must be a list"), resp.body) + errmsg % 'Value for key "admin" must be a list'), resp.body) # acls with non-string-types in list also get a 400 update = {'x-account-access-control': bad_list_types} @@ -1358,7 +1366,7 @@ class TestAccountAcls(unittest.TestCase): resp = req.get_response(test_auth) self.assertEqual(resp.status_int, 400) self.assertTrue(resp.body.startswith( - errmsg % "Elements of 'read-only' list must be strings"), + errmsg % 'Elements of "read-only" list must be strings'), resp.body) # acls with wrong json structure also get a 400 diff --git a/test/unit/common/test_swob.py b/test/unit/common/test_swob.py index 4c918cef2d..729156bb08 100644 --- a/test/unit/common/test_swob.py +++ b/test/unit/common/test_swob.py @@ -832,7 +832,7 @@ class TestRequest(unittest.TestCase): @swift.common.swob.wsgify def _wsgi_func(req): used_req.append(req) - return swift.common.swob.Response('200 OK') + return swift.common.swob.Response(b'200 OK') req = swift.common.swob.Request.blank('/hi/there') resp = req.get_response(_wsgi_func) @@ -1117,13 +1117,15 @@ class TestResponse(unittest.TestCase): def test_empty_body(self): resp = self._get_response() - resp.body = '' + resp.body = b'' self.assertEqual(resp.body, b'') def test_unicode_body(self): resp = self._get_response() - resp.body = u'\N{SNOWMAN}' - self.assertEqual(resp.body, u'\N{SNOWMAN}'.encode('utf-8')) + with self.assertRaises(TypeError) as catcher: + resp.body = u'\N{SNOWMAN}' + self.assertEqual(str(catcher.exception), + 'WSGI responses must be bytes') def test_call_reifies_request_if_necessary(self): """ @@ -1388,7 +1390,7 @@ class TestResponse(unittest.TestCase): '/', headers={'Range': 'bytes=1-3'}) resp = swift.common.swob.Response( - body='1234567890', request=req, + body=b'1234567890', request=req, conditional_response=True) body = b''.join(resp({}, start_response)) self.assertEqual(body, b'234') @@ -1408,7 +1410,7 @@ class TestResponse(unittest.TestCase): self.assertEqual(resp.content_range, 'bytes */10') resp = swift.common.swob.Response( - body='1234567890', request=req, + body=b'1234567890', request=req, conditional_response=True) body = b''.join(resp({}, start_response)) self.assertIn(b'The Range requested is not available', body) @@ -1426,7 +1428,7 @@ class TestResponse(unittest.TestCase): self.assertNotIn('Content-Range', resp.headers) resp = swift.common.swob.Response( - body='1234567890', request=req, + body=b'1234567890', request=req, conditional_response=True) body = b''.join(resp({}, start_response)) self.assertEqual(body, b'1234567890') @@ -1575,7 +1577,7 @@ class TestResponse(unittest.TestCase): # body, headers with content_length and app_iter exist resp = swift.common.swob.Response( - body='ok', headers={'Content-Length': '5'}, app_iter=iter([])) + body=b'ok', headers={'Content-Length': '5'}, app_iter=iter([])) self.assertEqual(resp.content_length, 5) self.assertEqual(resp.body, b'')