Merge "Fix encoding issue in ssync_sender.send_put()"

Jenkins 2017-04-19 19:37:38 +00:00 committed by Gerrit Code Review
commit cce5482bd8
5 changed files with 129 additions and 18 deletions

View File

@@ -114,6 +114,20 @@ def _get_filename(fd):
    return fd


def _encode_metadata(metadata):
    """
    UTF8 encode any unicode keys or values in given metadata dict.

    :param metadata: a dict
    """

    def encode_str(item):
        if isinstance(item, six.text_type):
            return item.encode('utf8')
        return item

    return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items()))
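
For illustration only (not part of the diff), a minimal sketch of what the helper does, mirroring the expectations in the new unit test further down: unicode keys and values are UTF-8 encoded, anything else passes through untouched.

    # illustrative values only
    _encode_metadata({u'X-Object-Meta-y\xe8': u'not ascii \xe8',
                      b'X-Object-Meta-x\xff': b'not utf8 \xff',
                      b'Content-Length': 99})
    # -> {b'X-Object-Meta-y\xc3\xa8': b'not ascii \xc3\xa8',
    #     b'X-Object-Meta-x\xff': b'not utf8 \xff',
    #     b'Content-Length': 99}
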
def read_metadata(fd):
    """
    Helper function to read the pickled metadata from an object file.
@@ -140,7 +154,10 @@ def read_metadata(fd):
            raise DiskFileNotExist()
        # TODO: we might want to re-raise errors that don't denote a missing
        # xattr here. Seems to be ENODATA on linux and ENOATTR on BSD/OSX.
    return pickle.loads(metadata)
    # strings are utf-8 encoded when written, but have not always been
    # (see https://bugs.launchpad.net/swift/+bug/1678018) so encode them again
    # when read
    return _encode_metadata(pickle.loads(metadata))
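
The re-encoding on read covers diskfiles written before this change, whose pickled xattrs can still hold unicode. A minimal sketch of that legacy case, with hypothetical values:

    import pickle
    # what an old diskfile may have persisted: unicode keys/values in the pickle
    legacy = pickle.loads(pickle.dumps({u'X-Object-Meta-Strange': u'should be bytes'}))
    # unpickling alone hands the unicode straight back ...
    # isinstance(list(legacy.keys())[0], six.text_type)  -> True
    # ... so the read path normalizes it to UTF-8 bytes:
    # _encode_metadata(legacy) -> {b'X-Object-Meta-Strange': b'should be bytes'}
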
def write_metadata(fd, metadata, xattr_size=65536):
@@ -150,7 +167,7 @@ def write_metadata(fd, metadata, xattr_size=65536):
    :param fd: file descriptor or filename to write the metadata
    :param metadata: metadata to write
    """
    metastr = pickle.dumps(metadata, PICKLE_PROTOCOL)
    metastr = pickle.dumps(_encode_metadata(metadata), PICKLE_PROTOCOL)
    key = 0
    while metastr:
        try:

View File

@@ -81,8 +81,13 @@ class TestReconstructorRebuild(ECProbeTest):
        # PUT object and POST some metadata
        contents = Body()
        headers = {'x-object-meta-foo': 'meta-foo'}
        self.headers_post = {'x-object-meta-bar': 'meta-bar'}
        headers = {
            self._make_name('x-object-meta-').decode('utf8'):
                self._make_name('meta-foo-').decode('utf8'),
        }
        self.headers_post = {
            self._make_name('x-object-meta-').decode('utf8'):
                self._make_name('meta-bar-').decode('utf8')}
        self.etag = client.put_object(self.url, self.token,
                                      self.container_name,

View File

@@ -286,6 +286,58 @@ class TestDiskFileModuleMethods(unittest.TestCase):
        exp_dir = '/srv/node/sdb5/objects-1/123'
        self.assertEqual(part_dir, exp_dir)

    def test_write_read_metadata(self):
        path = os.path.join(self.testdir, str(uuid.uuid4()))
        metadata = {'name': '/a/c/o',
                    'Content-Length': 99,
                    u'X-Object-Sysmeta-Ec-Frag-Index': 4,
                    u'X-Object-Meta-Strange': u'should be bytes',
                    b'X-Object-Meta-x\xff': b'not utf8 \xff',
                    u'X-Object-Meta-y\xe8': u'not ascii \xe8'}
        expected = {b'name': b'/a/c/o',
                    b'Content-Length': 99,
                    b'X-Object-Sysmeta-Ec-Frag-Index': 4,
                    b'X-Object-Meta-Strange': b'should be bytes',
                    b'X-Object-Meta-x\xff': b'not utf8 \xff',
                    b'X-Object-Meta-y\xc3\xa8': b'not ascii \xc3\xa8'}

        def check_metadata():
            with open(path, 'rb') as fd:
                actual = diskfile.read_metadata(fd)
            self.assertEqual(expected, actual)
            for k in actual.keys():
                self.assertIsInstance(k, six.binary_type)
            for k in (b'name',
                      b'X-Object-Meta-Strange',
                      b'X-Object-Meta-x\xff',
                      b'X-Object-Meta-y\xc3\xa8'):
                self.assertIsInstance(actual[k], six.binary_type)

        with open(path, 'wb') as fd:
            diskfile.write_metadata(fd, metadata)
        check_metadata()

        # mock the read path to check the write path encoded persisted metadata
        with mock.patch.object(diskfile, '_encode_metadata', lambda x: x):
            check_metadata()

        # simulate a legacy diskfile that might have persisted unicode metadata
        with mock.patch.object(diskfile, '_encode_metadata', lambda x: x):
            with open(path, 'wb') as fd:
                diskfile.write_metadata(fd, metadata)
            # sanity check, while still mocked, that we did persist unicode
            with open(path, 'rb') as fd:
                actual = diskfile.read_metadata(fd)
            for k, v in actual.items():
                if k == u'X-Object-Meta-Strange':
                    self.assertIsInstance(k, six.text_type)
                    self.assertIsInstance(v, six.text_type)
                    break
            else:
                self.fail('Did not find X-Object-Meta-Strange')

        # check that read_metadata converts binary_type
        check_metadata()


@patch_policies
class TestObjectAuditLocationGenerator(unittest.TestCase):

View File

@@ -7255,6 +7255,19 @@ class TestObjectServer(unittest.TestCase):
        obj_datafile = found_files['.data'][0]
        self.assertEqual("%s#2#d.data" % put_timestamp.internal,
                         os.path.basename(obj_datafile))
        with open(obj_datafile) as fd:
            actual_meta = diskfile.read_metadata(fd)
        expected_meta = {'Content-Length': '82',
                         'name': '/a/c/o',
                         'X-Object-Sysmeta-Ec-Frag-Index': '2',
                         'X-Timestamp': put_timestamp.normal,
                         'Content-Type': 'text/plain'}
        for k, v in actual_meta.items():
            self.assertIsInstance(k, six.binary_type)
            self.assertIsInstance(v, six.binary_type)
        self.assertIsNotNone(actual_meta.pop('ETag', None))
        self.assertEqual(expected_meta, actual_meta)
        # but no other files
        self.assertFalse(found_files['.data'][1:])
        found_files.pop('.data')

View File

@@ -1452,62 +1452,86 @@ class TestSender(BaseTest):
            exc = err
        self.assertEqual(str(exc), '0.01 seconds: send_put chunk')

    def test_send_put(self):
    def _check_send_put(self, obj_name, meta_value):
        ts_iter = make_timestamp_iter()
        t1 = next(ts_iter)
        body = 'test'
        extra_metadata = {'Some-Other-Header': 'value'}
        df = self._make_open_diskfile(body=body, timestamp=t1,
        extra_metadata = {'Some-Other-Header': 'value',
                          u'Unicode-Meta-Name': meta_value}
        df = self._make_open_diskfile(obj=obj_name, body=body,
                                      timestamp=t1,
                                      extra_metadata=extra_metadata)
        expected = dict(df.get_metadata())
        expected['body'] = body
        expected['chunk_size'] = len(body)
        expected['meta'] = meta_value
        path = six.moves.urllib.parse.quote(expected['name'])
        expected['path'] = path
        expected['length'] = format(145 + len(path) + len(meta_value), 'x')
        # .meta file metadata is not included in expected for data only PUT
        t2 = next(ts_iter)
        metadata = {'X-Timestamp': t2.internal, 'X-Object-Meta-Fruit': 'kiwi'}
        df.write_metadata(metadata)
        df.open()
        self.sender.connection = FakeConnection()
        self.sender.send_put('/a/c/o', df)
        self.sender.send_put(path, df)
        self.assertEqual(
            ''.join(self.sender.connection.sent),
            '82\r\n'
            'PUT /a/c/o\r\n'
            '%(length)s\r\n'
            'PUT %(path)s\r\n'
            'Content-Length: %(Content-Length)s\r\n'
            'ETag: %(ETag)s\r\n'
            'Some-Other-Header: value\r\n'
            'Unicode-Meta-Name: %(meta)s\r\n'
            'X-Timestamp: %(X-Timestamp)s\r\n'
            '\r\n'
            '\r\n'
            '%(chunk_size)s\r\n'
            '%(body)s\r\n' % expected)
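
The leading '%(length)s' is the chunked-transfer chunk size, in hex, of the framed PUT subrequest that follows (the old test hard-coded it as '82'). A rough sanity check of the 145-byte fixed portion used above, not part of the test, assuming the header block shown in the assertion and this test's specific Content-Length, ETag and X-Timestamp widths:

    # hypothetical check: the chunk size is the hex byte count of everything
    # from 'PUT ' up to and including the blank line that ends the headers
    subrequest = ('PUT %(path)s\r\n'
                  'Content-Length: %(Content-Length)s\r\n'
                  'ETag: %(ETag)s\r\n'
                  'Some-Other-Header: value\r\n'
                  'Unicode-Meta-Name: %(meta)s\r\n'
                  'X-Timestamp: %(X-Timestamp)s\r\n'
                  '\r\n' % expected)
    assert format(len(subrequest), 'x') == expected['length']
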
    def test_send_post(self):
    def test_send_put(self):
        self._check_send_put('o', 'meta')

    def test_send_put_unicode(self):
        self._check_send_put(
            'o_with_caract\xc3\xa8res_like_in_french', 'm\xc3\xa8ta')

    def _check_send_post(self, obj_name, meta_value):
        ts_iter = make_timestamp_iter()
        # create .data file
        extra_metadata = {'X-Object-Meta-Foo': 'old_value',
                          'X-Object-Sysmeta-Test': 'test_sysmeta',
                          'Content-Type': 'test_content_type'}
        ts_0 = next(ts_iter)
        df = self._make_open_diskfile(extra_metadata=extra_metadata,
        df = self._make_open_diskfile(obj=obj_name,
                                      extra_metadata=extra_metadata,
                                      timestamp=ts_0)
        # create .meta file
        ts_1 = next(ts_iter)
        newer_metadata = {'X-Object-Meta-Foo': 'new_value',
        newer_metadata = {u'X-Object-Meta-Foo': meta_value,
                          'X-Timestamp': ts_1.internal}
        df.write_metadata(newer_metadata)
        path = six.moves.urllib.parse.quote(df.read_metadata()['name'])
        length = format(61 + len(path) + len(meta_value), 'x')
        self.sender.connection = FakeConnection()
        with df.open():
            self.sender.send_post('/a/c/o', df)
            self.sender.send_post(path, df)
        self.assertEqual(
            ''.join(self.sender.connection.sent),
            '4c\r\n'
            'POST /a/c/o\r\n'
            'X-Object-Meta-Foo: new_value\r\n'
            '%s\r\n'
            'POST %s\r\n'
            'X-Object-Meta-Foo: %s\r\n'
            'X-Timestamp: %s\r\n'
            '\r\n'
            '\r\n' % ts_1.internal)
            '\r\n' % (length, path, meta_value, ts_1.internal))

    def test_send_post(self):
        self._check_send_post('o', 'meta')

    def test_send_post_unicode(self):
        self._check_send_post(
            'o_with_caract\xc3\xa8res_like_in_french', 'm\xc3\xa8ta')

    def test_disconnect_timeout(self):
        self.sender.connection = FakeConnection()