Merge "Include SLO ETag in container updates"

This commit is contained in:
Zuul 2018-07-12 05:30:56 +00:00 committed by Gerrit Code Review
commit c991076a6e
5 changed files with 149 additions and 61 deletions

View File

@ -313,6 +313,7 @@ metadata which can be used for stats and billing purposes.
"""
import base64
from cgi import parse_header
from collections import defaultdict
from datetime import datetime
import json
@ -322,6 +323,8 @@ import six
import time
from hashlib import md5
from swift.common.exceptions import ListingIterError, SegmentError
from swift.common.middleware.listing_formats import \
MAX_CONTAINER_LISTING_CONTENT_LENGTH
from swift.common.swob import Request, HTTPBadRequest, HTTPServerError, \
HTTPMethodNotAllowed, HTTPRequestEntityTooLarge, HTTPLengthRequired, \
HTTPOk, HTTPPreconditionFailed, HTTPException, HTTPNotFound, \
@ -1276,6 +1279,14 @@ class StaticLargeObject(object):
'Etag': md5(json_data).hexdigest(),
})
# Ensure container listings have both etags. However, if any
# middleware to the left of us touched the base value, trust them.
override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
val, sep, params = req.headers.get(
override_header, '').partition(';')
req.headers[override_header] = '%s; slo_etag=%s' % (
(val or req.headers['Etag']) + sep + params, slo_etag)
env = req.environ
if not env.get('CONTENT_TYPE'):
guessed_type, _junk = mimetypes.guess_type(req.path_info)
@ -1408,6 +1419,30 @@ class StaticLargeObject(object):
out_content_type=out_content_type)
return resp
def handle_container_listing(self, req, start_response):
    """
    Split the SLO etag out of the ``hash`` values in a container listing.

    The request is forwarded to the wrapped app. The response is passed
    through untouched when it is not successful, is not
    ``application/json``, has no content length, is larger than
    ``MAX_CONTAINER_LISTING_CONTENT_LENGTH``, or does not parse as JSON.

    Otherwise, for every entry that is not a ``subdir`` and whose ``hash``
    carries a ``slo_etag`` parameter, that parameter is moved into a
    separate, quote-wrapped ``slo_etag`` key; any other parameters are
    kept on ``hash``. The listing is then re-serialized as the body.

    :param req: the swob request for the container listing
    :param start_response: the WSGI start_response callable
    :returns: the result of invoking the response as a WSGI application
    """
    resp = req.get_response(self.app)

    def passthrough():
        # Hand the backend response back to the client unmodified.
        return resp(req.environ, start_response)

    if not (resp.is_success and resp.content_type == 'application/json'):
        return passthrough()

    length = resp.content_length
    if length is None or length > MAX_CONTAINER_LISTING_CONTENT_LENGTH:
        return passthrough()

    try:
        entries = json.loads(resp.body)
    except ValueError:
        return passthrough()

    for entry in entries:
        if 'subdir' in entry:
            continue
        base_etag, etag_params = parse_header(entry['hash'])
        if 'slo_etag' not in etag_params:
            continue
        # Expose the SLO etag under its own key, wrapped in double quotes.
        entry['slo_etag'] = '"%s"' % etag_params.pop('slo_etag')
        # Whatever parameters remain stay attached to the hash value.
        leftover = ['; %s=%s' % (key, value)
                    for key, value in etag_params.items()]
        entry['hash'] = base_etag + ''.join(leftover)

    resp.body = json.dumps(entries).encode('ascii')
    return resp(req.environ, start_response)
def __call__(self, env, start_response):
"""
WSGI entry point
@ -1417,10 +1452,15 @@ class StaticLargeObject(object):
req = Request(env)
try:
vrs, account, container, obj = req.split_path(4, 4, True)
vrs, account, container, obj = req.split_path(3, 4, True)
except ValueError:
return self.app(env, start_response)
if not obj:
if req.method == 'GET':
return self.handle_container_listing(req, start_response)
return self.app(env, start_response)
try:
if req.method == 'PUT' and \
req.params.get('multipart-manifest') == 'put':

View File

@ -830,7 +830,7 @@ class File(Base):
header_fields = self.header_fields(fields,
optional_fields=optional_fields)
header_fields['etag'] = header_fields['etag'].strip('"')
header_fields['etag'] = header_fields['etag']
return header_fields
def initialize(self, hdrs=None, parms=None):
@ -855,7 +855,7 @@ class File(Base):
if hdr[0].lower().startswith('x-object-meta-'):
self.metadata[hdr[0][14:]] = hdr[1]
if hdr[0].lower() == 'etag':
self.etag = hdr[1].strip('"')
self.etag = hdr[1]
if hdr[0].lower() == 'content-length':
self.size = int(hdr[1])
if hdr[0].lower() == 'last-modified':

View File

@ -271,14 +271,19 @@ class TestSlo(Base):
file_item.write(
json.dumps([self.env.seg_info['seg_a']]),
parms={'multipart-manifest': 'put'})
# The container listing has the etag of the actual manifest object
# contents which we get using multipart-manifest=get. Arguably this
# should be the etag that we get when NOT using multipart-manifest=get,
# to be consistent with size and content-type. But here we at least
# verify that it remains consistent when the object is updated with a
# POST.
# The container listing exposes BOTH the MD5 of the manifest content
# and the SLO MD5-of-MD5s by splitting the latter out into a separate
# key. These should remain consistent when the object is updated with
# a POST.
file_item.initialize(parms={'multipart-manifest': 'get'})
expected_etag = file_item.etag
manifest_etag = file_item.etag
self.assertFalse(manifest_etag.startswith('"'))
self.assertFalse(manifest_etag.endswith('"'))
file_item.initialize()
slo_etag = file_item.etag
self.assertTrue(slo_etag.startswith('"'))
self.assertTrue(slo_etag.endswith('"'))
listing = self.env.container.files(parms={'format': 'json'})
for f_dict in listing:
@ -286,7 +291,8 @@ class TestSlo(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual('application/octet-stream',
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -304,7 +310,8 @@ class TestSlo(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual(file_item.content_type,
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -322,7 +329,8 @@ class TestSlo(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual(file_item.content_type,
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -456,13 +464,14 @@ class TestSlo(Base):
self.assertEqual('c', file_contents[-2])
self.assertEqual('d', file_contents[-1])
def test_slo_etag_is_hash_of_etags(self):
def test_slo_etag_is_quote_wrapped_hash_of_etags(self):
# we have this check in test_slo_get_simple_manifest, too,
# but verify that it holds for HEAD requests
file_item = self.env.container.file('manifest-abcde')
self.assertEqual(self.manifest_abcde_etag, file_item.info()['etag'])
self.assertEqual('"%s"' % self.manifest_abcde_etag,
file_item.info()['etag'])
def test_slo_etag_is_hash_of_etags_submanifests(self):
def test_slo_etag_is_quote_wrapped_hash_of_etags_submanifests(self):
def hd(x):
return hashlib.md5(x).hexdigest()
@ -474,7 +483,7 @@ class TestSlo(Base):
hd('e'))
file_item = self.env.container.file('manifest-abcde-submanifest')
self.assertEqual(expected_etag, file_item.info()['etag'])
self.assertEqual('"%s"' % expected_etag, file_item.info()['etag'])
def test_slo_etag_mismatch(self):
file_item = self.env.container.file("manifest-a-bad-etag")
@ -657,32 +666,34 @@ class TestSlo(Base):
def test_slo_copy_the_manifest(self):
source = self.env.container.file("manifest-abcde")
source.initialize(parms={'multipart-manifest': 'get'})
source_contents = source.read(parms={'multipart-manifest': 'get'})
source_json = json.loads(source_contents)
manifest_etag = hashlib.md5(source_contents).hexdigest()
self.assertEqual(manifest_etag, source.etag)
source.initialize()
self.assertEqual('application/octet-stream', source.content_type)
source.initialize(parms={'multipart-manifest': 'get'})
source_hash = hashlib.md5()
source_hash.update(source_contents)
self.assertEqual(source_hash.hexdigest(), source.etag)
self.assertNotEqual(manifest_etag, source.etag)
slo_etag = source.etag
self.assertTrue(source.copy(self.env.container.name,
"copied-abcde-manifest-only",
parms={'multipart-manifest': 'get'}))
copied = self.env.container.file("copied-abcde-manifest-only")
copied.initialize(parms={'multipart-manifest': 'get'})
copied_contents = copied.read(parms={'multipart-manifest': 'get'})
try:
copied_json = json.loads(copied_contents)
except ValueError:
self.fail("COPY didn't copy the manifest (invalid json on GET)")
self.assertEqual(source_json, copied_json)
self.assertEqual(manifest_etag, copied.etag)
copied.initialize()
self.assertEqual('application/octet-stream', copied.content_type)
copied.initialize(parms={'multipart-manifest': 'get'})
copied_hash = hashlib.md5()
copied_hash.update(copied_contents)
self.assertEqual(copied_hash.hexdigest(), copied.etag)
self.assertEqual(slo_etag, copied.etag)
# verify the listing metadata
listing = self.env.container.files(parms={'format': 'json'})
@ -696,13 +707,15 @@ class TestSlo(Base):
actual = names['manifest-abcde']
self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
self.assertEqual(source.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
self.assertIn('copied-abcde-manifest-only', names)
actual = names['copied-abcde-manifest-only']
self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
self.assertEqual(copied.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
# Test copy manifest including data segments
source = self.env.container.file("mixed-object-data-manifest")
@ -727,14 +740,16 @@ class TestSlo(Base):
source = self.env.container.file("manifest-abcde")
source.content_type = 'application/octet-stream'
source.sync_metadata({'test': 'original'})
source.initialize(parms={'multipart-manifest': 'get'})
source_contents = source.read(parms={'multipart-manifest': 'get'})
source_json = json.loads(source_contents)
manifest_etag = hashlib.md5(source_contents).hexdigest()
self.assertEqual(manifest_etag, source.etag)
source.initialize()
self.assertEqual('application/octet-stream', source.content_type)
source.initialize(parms={'multipart-manifest': 'get'})
source_hash = hashlib.md5()
source_hash.update(source_contents)
self.assertEqual(source_hash.hexdigest(), source.etag)
self.assertNotEqual(manifest_etag, source.etag)
slo_etag = source.etag
self.assertEqual(source.metadata['test'], 'original')
self.assertTrue(
@ -744,18 +759,18 @@ class TestSlo(Base):
'X-Object-Meta-Test': 'updated'}))
copied = self.env.container.file("copied-abcde-manifest-only")
copied.initialize(parms={'multipart-manifest': 'get'})
copied_contents = copied.read(parms={'multipart-manifest': 'get'})
try:
copied_json = json.loads(copied_contents)
except ValueError:
self.fail("COPY didn't copy the manifest (invalid json on GET)")
self.assertEqual(source_json, copied_json)
self.assertEqual(manifest_etag, copied.etag)
copied.initialize()
self.assertEqual('image/jpeg', copied.content_type)
copied.initialize(parms={'multipart-manifest': 'get'})
copied_hash = hashlib.md5()
copied_hash.update(copied_contents)
self.assertEqual(copied_hash.hexdigest(), copied.etag)
self.assertEqual(slo_etag, copied.etag)
self.assertEqual(copied.metadata['test'], 'updated')
# verify the listing metadata
@ -771,13 +786,15 @@ class TestSlo(Base):
self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
# the container listing should have the etag of the manifest contents
self.assertEqual(source.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
self.assertIn('copied-abcde-manifest-only', names)
actual = names['copied-abcde-manifest-only']
self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes'])
self.assertEqual('image/jpeg', actual['content_type'])
self.assertEqual(copied.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
def test_slo_copy_the_manifest_account(self):
acct = self.env.conn.account_name

View File

@ -1094,13 +1094,14 @@ class TestSymlinkToSloSegments(Base):
parms={'multipart-manifest': 'put'})
# The container listing has the etag of the actual manifest object
# contents which we get using multipart-manifest=get. Arguably this
# should be the etag that we get when NOT using multipart-manifest=get,
# to be consistent with size and content-type. But here we at least
# verify that it remains consistent when the object is updated with a
# POST.
# contents which we get using multipart-manifest=get. New enough swift
# also exposes the etag that we get when NOT using
# multipart-manifest=get. Verify that both remain consistent when the
# object is updated with a POST.
file_item.initialize()
slo_etag = file_item.etag
file_item.initialize(parms={'multipart-manifest': 'get'})
expected_etag = file_item.etag
manifest_etag = file_item.etag
listing = self.env.container.files(parms={'format': 'json'})
for f_dict in listing:
@ -1108,7 +1109,8 @@ class TestSymlinkToSloSegments(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual('application/octet-stream',
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -1126,7 +1128,8 @@ class TestSymlinkToSloSegments(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual(file_item.content_type,
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -1144,7 +1147,8 @@ class TestSymlinkToSloSegments(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual(file_item.content_type,
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -1156,7 +1160,7 @@ class TestSymlinkToSloSegments(Base):
expected_etag = expected_hash.hexdigest()
file_item = self.env.container.file('manifest-linkto-ab')
self.assertEqual(expected_etag, file_item.info()['etag'])
self.assertEqual('"%s"' % expected_etag, file_item.info()['etag'])
def test_slo_copy(self):
file_item = self.env.container.file("manifest-linkto-ab")
@ -1171,12 +1175,16 @@ class TestSymlinkToSloSegments(Base):
source = self.env.container.file("manifest-linkto-ab")
source_contents = source.read(parms={'multipart-manifest': 'get'})
source_json = json.loads(source_contents)
manifest_etag = hashlib.md5(source_contents).hexdigest()
source.initialize()
slo_etag = source.etag
self.assertEqual('application/octet-stream', source.content_type)
source.initialize(parms={'multipart-manifest': 'get'})
source_hash = hashlib.md5()
source_hash.update(source_contents)
self.assertEqual(source_hash.hexdigest(), source.etag)
self.assertEqual(manifest_etag, source.etag)
self.assertEqual('application/json; charset=utf-8',
source.content_type)
# now, copy the manifest
self.assertTrue(source.copy(self.env.container.name,
@ -1193,12 +1201,14 @@ class TestSymlinkToSloSegments(Base):
# make sure content of copied manifest is the same as original man.
self.assertEqual(source_json, copied_json)
copied.initialize()
self.assertEqual(copied.etag, slo_etag)
self.assertEqual('application/octet-stream', copied.content_type)
copied.initialize(parms={'multipart-manifest': 'get'})
copied_hash = hashlib.md5()
copied_hash.update(copied_contents)
self.assertEqual(copied_hash.hexdigest(), copied.etag)
self.assertEqual(copied_hash.hexdigest(), source.etag)
self.assertEqual(source_contents, copied_contents)
self.assertEqual(copied.etag, manifest_etag)
self.assertEqual('application/json; charset=utf-8',
copied.content_type)
# verify the listing metadata
listing = self.env.container.files(parms={'format': 'json'})
@ -1212,13 +1222,15 @@ class TestSymlinkToSloSegments(Base):
actual = names['manifest-linkto-ab']
self.assertEqual(2 * 1024 * 1024, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
self.assertEqual(source.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
self.assertIn('copied-ab-manifest-only', names)
actual = names['copied-ab-manifest-only']
self.assertEqual(2 * 1024 * 1024, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
self.assertEqual(copied.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
class TestSymlinkDlo(Base):

View File

@ -418,12 +418,18 @@ class TestSloPutManifest(SloTestCase):
list(self.slo.handle_multipart_put(req, fake_start_response))
def test_handle_multipart_put_success(self):
override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
headers = {
'Accept': 'test',
override_header: '; params=are important',
}
req = Request.blank(
'/v1/AUTH_test/c/man?multipart-manifest=put',
environ={'REQUEST_METHOD': 'PUT'}, headers={'Accept': 'test'},
environ={'REQUEST_METHOD': 'PUT'}, headers=headers,
body=test_json_data)
for h in ('X-Static-Large-Object', 'X-Object-Sysmeta-Slo-Etag',
'X-Object-Sysmeta-Slo-Size'):
# Sanity
self.assertNotIn(h, req.headers)
status, headers, body = self.call_slo(req)
@ -431,9 +437,16 @@ class TestSloPutManifest(SloTestCase):
self.assertIn(('Etag', gen_etag), headers)
self.assertIn('X-Static-Large-Object', req.headers)
self.assertEqual(req.headers['X-Static-Large-Object'], 'True')
self.assertIn('Etag', req.headers)
self.assertIn('X-Object-Sysmeta-Slo-Etag', req.headers)
self.assertIn('X-Object-Sysmeta-Container-Update-Override-Etag',
req.headers)
self.assertEqual(req.headers['X-Object-Sysmeta-Slo-Etag'],
md5hex('etagoftheobjectsegment'))
gen_etag.strip('"'))
self.assertEqual(
req.headers['X-Object-Sysmeta-Container-Update-Override-Etag'],
'%s; params=are important; slo_etag=%s' % (
req.headers['Etag'], gen_etag.strip('"')))
self.assertIn('X-Object-Sysmeta-Slo-Size', req.headers)
self.assertEqual(req.headers['X-Object-Sysmeta-Slo-Size'], '100')
self.assertIn('Content-Type', req.headers)
@ -968,13 +981,15 @@ class TestSloPutManifest(SloTestCase):
'size_bytes': None},
{'path': '/cont/object', 'etag': None,
'size_bytes': None, 'range': '10-40'}])
override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
req = Request.blank(
'/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
environ={'REQUEST_METHOD': 'PUT'}, body=good_data)
environ={'REQUEST_METHOD': 'PUT'}, body=good_data,
headers={override_header: 'my custom etag'})
status, headers, body = self.call_slo(req)
self.assertEqual(('201 Created', ''), (status, body))
expected_etag = '"%s"' % md5hex('ab:1-1;b:0-0;aetagoftheobjectsegment:'
'10-40;')
expected_etag = '"%s"' % md5hex(
'ab:1-1;b:0-0;aetagoftheobjectsegment:10-40;')
self.assertEqual(expected_etag, dict(headers)['Etag'])
self.assertEqual([
('HEAD', '/v1/AUTH_test/checktest/a_1'), # Only once!
@ -984,6 +999,9 @@ class TestSloPutManifest(SloTestCase):
self.assertEqual(
('PUT', '/v1/AUTH_test/checktest/man_3?multipart-manifest=put'),
self.app.calls[-1])
self.assertEqual(
'my custom etag; slo_etag=%s' % expected_etag.strip('"'),
self.app.headers[-1].get(override_header))
# Check that we still populated the manifest properly from our HEADs
req = Request.blank(
@ -3854,5 +3872,6 @@ class TestSwiftInfo(unittest.TestCase):
self.assertEqual(1, mware.concurrency)
self.assertEqual(3, mware.bulk_deleter.delete_concurrency)
if __name__ == '__main__':
unittest.main()