From 64da481ccde9863b0b062cadb27402a5e18b876e Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Mon, 10 Apr 2017 18:09:53 -0700 Subject: [PATCH] Tolerate RFC-compliant ETags Since time immemorial, Swift has returned unquoted ETags for plain-old Swift objects -- I hear tell that we once tried to change this, but quickly backed it out when some clients broke. However, some proxies (such as nginx) apparently may force the ETag to adhere to the RFC, which states [1]: An entity-tag consists of an opaque *quoted* string (emphasis mine). See the related bug for an instance of this happening. Since we can still get the original ETag easily, we should tolerate the more-compliant format. [1] https://tools.ietf.org/html/rfc2616.html#section-3.11 or, if you prefer the new ones, https://tools.ietf.org/html/rfc7232#section-2.3 Change-Id: I7cfacab3f250a9443af4b67111ef8088d37d9171 Closes-Bug: 1681529 Related-Bug: 1678976 --- swiftclient/service.py | 25 +++++++++++++++++++------ tests/unit/test_service.py | 38 +++++++++++++++++++++++--------------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/swiftclient/service.py b/swiftclient/service.py index ce0c4fc..c842d23 100644 --- a/swiftclient/service.py +++ b/swiftclient/service.py @@ -377,10 +377,23 @@ class _SwiftReader(object): self._actual_read = 0 self._content_length = None self._actual_md5 = None - self._expected_etag = headers.get('etag') + self._expected_md5 = headers.get('etag', '') - if ('x-object-manifest' not in headers - and 'x-static-large-object' not in headers and checksum): + if len(self._expected_md5) > 1 and self._expected_md5[0] == '"' \ + and self._expected_md5[-1] == '"': + self._expected_md5 = self._expected_md5[1:-1] + + # Some headers indicate the MD5 of the response + # definitely *won't* match the ETag + bad_md5_headers = set([ + 'x-object-manifest', + 'x-static-large-object', + ]) + if bad_md5_headers.intersection(headers): + # This isn't a useful checksum + self._expected_md5 = '' + + if self._expected_md5 and checksum: self._actual_md5 = md5() if 'content-length' in headers: @@ -398,12 +411,12 @@ class _SwiftReader(object): self._check_contents() def _check_contents(self): - if self._actual_md5 and self._expected_etag: + if self._actual_md5 and self._expected_md5: etag = self._actual_md5.hexdigest() - if etag != self._expected_etag: + if etag != self._expected_md5: raise SwiftError('Error downloading {0}: md5sum != etag, ' '{1} != {2}'.format( - self._path, etag, self._expected_etag)) + self._path, etag, self._expected_md5)) if (self._content_length is not None and self._actual_read != self._content_length): diff --git a/tests/unit/test_service.py b/tests/unit/test_service.py index 378b70b..0db5b7a 100644 --- a/tests/unit/test_service.py +++ b/tests/unit/test_service.py @@ -119,25 +119,26 @@ class TestSwiftReader(unittest.TestCase): self.assertEqual(sr._path, 'path') self.assertEqual(sr._body, 'body') self.assertIsNone(sr._content_length) - self.assertIsNone(sr._expected_etag) + self.assertFalse(sr._expected_md5) - self.assertIsNotNone(sr._actual_md5) - self.assertIs(type(sr._actual_md5), self.md5_type) + self.assertIsNone(sr._actual_md5) def test_create_with_large_object_headers(self): # md5 should not be initialized if large object headers are present - sr = self.sr('path', 'body', {'x-object-manifest': 'test'}) + sr = self.sr('path', 'body', {'x-object-manifest': 'test', + 'etag': '"%s"' % ('0' * 32)}) self.assertEqual(sr._path, 'path') self.assertEqual(sr._body, 'body') self.assertIsNone(sr._content_length) - self.assertIsNone(sr._expected_etag) + self.assertFalse(sr._expected_md5) self.assertIsNone(sr._actual_md5) - sr = self.sr('path', 'body', {'x-static-large-object': 'test'}) + sr = self.sr('path', 'body', {'x-static-large-object': 'test', + 'etag': '"%s"' % ('0' * 32)}) self.assertEqual(sr._path, 'path') self.assertEqual(sr._body, 'body') self.assertIsNone(sr._content_length) - self.assertIsNone(sr._expected_etag) + self.assertFalse(sr._expected_md5) self.assertIsNone(sr._actual_md5) def test_create_with_ignore_checksum(self): @@ -146,7 +147,7 @@ class TestSwiftReader(unittest.TestCase): self.assertEqual(sr._path, 'path') self.assertEqual(sr._body, 'body') self.assertIsNone(sr._content_length) - self.assertIsNone(sr._expected_etag) + self.assertFalse(sr._expected_md5) self.assertIsNone(sr._actual_md5) def test_create_with_content_length(self): @@ -155,10 +156,9 @@ class TestSwiftReader(unittest.TestCase): self.assertEqual(sr._path, 'path') self.assertEqual(sr._body, 'body') self.assertEqual(sr._content_length, 5) - self.assertIsNone(sr._expected_etag) + self.assertFalse(sr._expected_md5) - self.assertIsNotNone(sr._actual_md5) - self.assertIs(type(sr._actual_md5), self.md5_type) + self.assertIsNone(sr._actual_md5) # Check Contentlength raises error if it isn't an integer self.assertRaises(SwiftError, self.sr, 'path', 'body', @@ -175,11 +175,17 @@ class TestSwiftReader(unittest.TestCase): # Check error is raised if expected etag doesn't match calculated md5. # md5 for a SwiftReader that has done nothing is # d41d8cd98f00b204e9800998ecf8427e i.e md5 of nothing - sr = self.sr('path', BytesIO(b'body'), {'etag': 'doesntmatch'}) + sr = self.sr('path', BytesIO(b'body'), + {'etag': md5(b'doesntmatch').hexdigest()}) self.assertRaises(SwiftError, _consume, sr) sr = self.sr('path', BytesIO(b'body'), - {'etag': '841a2d689ad86bd1611447453c22c6fc'}) + {'etag': md5(b'body').hexdigest()}) + _consume(sr) + + # Should still work if etag was quoted + sr = self.sr('path', BytesIO(b'body'), + {'etag': '"%s"' % md5(b'body').hexdigest()}) _consume(sr) # Check error is raised if SwiftReader doesn't read the same length @@ -191,11 +197,13 @@ class TestSwiftReader(unittest.TestCase): _consume(sr) # Check that the iterator generates expected length and etag values - sr = self.sr('path', ['abc'.encode()] * 3, {}) + sr = self.sr('path', ['abc'.encode()] * 3, + {'content-length': 9, + 'etag': md5('abc'.encode() * 3).hexdigest()}) _consume(sr) self.assertEqual(sr._actual_read, 9) self.assertEqual(sr._actual_md5.hexdigest(), - '97ac82a5b825239e782d0339e2d7b910') + md5('abc'.encode() * 3).hexdigest()) class _TestServiceBase(unittest.TestCase):