From 8d882095375dc53acdafafac40aa2efc3bafbcd4 Mon Sep 17 00:00:00 2001 From: Thiago da Silva Date: Thu, 15 Sep 2016 16:45:06 -0400 Subject: [PATCH] Return 404 on a GET if tombstone is newer Currently the proxy keeps iterating through the connections in hope of finding a success even if it already has found a tombstone (404). This change changes the code a little bit to compare the timestamp of a 200 and a 404, if the tombstone is newer, then it should be returned, instead of returning a stale 200. Closes-Bug: #1560574 Co-Authored-By: Tim Burke Change-Id: Ia81d6832709d18fe9a01ad247d75bf765e8a89f4 Signed-off-by: Thiago da Silva --- swift/proxy/controllers/base.py | 69 +++++++++++++++++-------- test/probe/test_object_handoff.py | 63 ++++++++++++++++++++++ test/unit/proxy/controllers/test_obj.py | 29 +++++++++++ 3 files changed, 140 insertions(+), 21 deletions(-) diff --git a/swift/proxy/controllers/base.py b/swift/proxy/controllers/base.py index a02de794d8..e588355235 100644 --- a/swift/proxy/controllers/base.py +++ b/swift/proxy/controllers/base.py @@ -92,7 +92,8 @@ def source_key(resp): :param resp: bufferedhttp response object """ - return Timestamp(resp.getheader('x-backend-timestamp') or + return Timestamp(resp.getheader('x-backend-data-timestamp') or + resp.getheader('x-backend-timestamp') or resp.getheader('x-put-timestamp') or resp.getheader('x-timestamp') or 0) @@ -759,6 +760,7 @@ class ResumingGetter(object): self.concurrency = concurrency self.node = None self.header_provider = header_provider + self.latest_404_timestamp = Timestamp(0) # stuff from request self.req_method = req.method @@ -1156,32 +1158,51 @@ class ResumingGetter(object): self.source_headers.append([]) close_swift_conn(possible_source) else: - if self.used_source_etag: - src_headers = dict( - (k.lower(), v) for k, v in - possible_source.getheaders()) + src_headers = dict( + (k.lower(), v) for k, v in + possible_source.getheaders()) + if self.used_source_etag and \ + self.used_source_etag != src_headers.get( + 'x-object-sysmeta-ec-etag', + src_headers.get('etag', '')).strip('"'): + self.statuses.append(HTTP_NOT_FOUND) + self.reasons.append('') + self.bodies.append('') + self.source_headers.append([]) + return False - if self.used_source_etag != src_headers.get( - 'x-object-sysmeta-ec-etag', - src_headers.get('etag', '')).strip('"'): - self.statuses.append(HTTP_NOT_FOUND) - self.reasons.append('') - self.bodies.append('') - self.source_headers.append([]) - return False - - self.statuses.append(possible_source.status) - self.reasons.append(possible_source.reason) - self.bodies.append(None) - self.source_headers.append(possible_source.getheaders()) - self.sources.append((possible_source, node)) - if not self.newest: # one good source is enough - return True + # a possible source should only be added as a valid source + # if its timestamp is newer than previously found tombstones + ps_timestamp = Timestamp( + src_headers.get('x-backend-data-timestamp') or + src_headers.get('x-backend-timestamp') or + src_headers.get('x-put-timestamp') or + src_headers.get('x-timestamp') or 0) + if ps_timestamp >= self.latest_404_timestamp: + self.statuses.append(possible_source.status) + self.reasons.append(possible_source.reason) + self.bodies.append(None) + self.source_headers.append(possible_source.getheaders()) + self.sources.append((possible_source, node)) + if not self.newest: # one good source is enough + return True else: + self.statuses.append(possible_source.status) self.reasons.append(possible_source.reason) self.bodies.append(possible_source.read()) self.source_headers.append(possible_source.getheaders()) + + # if 404, record the timestamp. If a good source shows up, its + # timestamp will be compared to the latest 404. + # For now checking only on objects, but future work could include + # the same check for account and containers. See lp 1560574. + if self.server_type == 'Object' and \ + possible_source.status == HTTP_NOT_FOUND: + hdrs = HeaderKeyDict(possible_source.getheaders()) + ts = Timestamp(hdrs.get('X-Backend-Timestamp', 0)) + if ts > self.latest_404_timestamp: + self.latest_404_timestamp = ts if possible_source.status == HTTP_INSUFFICIENT_STORAGE: self.app.error_limit(node, _('ERROR Insufficient Storage')) elif is_server_error(possible_source.status): @@ -1219,6 +1240,12 @@ class ResumingGetter(object): # ran out of nodes, see if any stragglers will finish any(pile) + # this helps weed out any sucess status that were found before a 404 + # and added to the list in the case of x-newest. + if self.sources: + self.sources = [s for s in self.sources + if source_key(s[0]) >= self.latest_404_timestamp] + if self.sources: self.sources.sort(key=lambda s: source_key(s[0])) source, node = self.sources.pop() diff --git a/test/probe/test_object_handoff.py b/test/probe/test_object_handoff.py index 6326520166..ddc35653da 100644 --- a/test/probe/test_object_handoff.py +++ b/test/probe/test_object_handoff.py @@ -246,6 +246,69 @@ class TestObjectHandoff(ReplProbeTest): else: self.fail("Expected ClientException but didn't get it") + def test_stale_reads(self): + # Create container + container = 'container-%s' % uuid4() + client.put_container(self.url, self.token, container, + headers={'X-Storage-Policy': + self.policy.name}) + + # Kill one primary obj server + obj = 'object-%s' % uuid4() + opart, onodes = self.object_ring.get_nodes( + self.account, container, obj) + onode = onodes[0] + kill_server((onode['ip'], onode['port']), self.ipport2server) + + # Create container/obj (goes to two primaries and one handoff) + client.put_object(self.url, self.token, container, obj, 'VERIFY') + odata = client.get_object(self.url, self.token, container, obj)[-1] + if odata != 'VERIFY': + raise Exception('Object GET did not return VERIFY, instead it ' + 'returned: %s' % repr(odata)) + + # Stash the on disk data from a primary for future comparison with the + # handoff - this may not equal 'VERIFY' if for example the proxy has + # crypto enabled + direct_get_data = direct_client.direct_get_object( + onodes[1], opart, self.account, container, obj, headers={ + 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] + + # Restart the first container/obj primary server again + start_server((onode['ip'], onode['port']), self.ipport2server) + + # send a delete request to primaries + client.delete_object(self.url, self.token, container, obj) + + # there should be .ts files in all primaries now + for node in onodes: + try: + direct_client.direct_get_object( + node, opart, self.account, container, obj, headers={ + 'X-Backend-Storage-Policy-Index': self.policy.idx}) + except ClientException as err: + self.assertEqual(err.http_status, 404) + else: + self.fail("Expected ClientException but didn't get it") + + # verify that handoff still has the data, DELETEs should have gone + # only to primaries + another_onode = next(self.object_ring.get_more_nodes(opart)) + handoff_data = direct_client.direct_get_object( + another_onode, opart, self.account, container, obj, headers={ + 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] + self.assertEqual(handoff_data, direct_get_data) + + # Indirectly (i.e., through proxy) try to GET object, it should return + # a 404, before bug #1560574, the proxy would return the stale object + # from the handoff + try: + client.get_object(self.url, self.token, container, obj) + except client.ClientException as err: + self.assertEqual(err.http_status, 404) + else: + self.fail("Expected ClientException but didn't get it") + class TestECObjectHandoff(ECProbeTest): diff --git a/test/unit/proxy/controllers/test_obj.py b/test/unit/proxy/controllers/test_obj.py index 32eaf4fcf5..810330609d 100644 --- a/test/unit/proxy/controllers/test_obj.py +++ b/test/unit/proxy/controllers/test_obj.py @@ -1352,6 +1352,35 @@ class TestReplicatedObjController(BaseObjectControllerMixin, resp = req.get_response(self.app) self.assertEqual(resp.status_int, 404) + def test_GET_not_found_when_404_newer(self): + # if proxy receives a 404, it keeps waiting for other connections until + # max number of nodes in hopes of finding an object, but if 404 is + # more recent than a 200, then it should ignore 200 and return 404 + req = swift.common.swob.Request.blank('/v1/a/c/o') + codes = [404] * self.obj_ring.replicas + \ + [200] * self.obj_ring.max_more_nodes + ts_iter = iter([2] * self.obj_ring.replicas + + [1] * self.obj_ring.max_more_nodes) + with set_http_connect(*codes, timestamps=ts_iter): + resp = req.get_response(self.app) + self.assertEqual(resp.status_int, 404) + + def test_GET_x_newest_not_found_when_404_newer(self): + # if proxy receives a 404, it keeps waiting for other connections until + # max number of nodes in hopes of finding an object, but if 404 is + # more recent than a 200, then it should ignore 200 and return 404 + req = swift.common.swob.Request.blank('/v1/a/c/o', + headers={'X-Newest': 'true'}) + codes = ([200] + + [404] * self.obj_ring.replicas + + [200] * (self.obj_ring.max_more_nodes - 1)) + ts_iter = iter([1] + + [2] * self.obj_ring.replicas + + [1] * (self.obj_ring.max_more_nodes - 1)) + with set_http_connect(*codes, timestamps=ts_iter): + resp = req.get_response(self.app) + self.assertEqual(resp.status_int, 404) + def test_PUT_delete_at(self): t = str(int(time.time() + 100)) req = swob.Request.blank('/v1/a/c/o', method='PUT', body='',