From 188c07e12ab49ce71a76f2426a943a8f4265d374 Mon Sep 17 00:00:00 2001
From: Mahati Chamarthy
Date: Wed, 1 Mar 2017 23:18:09 +0530
Subject: [PATCH] Limit number of revert tombstone SSYNC requests

Revert jobs for tombstone-only partitions used to try to talk to all
primary nodes; this change randomizes the selection of sync targets
within part_nodes. The corresponding probe test is updated to reflect
this change.

The primary improvement of this patch is that the reconstructor on a
handoff node can now delete its local tombstones once it succeeds in
syncing to fewer than all primary nodes. (Before this patch, every
primary node had to accept the REVERT requests.)

The number of primary nodes the reconstructor communicates with is open
to further discussion, but with this patch it is (replicas - k + 1),
which is enough to prevent a stale read.

*BONUS* - Fix a misconfigured reconstructor ring in the unit tests (it
was set up with fewer replicas than ec_k + ec_m).

Co-Authored-By: Kota Tsuyuzaki
Co-Authored-By: Clay Gerrard
Change-Id: I05ce8fe75f1c4a7971cc8995b003df818b69b3c1
Closes-Bug: #1668857
---
 swift/obj/reconstructor.py              | 12 ++--
 test/probe/test_reconstructor_revert.py | 75 ++++++++-----------------
 test/unit/obj/test_reconstructor.py     |  9 ++-
 3 files changed, 37 insertions(+), 59 deletions(-)

diff --git a/swift/obj/reconstructor.py b/swift/obj/reconstructor.py
index 93077f3665..675105210a 100644
--- a/swift/obj/reconstructor.py
+++ b/swift/obj/reconstructor.py
@@ -854,14 +854,18 @@ class ObjectReconstructor(Daemon):
             # push partitions off this node, but none of the suffixes
             # have any data fragments to hint at which node would be a
             # good candidate to receive the tombstones.
+            #
+            # we'll check a sample of other primaries before we delete our
+            # local tombstones, the exact number doesn't matter as long as
+            # it's enough to ensure the tombstones are not lost and less
+            # than *all the replicas*
+            nsample = (policy.ec_n_unique_fragments *
+                       policy.ec_duplication_factor) - policy.ec_ndata + 1
             jobs.append(build_job(
                 job_type=REVERT,
                 frag_index=None,
                 suffixes=non_data_fragment_suffixes,
-                # this is super safe
-                sync_to=part_nodes,
-                # something like this would be probably be better
-                # sync_to=random.sample(part_nodes, 3),
+                sync_to=random.sample(part_nodes, nsample)
             ))
         # return a list of jobs for this part
         return jobs
diff --git a/test/probe/test_reconstructor_revert.py b/test/probe/test_reconstructor_revert.py
index 32524325b8..e4723526c9 100644
--- a/test/probe/test_reconstructor_revert.py
+++ b/test/probe/test_reconstructor_revert.py
@@ -173,7 +173,7 @@ class TestReconstructorRevert(ECProbeTest):
         client.put_object(self.url, self.token, self.container_name,
                           self.object_name, contents=contents)
 
-        # now lets shut down a couple primaries
+        # now let's shut down a couple of primaries
         failed_nodes = random.sample(onodes, 2)
         for node in failed_nodes:
             self.kill_drive(self.device_dir('object', node))
@@ -197,61 +197,26 @@ class TestReconstructorRevert(ECProbeTest):
                 self.fail('Node data on %r was not fully destroyed!'
                           % (node,))
 
-        # repair the first primary
-        self.revive_drive(self.device_dir('object', failed_nodes[0]))
-
-        # run the reconstructor on the handoffs nodes, if there are no data
-        # frags to hint at the node index - each hnode syncs to all primaries
-        for hnode in hnodes:
-            self.reconstructor.once(number=self.config_number(hnode))
-
-        # because not all primaries are online, the tombstones remain
-        for hnode in hnodes:
+        # run the reconstructor on the handoff node multiple times until
+        # tombstone is pushed out - each handoff node syncs to a few
+        # primaries each time
+        iterations = 0
+        while iterations < 52:
+            self.reconstructor.once(number=self.config_number(hnodes[0]))
+            iterations += 1
+            # see if the tombstone is reverted
             try:
-                self.direct_get(hnode, opart)
+                self.direct_get(hnodes[0], opart)
             except direct_client.DirectClientException as err:
                 self.assertEqual(err.http_status, 404)
-                self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                                 delete_timestamp)
-            else:
-                self.fail('Found obj data on %r' % hnode)
-
-        # ... but it's on the first failed (now repaired) primary
-        try:
-            self.direct_get(failed_nodes[0], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                             delete_timestamp)
+                if 'X-Backend-Timestamp' not in err.http_headers:
+                    # this means the tombstone is *gone* so it's reverted
+                    break
         else:
-            self.fail('Found obj data on %r' % failed_nodes[0])
+            self.fail('Still found tombstone on %r after %s iterations' % (
+                hnodes[0], iterations))
 
-        # repair the second primary
-        self.revive_drive(self.device_dir('object', failed_nodes[1]))
-
-        # run the reconstructor on the *first* handoff node
-        self.reconstructor.once(number=self.config_number(hnodes[0]))
-
-        # make sure it's tombstone was pushed out
-        try:
-            self.direct_get(hnodes[0], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
-        else:
-            self.fail('Found obj data on %r' % hnodes[0])
-
-        # ... and now it's on the second failed primary too!
-        try:
-            self.direct_get(failed_nodes[1], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                             delete_timestamp)
-        else:
-            self.fail('Found obj data on %r' % failed_nodes[1])
-
-        # ... but still on the second handoff node
+        # tombstone is still on the *second* handoff
         try:
             self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:
@@ -261,10 +226,14 @@ class TestReconstructorRevert(ECProbeTest):
         else:
             self.fail('Found obj data on %r' % hnodes[1])
 
-        # ... until it's next sync
+        # repair the primaries
+        self.revive_drive(self.device_dir('object', failed_nodes[0]))
+        self.revive_drive(self.device_dir('object', failed_nodes[1]))
+
+        # run reconstructor on second handoff
         self.reconstructor.once(number=self.config_number(hnodes[1]))
 
-        # ... then it's tombstone is pushed off too!
+        # verify tombstone is reverted on the first pass
         try:
             self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:
diff --git a/test/unit/obj/test_reconstructor.py b/test/unit/obj/test_reconstructor.py
index 21dab08459..99abd4f13e 100644
--- a/test/unit/obj/test_reconstructor.py
+++ b/test/unit/obj/test_reconstructor.py
@@ -1160,7 +1160,7 @@ class TestObjectReconstructor(unittest.TestCase):
         self.policy.object_ring.max_more_nodes = \
             self.policy.object_ring.replicas
         self.ts_iter = make_timestamp_iter()
-        self.fabricated_ring = FabricatedRing()
+        self.fabricated_ring = FabricatedRing(replicas=14, devices=28)
 
     def _configure_reconstructor(self, **kwargs):
         self.conf.update(kwargs)
@@ -1924,7 +1924,12 @@ class TestObjectReconstructor(unittest.TestCase):
             'local_dev': self.local_dev,
             'device': self.local_dev['device'],
         }
-        self.assertEqual(ring.replica_count, len(job['sync_to']))
+        self.assertEqual(ring.replica_count, len(part_nodes))
+        expected_samples = (
+            (self.policy.ec_n_unique_fragments *
+             self.policy.ec_duplication_factor) -
+            self.policy.ec_ndata + 1)
+        self.assertEqual(len(job['sync_to']), expected_samples)
         for k, v in expected.items():
             msg = 'expected %s != %s for %s' % (
                 v, job[k], k)
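
Illustrative note (not part of the patch): the reconstructor hunk above
samples (replicas - ec_ndata + 1) primaries to receive tombstones before
the handoff node removes its local copy. With tombstones on that many
primaries, any ec_ndata primaries a reader contacts must include at
least one holding the tombstone, so a stale read cannot be served. The
sketch below is a minimal standalone restatement of that arithmetic;
FakePolicy is a hypothetical stand-in exposing the same three attributes
the patch reads from the real EC storage policy, and the 10+4 numbers
are example values only.

    import random
    from collections import namedtuple

    # hypothetical stand-in for an EC storage policy; the real policy
    # object exposes the same three attributes used in the patch above
    FakePolicy = namedtuple(
        'FakePolicy', 'ec_n_unique_fragments ec_duplication_factor ec_ndata')

    def revert_sample_size(policy):
        # replicas - ec_ndata + 1: enough primaries that any ec_ndata of
        # them overlap the tombstone holders, yet fewer than every replica
        replicas = (policy.ec_n_unique_fragments *
                    policy.ec_duplication_factor)
        return replicas - policy.ec_ndata + 1

    policy = FakePolicy(ec_n_unique_fragments=14, ec_duplication_factor=1,
                        ec_ndata=10)                  # example 10+4 scheme
    part_nodes = [{'index': i} for i in range(14)]    # stand-in primaries
    sync_to = random.sample(part_nodes, revert_sample_size(policy))
    print(len(sync_to))  # 5 == 14 - 10 + 1

Under those example numbers the revert job pushes tombstones to 5 of the
14 primaries, matching the expected_samples assertion in the unit test
change above.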