From aa82d2cba82209f1bf3944c6d2a67965af5a1540 Mon Sep 17 00:00:00 2001 From: Samuel Merritt Date: Thu, 29 Jun 2017 10:23:38 -0700 Subject: [PATCH] Save ring builder if dispersion changes There are cases where a rebalance improves dispersion, but doesn't improve balance. This is because the balance of a ring builder is taken to be the balance of its least-balanced device, so if there's a device that has no partitions, wants some, but can't get them, then we'll never save the ring builder even if every other device in the ring got better. We can detect this situation by looking at the dispersion number; if it changes, then the rebalance needs to be saved in order to continue to make progress. Partial-Bug: #1697543 Change-Id: Ie239b958fc7e0547ffda2bebf61546bd4ef3d829 --- swift/cli/ringbuilder.py | 24 +++++++- test/unit/cli/test_ringbuilder.py | 95 +++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 3 deletions(-) diff --git a/swift/cli/ringbuilder.py b/swift/cli/ringbuilder.py index 06ac83e3b4..882c484326 100644 --- a/swift/cli/ringbuilder.py +++ b/swift/cli/ringbuilder.py @@ -898,7 +898,9 @@ swift-ring-builder rebalance [options] min_part_seconds_left = builder.min_part_seconds_left try: last_balance = builder.get_balance() + last_dispersion = builder.dispersion parts, balance, removed_devs = builder.rebalance(seed=get_seed(3)) + dispersion = builder.dispersion except exceptions.RingBuilderError as e: print('-' * 79) print("An error has occurred during ring validation. Common\n" @@ -922,9 +924,25 @@ swift-ring-builder rebalance [options] # special value(MAX_BALANCE) until zero weighted device return all # its partitions. So we cannot check balance has changed. # Thus we need to check balance or last_balance is special value. - if not options.force and \ - not devs_changed and abs(last_balance - balance) < 1 and \ - not (last_balance == MAX_BALANCE and balance == MAX_BALANCE): + be_cowardly = True + if options.force: + # User said save it, so we save it. + be_cowardly = False + elif devs_changed: + # We must save if a device changed; this could be something like + # a changed IP address. + be_cowardly = False + else: + # If balance or dispersion changed (presumably improved), then + # we should save to get the improvement. + balance_changed = ( + abs(last_balance - balance) >= 1 or + (last_balance == MAX_BALANCE and balance == MAX_BALANCE)) + dispersion_changed = abs(last_dispersion - dispersion) >= 1 + if balance_changed or dispersion_changed: + be_cowardly = False + + if be_cowardly: print('Cowardly refusing to save rebalance as it did not change ' 'at least 1%.') exit(EXIT_WARNING) diff --git a/test/unit/cli/test_ringbuilder.py b/test/unit/cli/test_ringbuilder.py index 65c096f181..3b40bb02ba 100644 --- a/test/unit/cli/test_ringbuilder.py +++ b/test/unit/cli/test_ringbuilder.py @@ -1916,7 +1916,102 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring.save(self.tmpfile) # Test No change to the device argv = ["", self.tmpfile, "rebalance", "3"] + with mock.patch('swift.common.ring.RingBuilder.save') as mock_save: + self.assertSystemExit(EXIT_WARNING, ringbuilder.main, argv) + self.assertEqual(len(mock_save.calls), 0) + + def test_rebalance_saves_dispersion_improvement(self): + # We set up a situation where dispersion improves but balance + # doesn't. We construct a ring with one zone, then add a second zone + # concurrently with a new device in the first zone. That first + # device won't acquire any partitions, so the ring's balance won't + # change. However, dispersion will improve. + + ring = RingBuilder(6, 5, 1) + ring.add_dev({ + 'region': 1, 'zone': 1, + 'ip': '10.0.0.1', 'port': 20001, 'weight': 1000, + 'device': 'sda'}) + ring.add_dev({ + 'region': 1, 'zone': 1, + 'ip': '10.0.0.1', 'port': 20001, 'weight': 1000, + 'device': 'sdb'}) + ring.add_dev({ + 'region': 1, 'zone': 1, + 'ip': '10.0.0.1', 'port': 20001, 'weight': 1000, + 'device': 'sdc'}) + ring.add_dev({ + 'region': 1, 'zone': 1, + 'ip': '10.0.0.1', 'port': 20001, 'weight': 1000, + 'device': 'sdd'}) + ring.add_dev({ + 'region': 1, 'zone': 1, + 'ip': '10.0.0.1', 'port': 20001, 'weight': 1000, + 'device': 'sde'}) + ring.rebalance() + + # The last guy in zone 1 + ring.add_dev({ + 'region': 1, 'zone': 1, + 'ip': '10.0.0.1', 'port': 20001, 'weight': 1000, + 'device': 'sdf'}) + + # Add zone 2 (same total weight as zone 1) + ring.add_dev({ + 'region': 1, 'zone': 2, + 'ip': '10.0.0.2', 'port': 20001, 'weight': 1000, + 'device': 'sda'}) + ring.add_dev({ + 'region': 1, 'zone': 2, + 'ip': '10.0.0.2', 'port': 20001, 'weight': 1000, + 'device': 'sdb'}) + ring.add_dev({ + 'region': 1, 'zone': 2, + 'ip': '10.0.0.2', 'port': 20001, 'weight': 1000, + 'device': 'sdc'}) + ring.add_dev({ + 'region': 1, 'zone': 2, + 'ip': '10.0.0.2', 'port': 20001, 'weight': 1000, + 'device': 'sdd'}) + ring.add_dev({ + 'region': 1, 'zone': 2, + 'ip': '10.0.0.2', 'port': 20001, 'weight': 1000, + 'device': 'sde'}) + ring.add_dev({ + 'region': 1, 'zone': 2, + 'ip': '10.0.0.2', 'port': 20001, 'weight': 1000, + 'device': 'sdf'}) + ring.pretend_min_part_hours_passed() + ring.save(self.tmpfile) + del ring + + # Rebalance once: this gets 1/5 replica into zone 2; the ring is + # saved because devices changed. + argv = ["", self.tmpfile, "rebalance", "5759339"] self.assertSystemExit(EXIT_WARNING, ringbuilder.main, argv) + rb = RingBuilder.load(self.tmpfile) + self.assertEqual(rb.dispersion, 100) + self.assertEqual(rb.get_balance(), 100) + self.run_srb('pretend_min_part_hours_passed') + + # Rebalance again: this gets 2/5 replica into zone 2, but no devices + # changed and the balance stays the same. The only improvement is + # dispersion. + + captured = {} + + def capture_save(rb, path): + captured['dispersion'] = rb.dispersion + captured['balance'] = rb.get_balance() + # The warning is benign; it's just telling the user to keep on + # rebalancing. The important assertion is that the builder was + # saved. + with mock.patch('swift.common.ring.RingBuilder.save', capture_save): + self.assertSystemExit(EXIT_WARNING, ringbuilder.main, argv) + self.assertEqual(captured, { + 'dispersion': 0, + 'balance': 100, + }) def test_rebalance_no_devices(self): # Test no devices