From 71993d84e88cc1d5f7742182905cace21c7e88cb Mon Sep 17 00:00:00 2001 From: "Lisak, Peter" Date: Fri, 9 Oct 2015 16:13:55 +0200 Subject: [PATCH] swift-ring-builder can't remove a device with zero weight If a device with 0 weight is tried to remove, the following rebalance does not write changes into builder file. Scenario: $ swift-ring-builder object.builder set_weight --id 1 0.00 $ swift-ring-builder object.builder rebalance Wait for moving files out of the device id=1. $ swift-ring-builder object.builder remove --id 1 $ swift-ring-builder object.builder rebalance In fact, the device id=1 is not removed after rebalance (must be --force used). Change-Id: Iad5a444023eae9882a3addd7f119ff4d18559ddd --- swift/cli/ring_builder_analyzer.py | 16 +++++++++------- swift/cli/ringbuilder.py | 4 ++-- swift/common/ring/builder.py | 6 ++++-- test/unit/cli/test_ringbuilder.py | 15 +++++++++++++++ test/unit/common/ring/test_builder.py | 24 ++++++++++++++++++++---- 5 files changed, 50 insertions(+), 15 deletions(-) diff --git a/swift/cli/ring_builder_analyzer.py b/swift/cli/ring_builder_analyzer.py index 8e3d7b5ebe..1ae35ae031 100644 --- a/swift/cli/ring_builder_analyzer.py +++ b/swift/cli/ring_builder_analyzer.py @@ -306,18 +306,20 @@ def run_scenario(scenario): command_f(*command) rebalance_number = 1 - parts_moved, old_balance = rb.rebalance(seed=seed) + parts_moved, old_balance, removed_devs = rb.rebalance(seed=seed) rb.pretend_min_part_hours_passed() - print "\tRebalance 1: moved %d parts, balance is %.6f" % ( - parts_moved, old_balance) + print "\tRebalance 1: moved %d parts, balance is %.6f, \ + %d removed devs" % ( + parts_moved, old_balance, removed_devs) while True: rebalance_number += 1 - parts_moved, new_balance = rb.rebalance(seed=seed) + parts_moved, new_balance, removed_devs = rb.rebalance(seed=seed) rb.pretend_min_part_hours_passed() - print "\tRebalance %d: moved %d parts, balance is %.6f" % ( - rebalance_number, parts_moved, new_balance) - if parts_moved == 0: + print "\tRebalance %d: moved %d parts, balance is %.6f, \ + %d removed devs" % ( + rebalance_number, parts_moved, new_balance, removed_devs) + if parts_moved == 0 and removed_devs == 0: break if abs(new_balance - old_balance) < 1 and not ( old_balance == builder.MAX_BALANCE and diff --git a/swift/cli/ringbuilder.py b/swift/cli/ringbuilder.py index 70fb80b367..caac43065f 100755 --- a/swift/cli/ringbuilder.py +++ b/swift/cli/ringbuilder.py @@ -797,7 +797,7 @@ swift-ring-builder rebalance [options] devs_changed = builder.devs_changed try: last_balance = builder.get_balance() - parts, balance = builder.rebalance(seed=get_seed(3)) + parts, balance, removed_devs = builder.rebalance(seed=get_seed(3)) except exceptions.RingBuilderError as e: print('-' * 79) print("An error has occurred during ring validation. Common\n" @@ -807,7 +807,7 @@ swift-ring-builder rebalance [options] (e,)) print('-' * 79) exit(EXIT_ERROR) - if not (parts or options.force): + if not (parts or options.force or removed_devs): print('No partitions could be reassigned.') print('Either none need to be or none can be due to ' 'min_part_hours [%s].' % builder.min_part_hours) diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py index 8d4802f6ef..16d1d71ec8 100644 --- a/swift/common/ring/builder.py +++ b/swift/common/ring/builder.py @@ -425,7 +425,7 @@ class RingBuilder(object): self._initial_balance() self.devs_changed = False self._build_dispersion_graph() - return self.parts, self.get_balance() + return self.parts, self.get_balance(), 0 changed_parts = 0 self._update_last_part_moves() last_balance = 0 @@ -437,6 +437,7 @@ class RingBuilder(object): self._set_parts_wanted() self._reassign_parts(new_parts) changed_parts += len(new_parts) + removed_devs = 0 while True: reassign_parts = self._gather_reassign_parts() changed_parts += len(reassign_parts) @@ -448,6 +449,7 @@ class RingBuilder(object): remove_dev_id = self._remove_devs.pop()['id'] self.logger.debug("Removing dev %d", remove_dev_id) self.devs[remove_dev_id] = None + removed_devs += 1 balance = self.get_balance() if balance < 1 or abs(last_balance - balance) < 1 or \ changed_parts == self.parts: @@ -457,7 +459,7 @@ class RingBuilder(object): self.version += 1 changed_parts = self._build_dispersion_graph(old_replica2part2dev) - return changed_parts, balance + return changed_parts, balance, removed_devs def _build_dispersion_graph(self, old_replica2part2dev=None): """ diff --git a/test/unit/cli/test_ringbuilder.py b/test/unit/cli/test_ringbuilder.py index b3b12a62a7..dcfb00dc49 100644 --- a/test/unit/cli/test_ringbuilder.py +++ b/test/unit/cli/test_ringbuilder.py @@ -1696,6 +1696,21 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): err = e self.assertEqual(err.code, 2) + def test_rebalance_remove_zero_weighted_device(self): + self.create_sample_ring() + ring = RingBuilder.load(self.tmpfile) + ring.set_dev_weight(3, 0.0) + ring.rebalance() + ring.remove_dev(3) + ring.save(self.tmpfile) + + # Test rebalance after remove 0 weighted device + argv = ["", self.tmpfile, "rebalance", "3"] + self.assertRaises(SystemExit, ringbuilder.main, argv) + ring = RingBuilder.load(self.tmpfile) + self.assertTrue(ring.validate()) + self.assertEqual(ring.devs[3], None) + def test_write_ring(self): self.create_sample_ring() argv = ["", self.tmpfile, "rebalance"] diff --git a/test/unit/common/ring/test_builder.py b/test/unit/common/ring/test_builder.py index 70eb7b84c1..f31e4ab747 100644 --- a/test/unit/common/ring/test_builder.py +++ b/test/unit/common/ring/test_builder.py @@ -309,6 +309,22 @@ class TestRingBuilder(unittest.TestCase): rb.rebalance() rb.validate() + def test_remove_zero_weighted(self): + rb = ring.RingBuilder(8, 3, 0) + rb.add_dev({'id': 0, 'device': 'd0', 'ip': '10.0.0.1', + 'port': 6002, 'weight': 1000.0, 'region': 0, 'zone': 1}) + rb.add_dev({'id': 1, 'device': 'd1', 'ip': '10.0.0.2', + 'port': 6002, 'weight': 0.0, 'region': 0, 'zone': 2}) + rb.add_dev({'id': 2, 'device': 'd2', 'ip': '10.0.0.3', + 'port': 6002, 'weight': 1000.0, 'region': 0, 'zone': 3}) + rb.add_dev({'id': 3, 'device': 'd3', 'ip': '10.0.0.1', + 'port': 6002, 'weight': 1000.0, 'region': 0, 'zone': 1}) + rb.rebalance() + + rb.remove_dev(1) + parts, balance, removed = rb.rebalance() + self.assertEqual(removed, 1) + def test_shuffled_gather(self): if self._shuffled_gather_helper() and \ self._shuffled_gather_helper(): @@ -366,7 +382,7 @@ class TestRingBuilder(unittest.TestCase): rb.add_dev({'region': 1, 'zone': 2, 'weight': 4000.0, 'ip': '10.1.1.3', 'port': 10000, 'device': 'sdb'}) - _, balance = rb.rebalance(seed=2) + _, balance, _ = rb.rebalance(seed=2) # maybe not *perfect*, but should be close self.assertTrue(balance <= 1) @@ -795,7 +811,7 @@ class TestRingBuilder(unittest.TestCase): # it's as balanced as it gets, so nothing moves anymore rb.pretend_min_part_hours_passed() - parts_moved, _balance = rb.rebalance(seed=1) + parts_moved, _balance, _removed = rb.rebalance(seed=1) self.assertEqual(parts_moved, 0) def test_region_fullness_with_balanceable_ring(self): @@ -867,7 +883,7 @@ class TestRingBuilder(unittest.TestCase): rb.add_dev({'id': 3, 'region': 1, 'zone': 1, 'weight': 0.25, 'ip': '127.0.0.1', 'port': 10004, 'device': 'sda1'}) rb.pretend_min_part_hours_passed() - changed_parts, _balance = rb.rebalance(seed=2) + changed_parts, _balance, _removed = rb.rebalance(seed=2) # there's not enough room in r1 for every partition to have a replica # in it, so only 86 assignments occur in r1 (that's ~1/5 of the total, @@ -920,7 +936,7 @@ class TestRingBuilder(unittest.TestCase): for weight in range(0, 101, 10): rb.set_dev_weight(5, weight) rb.pretend_min_part_hours_passed() - changed_parts, _balance = rb.rebalance(seed=2) + changed_parts, _balance, _removed = rb.rebalance(seed=2) rb.validate() moved_partitions.append(changed_parts) # Ensure that the second region has enough partitions