Use smaller quorum size in proxy for even numbers of replicas

Requiring 2/2 backends for PUT requests means that the cluster can't
tolerate a single failure. Likewise, if you have 4 replicas in 2
regions, requiring 3/4 on a POST request means you cannot POST with
your inter-region link down or congested.

This changes the (replication) quorum size in the proxy to be at least
half the nodes instead of a majority of the nodes.

Daemons that were looking for a majority remain unchanged. The
container reconciler, replicator, and updater still require majorities
so their functioning is unchanged.

Odd numbers of replicas are unaffected by this commit.

Change-Id: I3b07ff0222aba6293ad7d60afe1747acafbe6ce4
This commit is contained in:
Samuel Merritt 2016-04-27 16:59:00 -05:00
parent ce6c850258
commit 29544a9e17
9 changed files with 65 additions and 48 deletions

View File

@ -2937,6 +2937,10 @@ def public(func):
return func
def majority_size(n):
return (n // 2) + 1
def quorum_size(n):
"""
quorum size as it applies to services that use 'replication' for data
@ -2946,7 +2950,7 @@ def quorum_size(n):
Number of successful backend requests needed for the proxy to consider
the client request successful.
"""
return (n // 2) + 1
return (n + 1) // 2
def rsync_ip(ip):

View File

@ -25,7 +25,7 @@ from swift.common.direct_client import (
direct_head_container, direct_delete_container_object,
direct_put_container_object, ClientException)
from swift.common.internal_client import InternalClient, UnexpectedResponse
from swift.common.utils import get_logger, split_path, quorum_size, \
from swift.common.utils import get_logger, split_path, majority_size, \
FileLikeIter, Timestamp, last_modified_date_to_timestamp, \
LRUCache, decode_timestamps
@ -194,7 +194,7 @@ def add_to_reconciler_queue(container_ring, account, container, obj,
server
:returns: .misplaced_object container name, False on failure. "Success"
means a quorum of containers got the update.
means a majority of containers got the update.
"""
container_name = get_reconciler_container_name(obj_timestamp)
object_name = get_reconciler_obj_name(obj_policy_index, account,
@ -232,7 +232,7 @@ def add_to_reconciler_queue(container_ring, account, container, obj,
response_timeout=response_timeout)
successes = sum(pile)
if successes >= quorum_size(len(nodes)):
if successes >= majority_size(len(nodes)):
return container_name
else:
return False
@ -289,7 +289,7 @@ def direct_get_container_policy_index(container_ring, account_name,
:param container_ring: ring in which to look up the container locations
:param account_name: name of the container's account
:param container_name: name of the container
:returns: storage policy index, or None if it couldn't get a quorum
:returns: storage policy index, or None if it couldn't get a majority
"""
def _eat_client_exception(*args):
try:
@ -307,7 +307,7 @@ def direct_get_container_policy_index(container_ring, account_name,
container_name)
headers = [x for x in pile if x is not None]
if len(headers) < quorum_size(len(nodes)):
if len(headers) < majority_size(len(nodes)):
return
return best_policy_index(headers)

View File

@ -31,7 +31,7 @@ from swift.common.exceptions import DeviceUnavailable
from swift.common.http import is_success
from swift.common.db import DatabaseAlreadyExists
from swift.common.utils import (Timestamp, hash_path,
storage_directory, quorum_size)
storage_directory, majority_size)
class ContainerReplicator(db_replicator.Replicator):
@ -202,9 +202,9 @@ class ContainerReplicator(db_replicator.Replicator):
broker.update_reconciler_sync(info['max_row'])
return
max_sync = self.dump_to_reconciler(broker, point)
success = responses.count(True) >= quorum_size(len(responses))
success = responses.count(True) >= majority_size(len(responses))
if max_sync > point and success:
# to be safe, only slide up the sync point with a quorum on
# to be safe, only slide up the sync point with a majority on
# replication
broker.update_reconciler_sync(max_sync)

View File

@ -31,7 +31,7 @@ from swift.common.bufferedhttp import http_connect
from swift.common.exceptions import ConnectionTimeout
from swift.common.ring import Ring
from swift.common.utils import get_logger, config_true_value, ismount, \
dump_recon_cache, quorum_size, Timestamp
dump_recon_cache, majority_size, Timestamp
from swift.common.daemon import Daemon
from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR
@ -238,7 +238,7 @@ class ContainerUpdater(Daemon):
for event in events:
if is_success(event.wait()):
successes += 1
if successes >= quorum_size(len(events)):
if successes >= majority_size(len(events)):
self.logger.increment('successes')
self.successes += 1
self.logger.debug(

View File

@ -1119,9 +1119,9 @@ class TestStoragePolicies(unittest.TestCase):
def test_quorum_size_replication(self):
expected_sizes = {1: 1,
2: 2,
2: 1,
3: 2,
4: 3,
4: 2,
5: 3}
for n, expected in expected_sizes.items():
policy = StoragePolicy(0, 'zero',

View File

@ -2504,13 +2504,23 @@ cluster_dfw1 = http://dfw1.host/v1/
self.assertFalse(utils.streq_const_time('a', 'aaaaa'))
self.assertFalse(utils.streq_const_time('ABC123', 'abc123'))
def test_replication_quorum_size(self):
def test_quorum_size(self):
expected_sizes = {1: 1,
2: 1,
3: 2,
4: 2,
5: 3}
got_sizes = dict([(n, utils.quorum_size(n))
for n in expected_sizes])
self.assertEqual(expected_sizes, got_sizes)
def test_majority_size(self):
expected_sizes = {1: 1,
2: 2,
3: 2,
4: 3,
5: 3}
got_sizes = dict([(n, utils.quorum_size(n))
got_sizes = dict([(n, utils.majority_size(n))
for n in expected_sizes])
self.assertEqual(expected_sizes, got_sizes)

View File

@ -320,16 +320,16 @@ class TestAccountController4Replicas(TestAccountController):
((201, 201, 201, 201), 201),
((201, 201, 201, 404), 201),
((201, 201, 201, 503), 201),
((201, 201, 404, 404), 503),
((201, 201, 404, 503), 503),
((201, 201, 503, 503), 503),
((201, 201, 404, 404), 201),
((201, 201, 404, 503), 201),
((201, 201, 503, 503), 201),
((201, 404, 404, 404), 404),
((201, 404, 404, 503), 503),
((201, 404, 404, 503), 404),
((201, 404, 503, 503), 503),
((201, 503, 503, 503), 503),
((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503),
((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503)
]
@ -340,16 +340,16 @@ class TestAccountController4Replicas(TestAccountController):
((204, 204, 204, 204), 204),
((204, 204, 204, 404), 204),
((204, 204, 204, 503), 204),
((204, 204, 404, 404), 503),
((204, 204, 404, 503), 503),
((204, 204, 503, 503), 503),
((204, 204, 404, 404), 204),
((204, 204, 404, 503), 204),
((204, 204, 503, 503), 204),
((204, 404, 404, 404), 404),
((204, 404, 404, 503), 503),
((204, 404, 404, 503), 404),
((204, 404, 503, 503), 503),
((204, 503, 503, 503), 503),
((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503),
((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503)
]
@ -360,16 +360,16 @@ class TestAccountController4Replicas(TestAccountController):
((204, 204, 204, 204), 204),
((204, 204, 204, 404), 204),
((204, 204, 204, 503), 204),
((204, 204, 404, 404), 503),
((204, 204, 404, 503), 503),
((204, 204, 503, 503), 503),
((204, 204, 404, 404), 204),
((204, 204, 404, 503), 204),
((204, 204, 503, 503), 204),
((204, 404, 404, 404), 404),
((204, 404, 404, 503), 503),
((204, 404, 404, 503), 404),
((204, 404, 503, 503), 503),
((204, 503, 503, 503), 503),
((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503),
((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503)
]

View File

@ -662,12 +662,15 @@ class TestFuncs(unittest.TestCase):
base = Controller(self.app)
# just throw a bunch of test cases at it
self.assertEqual(base.have_quorum([201, 404], 3), False)
self.assertEqual(base.have_quorum([201, 201], 4), False)
self.assertEqual(base.have_quorum([201, 201, 404, 404], 4), False)
self.assertEqual(base.have_quorum([201, 503, 503, 201], 4), False)
self.assertEqual(base.have_quorum([201, 201], 4), True)
self.assertEqual(base.have_quorum([201], 4), False)
self.assertEqual(base.have_quorum([201, 201, 404, 404], 4), True)
self.assertEqual(base.have_quorum([201, 302, 418, 503], 4), False)
self.assertEqual(base.have_quorum([201, 503, 503, 201], 4), True)
self.assertEqual(base.have_quorum([201, 201], 3), True)
self.assertEqual(base.have_quorum([404, 404], 3), True)
self.assertEqual(base.have_quorum([201, 201], 2), True)
self.assertEqual(base.have_quorum([201, 404], 2), True)
self.assertEqual(base.have_quorum([404, 404], 2), True)
self.assertEqual(base.have_quorum([201, 404, 201, 201], 4), True)

View File

@ -278,16 +278,16 @@ class TestContainerController4Replicas(TestContainerController):
((201, 201, 201, 201), 201),
((201, 201, 201, 404), 201),
((201, 201, 201, 503), 201),
((201, 201, 404, 404), 503),
((201, 201, 404, 503), 503),
((201, 201, 503, 503), 503),
((201, 201, 404, 404), 201),
((201, 201, 404, 503), 201),
((201, 201, 503, 503), 201),
((201, 404, 404, 404), 404),
((201, 404, 404, 503), 503),
((201, 404, 404, 503), 404),
((201, 404, 503, 503), 503),
((201, 503, 503, 503), 503),
((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503),
((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503)
]
@ -298,16 +298,16 @@ class TestContainerController4Replicas(TestContainerController):
((204, 204, 204, 204), 204),
((204, 204, 204, 404), 204),
((204, 204, 204, 503), 204),
((204, 204, 404, 404), 503),
((204, 204, 404, 503), 503),
((204, 204, 503, 503), 503),
((204, 204, 404, 404), 204),
((204, 204, 404, 503), 204),
((204, 204, 503, 503), 204),
((204, 404, 404, 404), 404),
((204, 404, 404, 503), 503),
((204, 404, 404, 503), 404),
((204, 404, 503, 503), 503),
((204, 503, 503, 503), 503),
((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503),
((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503)
]
@ -318,16 +318,16 @@ class TestContainerController4Replicas(TestContainerController):
((204, 204, 204, 204), 204),
((204, 204, 204, 404), 204),
((204, 204, 204, 503), 204),
((204, 204, 404, 404), 503),
((204, 204, 404, 503), 503),
((204, 204, 503, 503), 503),
((204, 204, 404, 404), 204),
((204, 204, 404, 503), 204),
((204, 204, 503, 503), 204),
((204, 404, 404, 404), 404),
((204, 404, 404, 503), 503),
((204, 404, 404, 503), 404),
((204, 404, 503, 503), 503),
((204, 503, 503, 503), 503),
((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503),
((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503)
]