Avoid simultaneous restarts

It is possible for two or more percona-cluster units to simultaneously
attempt to restart and join the cluster. When this race condition
occurs, one unit may fail with:
"Failed to start mysql (max retries reached)"

We already have the distributed_wait control mechanism in use in other
charms. This change implements it for percona-cluster.

Configuration options allow for fine tuning. The trade-off is time versus
tolerance for collision errors: CI systems may tolerate the occasional
false positive for the time saved, whereas production deployments can
sacrifice a bit of time for a guaranteed deploy.

Change-Id: I52e7f8e410ecd77a7a142d44b43414e33eff3a6e
Closes-Bug: #1745432
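
For reference, the staggering that distributed_wait provides boils down to
turning a unit's number into a per-unit delay. A minimal sketch of the idea
(the helper name staggered_wait is made up here; the real charmhelpers
implementation also falls back to config values and reports workload status):

    import time

    from charmhelpers.core.hookenv import local_unit


    def staggered_wait(modulo, wait):
        # 'percona-cluster/2' -> slot 2; each unit occupies its own slot
        unit_number = int(local_unit().split('/')[1])
        # Slots sleep 0, wait, 2*wait, ... seconds so restarts do not overlap
        time.sleep((unit_number % modulo) * wait)
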
Author: David Ames, 2018-01-25 15:20:29 -08:00
Parent: dc19ecb4a3
Commit: bd5474ce2f
4 changed files with 86 additions and 0 deletions

@@ -285,3 +285,26 @@ options:
    description: |
      A comma-separated list of nagios service groups.
      If left empty, the nagios_context will be used as the servicegroup
  modulo-nodes:
    type: int
    default:
    description: |
      This config option is rarely required but is provided for fine tuning, it
      is safe to leave unset. Modulo nodes is used to help avoid restart
      collisions as well as distribute load on the cloud at larger scale.
      During restarts and cluster joins percona needs to execute these
      operations serially. By setting modulo-nodes to the size of the cluster
      and known-wait to a reasonable value, the charm will distribute the
      operations serially. If this value is unset, the charm will check
      min-cluster-size or else finally default to the size of the cluster
      based on peer relations. Setting this value to 0 will execute operations
      with no wait time. Setting this value to less than the cluster size will
      distribute load but may lead to restart collisions.
  known-wait:
    type: int
    default: 30
    description: |
      Known wait along with modulo nodes is used to help avoid restart
      collisions. Known wait is the amount of time between one node executing
      an operation and another. On slower hardware this value may need to be
      larger than the default of 30 seconds.
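
As an illustration of how an operator might combine the two options (the
values here are only examples), a three-node cluster can be fully serialized
by matching modulo-nodes to the cluster size and allowing a generous wait:

    juju config percona-cluster modulo-nodes=3 known-wait=60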

@@ -106,6 +106,7 @@ from percona_utils import (
    update_bootstrap_uuid,
    LeaderNoBootstrapUUIDError,
    update_root_password,
    cluster_wait,
)
from charmhelpers.core.unitdata import kv
@@ -239,6 +240,8 @@ def render_config_restart_on_changed(clustered, hosts, bootstrap=False):
            # new units will join and apply their own config.
            if not seeded():
                action = service_restart
                # If we are restarting avoid simultaneous restart collisions
                cluster_wait()
            else:
                action = service_start

@@ -55,6 +55,7 @@ from charmhelpers.contrib.database.mysql import (
)
from charmhelpers.contrib.hahelpers.cluster import (
    is_clustered,
    distributed_wait,
)
from charmhelpers.contrib.openstack.utils import (
    make_assess_status_func,
@@ -887,3 +888,34 @@ def update_root_password():
        log(('Cannot connect using new password, not updating password in '
             'the relation'), level=WARNING)
        return


def cluster_wait():
    ''' Wait for operations based on modulo distribution

    Use the distributed_wait function to determine how long to wait before
    running an operation like restart or cluster join. By setting modulo to
    the exact number of nodes in the cluster we get serial operations.

    Check for explicit configuration parameters for modulo distribution.
    The config setting modulo-nodes has first priority. If modulo-nodes is not
    set, check min-cluster-size. Finally, if neither value is set, determine
    how many peers there are from the cluster relation.

    @side_effect: distributed_wait is called which calls time.sleep()
    @return: None
    '''
    wait = config('known-wait')
    if config('modulo-nodes') is not None:
        # modulo-nodes has first priority
        num_nodes = config('modulo-nodes')
    elif config('min-cluster-size'):
        # min-cluster-size is consulted next
        num_nodes = config('min-cluster-size')
    else:
        # If nothing explicit is configured, determine cluster size based on
        # peer relations
        num_nodes = 1
        for rid in relation_ids('cluster'):
            num_nodes += len(related_units(rid))
    distributed_wait(modulo=num_nodes, wait=wait)
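
For illustration, assuming distributed_wait sleeps for
(unit number % modulo) * wait seconds, a three-unit cluster left at the
defaults resolves num_nodes to 3 from the peer relation, so units 0, 1 and 2
would sleep roughly 0, 30 and 60 seconds respectively before restarting,
which is what keeps the operations from overlapping.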

@@ -254,6 +254,7 @@ TO_PATCH = [
    'leader_get',
    'is_unit_paused_set',
    'is_clustered',
    'distributed_wait',
]
@@ -476,6 +477,33 @@ class UtilsTestsCTC(CharmTestCase):
        mock_cluster_ready.return_value = True
        self.assertTrue(percona_utils.leader_node_is_ready())

    def test_cluster_wait(self):
        self.relation_ids.return_value = ['amqp:27']
        self.related_units.return_value = ['unit/1', 'unit/2', 'unit/3']
        # Default check peer relation
        _config = {'known-wait': 30}
        self.config.side_effect = lambda key: _config.get(key)
        percona_utils.cluster_wait()
        self.distributed_wait.assert_called_with(modulo=4, wait=30)
        # Use Min Cluster Size
        _config = {'min-cluster-size': 5, 'known-wait': 30}
        self.config.side_effect = lambda key: _config.get(key)
        percona_utils.cluster_wait()
        self.distributed_wait.assert_called_with(modulo=5, wait=30)
        # Override with modulo-nodes
        _config = {'min-cluster-size': 5, 'modulo-nodes': 10, 'known-wait': 60}
        self.config.side_effect = lambda key: _config.get(key)
        percona_utils.cluster_wait()
        self.distributed_wait.assert_called_with(modulo=10, wait=60)
        # Just modulo-nodes
        _config = {'modulo-nodes': 10, 'known-wait': 60}
        self.config.side_effect = lambda key: _config.get(key)
        percona_utils.cluster_wait()
        self.distributed_wait.assert_called_with(modulo=10, wait=60)
class TestResolveHostnameToIP(CharmTestCase):