From 12af56c74693949d89d3e75526f156e34c2ee8f5 Mon Sep 17 00:00:00 2001 From: James Page Date: Fri, 3 Mar 2017 13:52:42 +0000 Subject: [PATCH] Recheck sync status when assessing status of unit After a resume operation, or after a boot following a prolonged period of downtime, it may take some time for the local unit to re-sync with its peers. Update the function that assesses local unit status to recheck sync status up to 10 times (with an increasing delay between rechecks) before declaring that the unit is in a blocked state. Change-Id: Idaa960ade4c52c9ebba6c65a55bade6b22e90cdc Closes-Bug: 1626450 --- hooks/percona_utils.py | 29 +++++++++++++++++++++++++---- unit_tests/test_percona_utils.py | 12 +++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/hooks/percona_utils.py b/hooks/percona_utils.py index 6b9ee07..07e197b 100644 --- a/hooks/percona_utils.py +++ b/hooks/percona_utils.py @@ -7,6 +7,7 @@ import os import shutil import uuid +from charmhelpers.core.decorators import retry_on_exception from charmhelpers.core.host import ( lsb_release, mkdir, @@ -441,11 +442,29 @@ def cluster_in_sync(): return False +class DesyncedException(Exception): + '''Raised if PXC unit is not in sync with its peers''' + pass + + def charm_check_func(): """Custom function to assess the status of the current unit @returns (status, message) - tuple of strings if an issue """ + + @retry_on_exception(num_retries=10, + base_delay=2, + exc_type=DesyncedException) + def _cluster_in_sync(): + '''Helper func to wait for a while for resync to occur + + @raise DesyncedException: raised if local unit is not in sync + with its peers + ''' + if not cluster_in_sync(): + raise DesyncedException() + min_size = config('min-cluster-size') # Ensure that number of peers > cluster size configuration if not is_sufficient_peers(): @@ -456,10 +475,12 @@ def charm_check_func(): # and has the required peers if not is_bootstrapped(): return ('waiting', 'Unit waiting for cluster bootstrap') - 
elif is_bootstrapped() and cluster_in_sync(): - return ('active', 'Unit is ready and clustered') - else: - return ('blocked', 'Unit is not in sync') + elif is_bootstrapped(): + try: + _cluster_in_sync() + return ('active', 'Unit is ready and clustered') + except DesyncedException: + return ('blocked', 'Unit is not in sync') else: return ('active', 'Unit is ready') diff --git a/unit_tests/test_percona_utils.py b/unit_tests/test_percona_utils.py index 61d10f6..fcd91ed 100644 --- a/unit_tests/test_percona_utils.py +++ b/unit_tests/test_percona_utils.py @@ -287,7 +287,8 @@ class UtilsTestsCTC(CharmTestCase): stat, _ = percona_utils.charm_check_func() assert stat == 'active' - def test_bootstrapped_not_in_sync(self): + @mock.patch('time.sleep', return_value=None) + def test_bootstrapped_not_in_sync(self, mock_time): self.config.return_value = 3 self.is_sufficient_peers.return_value = True self.is_bootstrapped.return_value = True @@ -295,6 +296,15 @@ class UtilsTestsCTC(CharmTestCase): stat, _ = percona_utils.charm_check_func() assert stat == 'blocked' + @mock.patch('time.sleep', return_value=None) + def test_bootstrapped_not_in_sync_to_synced(self, mock_time): + self.config.return_value = 3 + self.is_sufficient_peers.return_value = True + self.is_bootstrapped.return_value = True + self.cluster_in_sync.side_effect = [False, False, True] + stat, _ = percona_utils.charm_check_func() + assert stat == 'active' + @mock.patch.object(percona_utils, 'determine_packages') @mock.patch.object(percona_utils, 'application_version_set') @mock.patch.object(percona_utils, 'get_upstream_version')