Recheck sync status when assessing status of unit

After a resume operation, or after a boot following a prolonged
period of downtime, it may take some time for the local unit
to re-sync with its peers.

Update the function that assesses local unit status to recheck
sync status up to 10 times (with an increasing delay between
rechecks) before declaring that the unit is in a blocked state.

Change-Id: Idaa960ade4c52c9ebba6c65a55bade6b22e90cdc
Closes-Bug: 1626450
This commit is contained in:
James Page 2017-03-03 13:52:42 +00:00
parent 7340fdab14
commit 12af56c746
2 changed files with 36 additions and 5 deletions

View File

@ -7,6 +7,7 @@ import os
import shutil
import uuid
from charmhelpers.core.decorators import retry_on_exception
from charmhelpers.core.host import (
lsb_release,
mkdir,
@ -441,11 +442,29 @@ def cluster_in_sync():
return False
class DesyncedException(Exception):
'''Raised if PXC unit is not in sync with its peers

Raised when cluster_in_sync() reports that the local unit is not in
sync; used as the exc_type that triggers a retry of the sync check and
caught by charm_check_func() to report a 'blocked' status.
'''
pass
def charm_check_func():
"""Custom function to assess the status of the current unit
@returns (status, message) - tuple of strings if an issue
"""
# Retry the sync check when DesyncedException is raised, configured with
# num_retries=10 and base_delay=2, so a unit that is still re-syncing is
# not immediately reported as blocked.
@retry_on_exception(num_retries=10,
base_delay=2,
exc_type=DesyncedException)
def _cluster_in_sync():
'''Helper func to wait for a while for resync to occur

Calls cluster_in_sync() and converts a falsy result into an exception
so that the retry_on_exception decorator can re-run the check.

@raise DesyncedException: raised if local unit is not in sync
with its peers
'''
if not cluster_in_sync():
raise DesyncedException()
min_size = config('min-cluster-size')
# Ensure that number of peers > cluster size configuration
if not is_sufficient_peers():
@ -456,10 +475,12 @@ def charm_check_func():
# and has the required peers
if not is_bootstrapped():
return ('waiting', 'Unit waiting for cluster bootstrap')
elif is_bootstrapped() and cluster_in_sync():
return ('active', 'Unit is ready and clustered')
else:
return ('blocked', 'Unit is not in sync')
elif is_bootstrapped():
try:
_cluster_in_sync()
return ('active', 'Unit is ready and clustered')
except DesyncedException:
return ('blocked', 'Unit is not in sync')
else:
return ('active', 'Unit is ready')

View File

@ -287,7 +287,8 @@ class UtilsTestsCTC(CharmTestCase):
stat, _ = percona_utils.charm_check_func()
assert stat == 'active'
def test_bootstrapped_not_in_sync(self):
@mock.patch('time.sleep', return_value=None)
def test_bootstrapped_not_in_sync(self, mock_time):
self.config.return_value = 3
self.is_sufficient_peers.return_value = True
self.is_bootstrapped.return_value = True
@ -295,6 +296,15 @@ class UtilsTestsCTC(CharmTestCase):
stat, _ = percona_utils.charm_check_func()
assert stat == 'blocked'
@mock.patch('time.sleep', return_value=None)
def test_bootstrapped_not_in_sync_to_synced(self, mock_time):
"""Unit is reported 'active' once a later sync recheck succeeds."""
# time.sleep is patched out; presumably this keeps the retry delays
# from slowing the test -- confirm against retry_on_exception.
self.config.return_value = 3
self.is_sufficient_peers.return_value = True
self.is_bootstrapped.return_value = True
# The first two sync checks fail and the third one succeeds.
self.cluster_in_sync.side_effect = [False, False, True]
stat, _ = percona_utils.charm_check_func()
assert stat == 'active'
@mock.patch.object(percona_utils, 'determine_packages')
@mock.patch.object(percona_utils, 'application_version_set')
@mock.patch.object(percona_utils, 'get_upstream_version')