From 3649d58e1ec597b1ad520631a3a824aeece771ef Mon Sep 17 00:00:00 2001 From: James Page Date: Tue, 6 Oct 2015 11:06:36 +0100 Subject: [PATCH] Add basic status assessment for monitor role --- hooks/ceph.py | 4 +++- hooks/hooks.py | 46 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/hooks/ceph.py b/hooks/ceph.py index d4b98b5..1d77030 100644 --- a/hooks/ceph.py +++ b/hooks/ceph.py @@ -21,7 +21,8 @@ from charmhelpers.core.hookenv import ( log, ERROR, WARNING, - cached + cached, + status_set, ) from charmhelpers.contrib.storage.linux.utils import ( zap_disk, @@ -365,6 +366,7 @@ def osdize_dev(dev, osd_format, osd_journal, reformat_osd=False, log('Looks like {} is in use, skipping.'.format(dev)) return + status_set('maintenance', 'Initializing device {}'.format(dev)) cmd = ['ceph-disk-prepare'] # Later versions of ceph support more options if cmp_pkgrevno('ceph', '0.48.3') >= 0: diff --git a/hooks/hooks.py b/hooks/hooks.py index 2306fcf..c680a4a 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -26,7 +26,9 @@ from charmhelpers.core.hookenv import ( remote_unit, Hooks, UnregisteredHookError, service_name, - relations_of_type + relations_of_type, + status_set, + local_unit, ) from charmhelpers.core.host import ( service_restart, @@ -152,6 +154,7 @@ def config_changed(): # Support use of single node ceph if (not ceph.is_bootstrapped() and int(config('monitor-count')) == 1): + status_set('maintenance', 'Bootstrapping single Ceph MON') ceph.bootstrap_monitor_cluster(config('monitor-secret')) ceph.wait_for_bootstrap() @@ -181,6 +184,20 @@ def get_mon_hosts(): return hosts +def get_peer_units(): + ''' + Returns a dictionary of unit names from the mon peer relation with + a flag indicating whether the unit has presented its address + ''' + units = {} + units[local_unit()] = True + for relid in relation_ids('mon'): + for unit in related_units(relid): + addr = relation_get('ceph-public-address', unit, relid) + 
units[unit] = addr is not None + return units + + def reformat_osd(): if config('osd-reformat'): return True @@ -210,6 +227,7 @@ def mon_relation(): moncount = int(config('monitor-count')) if len(get_mon_hosts()) >= moncount: + status_set('maintenance', 'Bootstrapping MON cluster') ceph.bootstrap_monitor_cluster(config('monitor-secret')) ceph.wait_for_bootstrap() for dev in get_devices(): @@ -384,8 +402,34 @@ def update_nrpe_config(): nrpe_setup.write() +def assess_status(): + '''Set unit status from peer count, peer addresses and quorum state''' + moncount = int(config('monitor-count')) + units = get_peer_units() + # not enough peers and mon_count > 1 + if len(units) < moncount: + status_set('blocked', 'Insufficient peer units to bootstrap' + ' cluster (require {})'.format(moncount)) + return + + # mon_count > 1, peers, but no ceph-public-address + ready = sum(1 for unit_ready in units.values() if unit_ready) + if ready < moncount: + status_set('waiting', 'Peer units detected, waiting for addresses') + return + + # active - bootstrapped + quorum status check + if ceph.is_bootstrapped() and ceph.is_quorum(): + status_set('active', 'Unit active and clustered') + else: + # Unit should be running and clustered, but no quorum + # TODO: should this be blocked or waiting? + status_set('blocked', 'Unit not clustered (no quorum)') + + if __name__ == '__main__': try: hooks.execute(sys.argv) except UnregisteredHookError as e: log('Unknown hook {} - skipping.'.format(e)) + assess_status()