From 50b301dab4c2a5f42e5fe60d2053c56adebd230c Mon Sep 17 00:00:00 2001 From: David Ames Date: Mon, 13 Aug 2018 16:13:19 -0700 Subject: [PATCH] Series Upgrade Implement the series-upgrade feature allowing to move between Ubuntu series. Change-Id: Iad105bb3287041d360852498f8a44c87e06bee16 --- actions.yaml | 6 +++ actions/actions.py | 19 +++++++- actions/complete-cluster-series-upgrade | 1 + hooks/charmhelpers/contrib/openstack/utils.py | 48 ++++++++++++++++++- hooks/post-series-upgrade | 1 + hooks/pre-series-upgrade | 1 + hooks/rabbit_utils.py | 15 ++++++ hooks/rabbitmq_server_relations.py | 41 +++++++++++++++- unit_tests/test_rabbit_utils.py | 45 +++++++++++++++++ 9 files changed, 173 insertions(+), 4 deletions(-) create mode 120000 actions/complete-cluster-series-upgrade create mode 120000 hooks/post-series-upgrade create mode 120000 hooks/pre-series-upgrade diff --git a/actions.yaml b/actions.yaml index 8029a5ee..f71441ef 100644 --- a/actions.yaml +++ b/actions.yaml @@ -21,3 +21,9 @@ check-queues: type: string default: "/" description: Show queues from the specified vhost. Eg; "openstack". +complete-cluster-series-upgrade: + description: | + Perform final operations post series upgrade. Inform all nodes in the + cluster the upgrade is complete cluster wide. + This action should be performed on the current leader. Note the leader may + have changed during the series upgrade process. 
diff --git a/actions/actions.py b/actions/actions.py index 98e6140e..d2af6352 100755 --- a/actions/actions.py +++ b/actions/actions.py @@ -29,6 +29,8 @@ from charmhelpers.core.hookenv import ( action_fail, action_set, action_get, + is_leader, + leader_set, ) from rabbit_utils import ( @@ -36,6 +38,7 @@ from rabbit_utils import ( CONFIG_FILES, pause_unit_helper, resume_unit_helper, + assess_status, ) @@ -89,10 +92,24 @@ def check_queues(args): action_fail('Failed to run rabbitmqctl list_queues') +def complete_cluster_series_upgrade(args): + """ Complete the series upgrade process + + After all nodes have been upgraded, this action is run to inform the whole + cluster the upgrade is done. The unit's status is then reassessed. + """ + if is_leader(): + # Unset cluster_series_upgrading + leader_set(cluster_series_upgrading="") + assess_status(ConfigRenderer(CONFIG_FILES)) + + # A dictionary of all the defined actions to callables (which take # parsed arguments). ACTIONS = {"pause": pause, "resume": resume, "cluster-status": cluster_status, - "check-queues": check_queues} + "check-queues": check_queues, + "complete-cluster-series-upgrade": complete_cluster_series_upgrade} def main(args): diff --git a/actions/complete-cluster-series-upgrade b/actions/complete-cluster-series-upgrade new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/actions/complete-cluster-series-upgrade @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/hooks/charmhelpers/contrib/openstack/utils.py b/hooks/charmhelpers/contrib/openstack/utils.py index 0180e555..24f5b808 100644 --- a/hooks/charmhelpers/contrib/openstack/utils.py +++ b/hooks/charmhelpers/contrib/openstack/utils.py @@ -831,12 +831,25 @@ def _ows_check_if_paused(services=None, ports=None): """Check if the unit is supposed to be paused, and if so check that the services/ports (if passed) are actually stopped/not being listened to.
- if the unit isn't supposed to be paused, just return None, None + If the unit isn't supposed to be paused, just return None, None + + If the unit is performing a series upgrade, return a message indicating + this. @param services: OPTIONAL services spec or list of service names. @param ports: OPTIONAL list of port numbers. @returns state, message or None, None """ + if is_unit_upgrading_set(): + state, message = check_actually_paused(services=services, + ports=ports) + if state is None: + # we're paused okay, so set blocked and return + state = "blocked" + message = ("Ready for do-release-upgrade and reboot. " + "Set complete when finished.") + return state, message + if is_unit_paused_set(): state, message = check_actually_paused(services=services, ports=ports) @@ -1339,7 +1352,7 @@ def pause_unit(assess_status_func, services=None, ports=None, message = assess_status_func() if message: messages.append(message) - if messages: + if messages and not is_unit_upgrading_set(): raise Exception("Couldn't pause: {}".format("; ".join(messages))) @@ -1689,3 +1702,34 @@ def install_os_snaps(snaps, refresh=False): snap_install(snap, _ensure_flag(snaps[snap]['channel']), _ensure_flag(snaps[snap]['mode'])) + + +def set_unit_upgrading(): + """Set the unit to an upgrading state in the local kv() store. + """ + with unitdata.HookData()() as t: + kv = t[0] + kv.set('unit-upgrading', True) + + +def clear_unit_upgrading(): + """Clear the unit from an upgrading state in the local kv() store + """ + with unitdata.HookData()() as t: + kv = t[0] + kv.set('unit-upgrading', False) + + +def is_unit_upgrading_set(): + """Return the state of the kv().get('unit-upgrading'). + + To help with units that don't have HookData() (testing) + if it raises an exception, return False + """ + try: + with unitdata.HookData()() as t: + kv = t[0] + # transform something truth-y into a Boolean.
+ return not(not(kv.get('unit-upgrading'))) + except Exception: + return False diff --git a/hooks/post-series-upgrade b/hooks/post-series-upgrade new file mode 120000 index 00000000..eecaaa74 --- /dev/null +++ b/hooks/post-series-upgrade @@ -0,0 +1 @@ +rabbitmq_server_relations.py \ No newline at end of file diff --git a/hooks/pre-series-upgrade b/hooks/pre-series-upgrade new file mode 120000 index 00000000..eecaaa74 --- /dev/null +++ b/hooks/pre-series-upgrade @@ -0,0 +1 @@ +rabbitmq_server_relations.py \ No newline at end of file diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py index 7c090c19..1edf2233 100644 --- a/hooks/rabbit_utils.py +++ b/hooks/rabbit_utils.py @@ -807,6 +807,9 @@ def clustered(): def assess_cluster_status(*args): ''' Assess the status for the current running unit ''' + if is_unit_paused_set(): + return "maintenance", "Paused" + # NOTE: ensure rabbitmq is actually installed before doing # any checks if rabbitmq_is_installed(): @@ -909,6 +912,18 @@ def assess_status_func(configs): services=services(), ports=None) if state == 'active' and clustered(): message = 'Unit is ready and clustered' + # Remind the administrator cluster_series_upgrading is set. + # If the cluster has completed the series upgrade, run the + # complete-cluster-series-upgrade action to clear this setting. + if leader_get('cluster_series_upgrading'): + message += (", Run complete-cluster-series-upgrade when the " + "cluster has completed its upgrade.") + # Edge case when the first rabbitmq unit is upgraded it will show + # waiting for peers. Force "active" workload state for various + # testing suites like zaza to recognize a successful series upgrade + # of the first unit. 
+ if state == "waiting": + state = "active" status_set(state, message) return _assess_status_func diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py index b45190f6..db633bcd 100755 --- a/hooks/rabbitmq_server_relations.py +++ b/hooks/rabbitmq_server_relations.py @@ -54,6 +54,10 @@ from charmhelpers.contrib.hahelpers.cluster import ( ) from charmhelpers.contrib.openstack.utils import ( is_unit_paused_set, + set_unit_upgrading, + is_unit_upgrading_set, + clear_unit_paused, + clear_unit_upgrading, ) import charmhelpers.contrib.storage.linux.ceph as ceph @@ -71,6 +75,8 @@ from charmhelpers.core.hookenv import ( DEBUG, ERROR, INFO, + leader_set, + leader_get, relation_get, relation_clear, relation_set, @@ -735,6 +741,11 @@ MAN_PLUGIN = 'rabbitmq_management' @rabbit.restart_on_change(rabbit.restart_map()) @harden() def config_changed(): + + if is_unit_paused_set(): + log("Do not run config_changed while unit is paused", "WARNING") + return + # Update hosts with this unit's information rabbit.update_hosts_file( {rabbit.get_unit_ip(config_override=rabbit.CLUSTER_OVERRIDE_CONFIG, @@ -820,6 +831,11 @@ def leader_elected(): @hooks.hook('leader-settings-changed') def leader_settings_changed(): + + if is_unit_paused_set(): + log("Do not run config_changed while unit is paused", "WARNING") + return + if not os.path.exists(rabbit.RABBITMQ_CTL): log('Deferring cookie configuration, RabbitMQ not yet installed') return @@ -842,6 +858,29 @@ def pre_install_hooks(): subprocess.check_call(['sh', '-c', f]) +@hooks.hook('pre-series-upgrade') +def series_upgrade_prepare(): + set_unit_upgrading() + if not is_unit_paused_set(): + log("Pausing unit for series upgrade.") + rabbit.pause_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES)) + if is_leader(): + if not leader_get('cluster_series_upgrading'): + # Inform the entire cluster a series upgrade is occurring. 
+ # Run the complete-cluster-series-upgrade action on the leader to + # clear this setting when the full cluster has completed its + # upgrade. + leader_set(cluster_series_upgrading=True) + + +@hooks.hook('post-series-upgrade') +def series_upgrade_complete(): + log("Running complete series upgrade hook", "INFO") + clear_unit_paused() + clear_unit_upgrading() + rabbit.resume_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES)) + + @hooks.hook('update-status') @harden() def update_status(): @@ -860,7 +899,7 @@ def update_status(): # # Have a look at the docstring of the stop() function for detailed # explanation. - if is_leader(): + if is_leader() and not is_unit_paused_set(): rabbit.check_cluster_memberships() if __name__ == '__main__': diff --git a/unit_tests/test_rabbit_utils.py b/unit_tests/test_rabbit_utils.py index 7b84015f..321eb4a4 100644 --- a/unit_tests/test_rabbit_utils.py +++ b/unit_tests/test_rabbit_utils.py @@ -360,6 +360,7 @@ class UtilsTests(CharmTestCase): assess_cluster_status, status_set, clustered): + self.leader_get.return_value = None services.return_value = 's1' _determine_os_workload_status.return_value = ('active', '') clustered.return_value = True @@ -371,6 +372,50 @@ class UtilsTests(CharmTestCase): status_set.assert_called_once_with('active', 'Unit is ready and clustered') + @mock.patch.object(rabbit_utils, 'clustered') + @mock.patch.object(rabbit_utils, 'status_set') + @mock.patch.object(rabbit_utils, 'assess_cluster_status') + @mock.patch.object(rabbit_utils, 'services') + @mock.patch.object(rabbit_utils, '_determine_os_workload_status') + def test_assess_status_func_cluster_upgrading( + self, _determine_os_workload_status, services, + assess_cluster_status, status_set, clustered): + self.leader_get.return_value = True + services.return_value = 's1' + _determine_os_workload_status.return_value = ('active', '') + clustered.return_value = True + rabbit_utils.assess_status_func('test-config')() + # ports=None whilst port checks are 
disabled. + _determine_os_workload_status.assert_called_once_with( + 'test-config', {}, charm_func=assess_cluster_status, services='s1', + ports=None) + status_set.assert_called_once_with( + 'active', 'Unit is ready and clustered, Run ' + 'complete-cluster-series-upgrade when the cluster has completed ' + 'its upgrade.') + + @mock.patch.object(rabbit_utils, 'clustered') + @mock.patch.object(rabbit_utils, 'status_set') + @mock.patch.object(rabbit_utils, 'assess_cluster_status') + @mock.patch.object(rabbit_utils, 'services') + @mock.patch.object(rabbit_utils, '_determine_os_workload_status') + def test_assess_status_func_cluster_upgrading_first_unit( + self, _determine_os_workload_status, services, + assess_cluster_status, status_set, clustered): + self.leader_get.return_value = True + services.return_value = 's1' + _determine_os_workload_status.return_value = ('waiting', 'No peers') + clustered.return_value = False + rabbit_utils.assess_status_func('test-config')() + # ports=None whilst port checks are disabled. + _determine_os_workload_status.assert_called_once_with( + 'test-config', {}, charm_func=assess_cluster_status, services='s1', + ports=None) + status_set.assert_called_once_with( + 'active', 'No peers, Run ' + 'complete-cluster-series-upgrade when the cluster has completed ' + 'its upgrade.') + def test_pause_unit_helper(self): with mock.patch.object(rabbit_utils, '_pause_resume_helper') as prh: rabbit_utils.pause_unit_helper('random-config')