diff --git a/actions.yaml b/actions.yaml index 8029a5ee..f71441ef 100644 --- a/actions.yaml +++ b/actions.yaml @@ -21,3 +21,9 @@ check-queues: type: string default: "/" description: Show queues from the specified vhost. Eg; "openstack". +complete-cluster-series-upgrade: + description: | + Perform final operations post series upgrade. Inform all nodes in the + cluster the upgrade is complete cluster wide. + This action should be performed on the current leader. Note the leader may + have changed during the series upgrade process. diff --git a/actions/actions.py b/actions/actions.py index 98e6140e..d2af6352 100755 --- a/actions/actions.py +++ b/actions/actions.py @@ -29,6 +29,8 @@ from charmhelpers.core.hookenv import ( action_fail, action_set, action_get, + is_leader, + leader_set, ) from rabbit_utils import ( @@ -36,6 +38,7 @@ from rabbit_utils import ( CONFIG_FILES, pause_unit_helper, resume_unit_helper, + assess_status, ) @@ -89,10 +92,24 @@ def check_queues(args): action_fail('Failed to run rabbitmqctl list_queues') +def complete_cluster_series_upgrade(args): + """ Complete the series upgrade process + + After all nodes have been upgraded, this action is run to inform the whole + cluster the upgrade is done. Config files will be re-rendered and the + cluster_series_upgrading leader setting cleared. + """ + if is_leader(): + # Unset cluster_series_upgrading + leader_set(cluster_series_upgrading="") + assess_status(ConfigRenderer(CONFIG_FILES)) + + # A dictionary of all the defined actions to callables (which take # parsed arguments). 
ACTIONS = {"pause": pause, "resume": resume, "cluster-status": cluster_status, - "check-queues": check_queues} + "check-queues": check_queues, + "complete-cluster-series-upgrade": complete_cluster_series_upgrade} def main(args): diff --git a/actions/complete-cluster-series-upgrade b/actions/complete-cluster-series-upgrade new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/actions/complete-cluster-series-upgrade @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/hooks/post-series-upgrade b/hooks/post-series-upgrade new file mode 120000 index 00000000..eecaaa74 --- /dev/null +++ b/hooks/post-series-upgrade @@ -0,0 +1 @@ +rabbitmq_server_relations.py \ No newline at end of file diff --git a/hooks/pre-series-upgrade b/hooks/pre-series-upgrade new file mode 120000 index 00000000..eecaaa74 --- /dev/null +++ b/hooks/pre-series-upgrade @@ -0,0 +1 @@ +rabbitmq_server_relations.py \ No newline at end of file diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py index 7c090c19..1edf2233 100644 --- a/hooks/rabbit_utils.py +++ b/hooks/rabbit_utils.py @@ -807,6 +807,9 @@ def clustered(): def assess_cluster_status(*args): ''' Assess the status for the current running unit ''' + if is_unit_paused_set(): + return "maintenance", "Paused" + # NOTE: ensure rabbitmq is actually installed before doing # any checks if rabbitmq_is_installed(): @@ -909,6 +912,18 @@ def assess_status_func(configs): services=services(), ports=None) if state == 'active' and clustered(): message = 'Unit is ready and clustered' + # Remind the administrator cluster_series_upgrading is set. + # If the cluster has completed the series upgrade, run the + # complete-cluster-series-upgrade action to clear this setting. + if leader_get('cluster_series_upgrading'): + message += (", Run complete-cluster-series-upgrade when the " + "cluster has completed its upgrade.") + # Edge case when the first rabbitmq unit is upgraded it will show + # waiting for peers. 
Force "active" workload state for various + testing suites like zaza to recognize a successful series upgrade + of the first unit. + if state == "waiting": + state = "active" status_set(state, message) return _assess_status_func diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py index 9c849c1b..d15a3250 100755 --- a/hooks/rabbitmq_server_relations.py +++ b/hooks/rabbitmq_server_relations.py @@ -54,6 +54,10 @@ from charmhelpers.contrib.hahelpers.cluster import ( ) from charmhelpers.contrib.openstack.utils import ( is_unit_paused_set, + set_unit_upgrading, + is_unit_upgrading_set, + clear_unit_paused, + clear_unit_upgrading, ) import charmhelpers.contrib.storage.linux.ceph as ceph @@ -71,6 +75,8 @@ from charmhelpers.core.hookenv import ( DEBUG, ERROR, INFO, + leader_set, + leader_get, relation_get, relation_clear, relation_set, @@ -735,6 +741,11 @@ MAN_PLUGIN = 'rabbitmq_management' @rabbit.restart_on_change(rabbit.restart_map()) @harden() def config_changed(): + + if is_unit_paused_set(): + log("Do not run config_changed while unit is paused", "WARNING") + return + # Update hosts with this unit's information rabbit.update_hosts_file( {rabbit.get_unit_ip(config_override=rabbit.CLUSTER_OVERRIDE_CONFIG, @@ -820,6 +831,11 @@ def leader_elected(): @hooks.hook('leader-settings-changed') def leader_settings_changed(): + + if is_unit_paused_set(): + log("Do not run leader_settings_changed while unit is paused", "WARNING") + return + if not os.path.exists(rabbit.RABBITMQ_CTL): log('Deferring cookie configuration, RabbitMQ not yet installed') return @@ -842,6 +858,29 @@ def pre_install_hooks(): subprocess.check_call(['sh', '-c', f]) +@hooks.hook('pre-series-upgrade') +def series_upgrade_prepare(): + set_unit_upgrading() + if not is_unit_paused_set(): + log("Pausing unit for series upgrade.") + rabbit.pause_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES)) + if is_leader(): + if not leader_get('cluster_series_upgrading'): + # Inform the
entire cluster a series upgrade is occurring. + # Run the complete-cluster-series-upgrade action on the leader to + # clear this setting when the full cluster has completed its + # upgrade. + leader_set(cluster_series_upgrading=True) + + +@hooks.hook('post-series-upgrade') +def series_upgrade_complete(): + log("Running complete series upgrade hook", "INFO") + clear_unit_paused() + clear_unit_upgrading() + rabbit.resume_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES)) + + @hooks.hook('update-status') @harden() def update_status(): @@ -860,7 +899,7 @@ def update_status(): # # Have a look at the docstring of the stop() function for detailed # explanation. - if is_leader(): + if is_leader() and not is_unit_paused_set(): rabbit.check_cluster_memberships() if __name__ == '__main__': diff --git a/unit_tests/test_rabbit_utils.py b/unit_tests/test_rabbit_utils.py index 7b84015f..321eb4a4 100644 --- a/unit_tests/test_rabbit_utils.py +++ b/unit_tests/test_rabbit_utils.py @@ -360,6 +360,7 @@ class UtilsTests(CharmTestCase): assess_cluster_status, status_set, clustered): + self.leader_get.return_value = None services.return_value = 's1' _determine_os_workload_status.return_value = ('active', '') clustered.return_value = True @@ -371,6 +372,50 @@ class UtilsTests(CharmTestCase): status_set.assert_called_once_with('active', 'Unit is ready and clustered') + @mock.patch.object(rabbit_utils, 'clustered') + @mock.patch.object(rabbit_utils, 'status_set') + @mock.patch.object(rabbit_utils, 'assess_cluster_status') + @mock.patch.object(rabbit_utils, 'services') + @mock.patch.object(rabbit_utils, '_determine_os_workload_status') + def test_assess_status_func_cluster_upgrading( + self, _determine_os_workload_status, services, + assess_cluster_status, status_set, clustered): + self.leader_get.return_value = True + services.return_value = 's1' + _determine_os_workload_status.return_value = ('active', '') + clustered.return_value = True + 
rabbit_utils.assess_status_func('test-config')() + # ports=None whilst port checks are disabled. + _determine_os_workload_status.assert_called_once_with( + 'test-config', {}, charm_func=assess_cluster_status, services='s1', + ports=None) + status_set.assert_called_once_with( + 'active', 'Unit is ready and clustered, Run ' + 'complete-cluster-series-upgrade when the cluster has completed ' + 'its upgrade.') + + @mock.patch.object(rabbit_utils, 'clustered') + @mock.patch.object(rabbit_utils, 'status_set') + @mock.patch.object(rabbit_utils, 'assess_cluster_status') + @mock.patch.object(rabbit_utils, 'services') + @mock.patch.object(rabbit_utils, '_determine_os_workload_status') + def test_assess_status_func_cluster_upgrading_first_unit( + self, _determine_os_workload_status, services, + assess_cluster_status, status_set, clustered): + self.leader_get.return_value = True + services.return_value = 's1' + _determine_os_workload_status.return_value = ('waiting', 'No peers') + clustered.return_value = False + rabbit_utils.assess_status_func('test-config')() + # ports=None whilst port checks are disabled. + _determine_os_workload_status.assert_called_once_with( + 'test-config', {}, charm_func=assess_cluster_status, services='s1', + ports=None) + status_set.assert_called_once_with( + 'active', 'No peers, Run ' + 'complete-cluster-series-upgrade when the cluster has completed ' + 'its upgrade.') + def test_pause_unit_helper(self): with mock.patch.object(rabbit_utils, '_pause_resume_helper') as prh: rabbit_utils.pause_unit_helper('random-config')