From 50b301dab4c2a5f42e5fe60d2053c56adebd230c Mon Sep 17 00:00:00 2001 From: David Ames Date: Mon, 13 Aug 2018 16:13:19 -0700 Subject: [PATCH] Series Upgrade Implement the series-upgrade feature allowing to move between Ubuntu series. Change-Id: Iad105bb3287041d360852498f8a44c87e06bee16 --- actions.yaml | 6 +++ actions/actions.py | 19 +++++++- actions/complete-cluster-series-upgrade | 1 + hooks/charmhelpers/contrib/openstack/utils.py | 48 ++++++++++++++++++- hooks/post-series-upgrade | 1 + hooks/pre-series-upgrade | 1 + hooks/rabbit_utils.py | 15 ++++++ hooks/rabbitmq_server_relations.py | 41 +++++++++++++++- unit_tests/test_rabbit_utils.py | 45 +++++++++++++++++ 9 files changed, 173 insertions(+), 4 deletions(-) create mode 120000 actions/complete-cluster-series-upgrade create mode 120000 hooks/post-series-upgrade create mode 120000 hooks/pre-series-upgrade diff --git a/actions.yaml b/actions.yaml index 8029a5ee..f71441ef 100644 --- a/actions.yaml +++ b/actions.yaml @@ -21,3 +21,9 @@ check-queues: type: string default: "/" description: Show queues from the specified vhost. Eg; "openstack". +complete-cluster-series-upgrade: + description: | + Perform final operations post series upgrade. Inform all nodes in the + cluster the upgrade is complete cluster wide. + This action should be performed on the current leader. Note the leader may + have changed during the series upgrade process. 
diff --git a/actions/actions.py b/actions/actions.py index 98e6140e..d2af6352 100755 --- a/actions/actions.py +++ b/actions/actions.py @@ -29,6 +29,8 @@ from charmhelpers.core.hookenv import ( action_fail, action_set, action_get, + is_leader, + leader_set, ) from rabbit_utils import ( @@ -36,6 +38,7 @@ from rabbit_utils import ( CONFIG_FILES, pause_unit_helper, resume_unit_helper, + assess_status, ) @@ -89,10 +92,24 @@ def check_queues(args): action_fail('Failed to run rabbitmqctl list_queues') +def complete_cluster_series_upgrade(args): + """ Complete the series upgrade process + + After all nodes have been upgraded, this action is run to inform the whole + cluster the upgrade is done. The unit's status is then reassessed. + """ + if is_leader(): + # Unset cluster_series_upgrading + leader_set(cluster_series_upgrading="") + assess_status(ConfigRenderer(CONFIG_FILES)) + + # A dictionary of all the defined actions to callables (which take # parsed arguments). ACTIONS = {"pause": pause, "resume": resume, "cluster-status": cluster_status, - "check-queues": check_queues} + "check-queues": check_queues, + "complete-cluster-series-upgrade": complete_cluster_series_upgrade} def main(args): diff --git a/actions/complete-cluster-series-upgrade b/actions/complete-cluster-series-upgrade new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/actions/complete-cluster-series-upgrade @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/hooks/charmhelpers/contrib/openstack/utils.py b/hooks/charmhelpers/contrib/openstack/utils.py index 0180e555..24f5b808 100644 --- a/hooks/charmhelpers/contrib/openstack/utils.py +++ b/hooks/charmhelpers/contrib/openstack/utils.py @@ -831,12 +831,25 @@ def _ows_check_if_paused(services=None, ports=None): """Check if the unit is supposed to be paused, and if so check that the services/ports (if passed) are actually stopped/not being listened to.
- if the unit isn't supposed to be paused, just return None, None + If the unit isn't supposed to be paused, just return None, None + + If the unit is performing a series upgrade, return a message indicating + this. @param services: OPTIONAL services spec or list of service names. @param ports: OPTIONAL list of port numbers. @returns state, message or None, None """ + if is_unit_upgrading_set(): + state, message = check_actually_paused(services=services, + ports=ports) + if state is None: + # we're paused okay, so set blocked and return + state = "blocked" + message = ("Ready for do-release-upgrade and reboot. " + "Set complete when finished.") + return state, message + if is_unit_paused_set(): state, message = check_actually_paused(services=services, ports=ports) @@ -1339,7 +1352,7 @@ def pause_unit(assess_status_func, services=None, ports=None, message = assess_status_func() if message: messages.append(message) - if messages: + if messages and not is_unit_upgrading_set(): raise Exception("Couldn't pause: {}".format("; ".join(messages))) @@ -1689,3 +1702,34 @@ def install_os_snaps(snaps, refresh=False): snap_install(snap, _ensure_flag(snaps[snap]['channel']), _ensure_flag(snaps[snap]['mode'])) + + +def set_unit_upgrading(): + """Set the unit to an upgrading state in the local kv() store. + """ + with unitdata.HookData()() as t: + kv = t[0] + kv.set('unit-upgrading', True) + + +def clear_unit_upgrading(): + """Clear the unit from an upgrading state in the local kv() store + """ + with unitdata.HookData()() as t: + kv = t[0] + kv.set('unit-upgrading', False) + + +def is_unit_upgrading_set(): + """Return the state of the kv().get('unit-upgrading'). + + To help with units that don't have HookData() (testing) + if it raises an exception, return False + """ + try: + with unitdata.HookData()() as t: + kv = t[0] + # transform something truth-y into a Boolean.
+ return not(not(kv.get('unit-upgrading'))) + except Exception: + return False diff --git a/hooks/post-series-upgrade b/hooks/post-series-upgrade new file mode 120000 index 00000000..eecaaa74 --- /dev/null +++ b/hooks/post-series-upgrade @@ -0,0 +1 @@ +rabbitmq_server_relations.py \ No newline at end of file diff --git a/hooks/pre-series-upgrade b/hooks/pre-series-upgrade new file mode 120000 index 00000000..eecaaa74 --- /dev/null +++ b/hooks/pre-series-upgrade @@ -0,0 +1 @@ +rabbitmq_server_relations.py \ No newline at end of file diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py index 7c090c19..1edf2233 100644 --- a/hooks/rabbit_utils.py +++ b/hooks/rabbit_utils.py @@ -807,6 +807,9 @@ def clustered(): def assess_cluster_status(*args): ''' Assess the status for the current running unit ''' + if is_unit_paused_set(): + return "maintenance", "Paused" + # NOTE: ensure rabbitmq is actually installed before doing # any checks if rabbitmq_is_installed(): @@ -909,6 +912,18 @@ def assess_status_func(configs): services=services(), ports=None) if state == 'active' and clustered(): message = 'Unit is ready and clustered' + # Remind the administrator cluster_series_upgrading is set. + # If the cluster has completed the series upgrade, run the + # complete-cluster-series-upgrade action to clear this setting. + if leader_get('cluster_series_upgrading'): + message += (", Run complete-cluster-series-upgrade when the " + "cluster has completed its upgrade.") + # Edge case when the first rabbitmq unit is upgraded it will show + # waiting for peers. Force "active" workload state for various + # testing suites like zaza to recognize a successful series upgrade + # of the first unit. 
+ if state == "waiting": + state = "active" status_set(state, message) return _assess_status_func diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py index b45190f6..db633bcd 100755 --- a/hooks/rabbitmq_server_relations.py +++ b/hooks/rabbitmq_server_relations.py @@ -54,6 +54,10 @@ from charmhelpers.contrib.hahelpers.cluster import ( ) from charmhelpers.contrib.openstack.utils import ( is_unit_paused_set, + set_unit_upgrading, + is_unit_upgrading_set, + clear_unit_paused, + clear_unit_upgrading, ) import charmhelpers.contrib.storage.linux.ceph as ceph @@ -71,6 +75,8 @@ from charmhelpers.core.hookenv import ( DEBUG, ERROR, INFO, + leader_set, + leader_get, relation_get, relation_clear, relation_set, @@ -735,6 +741,11 @@ MAN_PLUGIN = 'rabbitmq_management' @rabbit.restart_on_change(rabbit.restart_map()) @harden() def config_changed(): + + if is_unit_paused_set(): + log("Do not run config_changed while unit is paused", "WARNING") + return + # Update hosts with this unit's information rabbit.update_hosts_file( {rabbit.get_unit_ip(config_override=rabbit.CLUSTER_OVERRIDE_CONFIG, @@ -820,6 +831,11 @@ def leader_elected(): @hooks.hook('leader-settings-changed') def leader_settings_changed(): + + if is_unit_paused_set(): + log("Do not run config_changed while unit is paused", "WARNING") + return + if not os.path.exists(rabbit.RABBITMQ_CTL): log('Deferring cookie configuration, RabbitMQ not yet installed') return @@ -842,6 +858,29 @@ def pre_install_hooks(): subprocess.check_call(['sh', '-c', f]) +@hooks.hook('pre-series-upgrade') +def series_upgrade_prepare(): + set_unit_upgrading() + if not is_unit_paused_set(): + log("Pausing unit for series upgrade.") + rabbit.pause_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES)) + if is_leader(): + if not leader_get('cluster_series_upgrading'): + # Inform the entire cluster a series upgrade is occurring. 
+ # Run the complete-cluster-series-upgrade action on the leader to + # clear this setting when the full cluster has completed its + # upgrade. + leader_set(cluster_series_upgrading=True) + + +@hooks.hook('post-series-upgrade') +def series_upgrade_complete(): + log("Running complete series upgrade hook", "INFO") + clear_unit_paused() + clear_unit_upgrading() + rabbit.resume_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES)) + + @hooks.hook('update-status') @harden() def update_status(): @@ -860,7 +899,7 @@ def update_status(): # # Have a look at the docstring of the stop() function for detailed # explanation. - if is_leader(): + if is_leader() and not is_unit_paused_set(): rabbit.check_cluster_memberships() if __name__ == '__main__': diff --git a/unit_tests/test_rabbit_utils.py b/unit_tests/test_rabbit_utils.py index 7b84015f..321eb4a4 100644 --- a/unit_tests/test_rabbit_utils.py +++ b/unit_tests/test_rabbit_utils.py @@ -360,6 +360,7 @@ class UtilsTests(CharmTestCase): assess_cluster_status, status_set, clustered): + self.leader_get.return_value = None services.return_value = 's1' _determine_os_workload_status.return_value = ('active', '') clustered.return_value = True @@ -371,6 +372,50 @@ class UtilsTests(CharmTestCase): status_set.assert_called_once_with('active', 'Unit is ready and clustered') + @mock.patch.object(rabbit_utils, 'clustered') + @mock.patch.object(rabbit_utils, 'status_set') + @mock.patch.object(rabbit_utils, 'assess_cluster_status') + @mock.patch.object(rabbit_utils, 'services') + @mock.patch.object(rabbit_utils, '_determine_os_workload_status') + def test_assess_status_func_cluster_upgrading( + self, _determine_os_workload_status, services, + assess_cluster_status, status_set, clustered): + self.leader_get.return_value = True + services.return_value = 's1' + _determine_os_workload_status.return_value = ('active', '') + clustered.return_value = True + rabbit_utils.assess_status_func('test-config')() + # ports=None whilst port checks are 
disabled. + _determine_os_workload_status.assert_called_once_with( + 'test-config', {}, charm_func=assess_cluster_status, services='s1', + ports=None) + status_set.assert_called_once_with( + 'active', 'Unit is ready and clustered, Run ' + 'complete-cluster-series-upgrade when the cluster has completed ' + 'its upgrade.') + + @mock.patch.object(rabbit_utils, 'clustered') + @mock.patch.object(rabbit_utils, 'status_set') + @mock.patch.object(rabbit_utils, 'assess_cluster_status') + @mock.patch.object(rabbit_utils, 'services') + @mock.patch.object(rabbit_utils, '_determine_os_workload_status') + def test_assess_status_func_cluster_upgrading_first_unit( + self, _determine_os_workload_status, services, + assess_cluster_status, status_set, clustered): + self.leader_get.return_value = True + services.return_value = 's1' + _determine_os_workload_status.return_value = ('waiting', 'No peers') + clustered.return_value = False + rabbit_utils.assess_status_func('test-config')() + # ports=None whilst port checks are disabled. + _determine_os_workload_status.assert_called_once_with( + 'test-config', {}, charm_func=assess_cluster_status, services='s1', + ports=None) + status_set.assert_called_once_with( + 'active', 'No peers, Run ' + 'complete-cluster-series-upgrade when the cluster has completed ' + 'its upgrade.') + def test_pause_unit_helper(self): with mock.patch.object(rabbit_utils, '_pause_resume_helper') as prh: rabbit_utils.pause_unit_helper('random-config')