From ab813a982d2d091e727a0d05d0305c7cfc5681c9 Mon Sep 17 00:00:00 2001
From: Liam Young
Date: Wed, 24 Nov 2021 10:08:03 +0000
Subject: [PATCH] Use cluster strategy 'ignore' for install

Use the cluster-partition-handling strategy 'ignore' during charm
installation, regardless of the charm config setting. Once the leader
has confirmed it is clustered with its peers, it sets the
cluster-partition-handling strategy to whatever the user set in the
charm config.

Partial-Bug: 1802315
Change-Id: Ic03bbe55ea8aab8b285977a5c0f9410b5bbf35c8
---
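Note (this section is not applied by git am): the sketch below is a
minimal, runnable model of the strategy hand-off this change
introduces. The names leader_db, rendered_strategy and the stub hook
functions are illustrative stand-ins for Juju leader storage
(leader_get/leader_set), RabbitMQClusterContext and the charm hooks;
they are not the charm's real API.

    # Hypothetical stubs: leader_db models Juju leader storage and
    # config models the operator-set charm config.
    CLUSTER_MODE_KEY = 'cluster-partition-handling'
    CLUSTER_MODE_FOR_INSTALL = 'ignore'

    leader_db = {}
    config = {CLUSTER_MODE_KEY: 'autoheal'}  # operator's desired strategy

    def install():
        # The leader pins the safe 'ignore' strategy at install time,
        # whatever the operator configured.
        leader_db.setdefault(CLUSTER_MODE_KEY, CLUSTER_MODE_FOR_INSTALL)

    def rendered_strategy():
        # What RabbitMQClusterContext renders after this change: the
        # leader-published value, falling back to 'ignore' until the
        # leader has published one.
        return leader_db.get(CLUSTER_MODE_KEY) or CLUSTER_MODE_FOR_INSTALL

    def cluster_changed(is_leader, clustered):
        # Once the leader confirms it is clustered with its peers, it
        # promotes the operator's strategy via leader settings; peers
        # re-render and restart via leader-settings-changed.
        if is_leader and clustered:
            leader_db[CLUSTER_MODE_KEY] = config[CLUSTER_MODE_KEY]

    install()
    assert rendered_strategy() == 'ignore'
    cluster_changed(is_leader=True, clustered=True)
    assert rendered_strategy() == 'autoheal'

Until the hand-off completes, assess_cluster_status reports 'waiting'
with "Not reached target cluster-partition-handling mode", which the
updated unit tests assert.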
 hooks/rabbit_utils.py               |  7 ++++
 hooks/rabbitmq_context.py           |  5 ++-
 hooks/rabbitmq_server_relations.py  | 20 +++++++++++
 unit_tests/test_rabbit_utils.py     | 17 +++++++--
 unit_tests/test_rabbitmq_context.py | 56 ++++++++++++++++++++++++-----
 5 files changed, 93 insertions(+), 12 deletions(-)

diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py
index 7658975f..0176ccda 100644
--- a/hooks/rabbit_utils.py
+++ b/hooks/rabbit_utils.py
@@ -103,6 +103,8 @@ from charmhelpers.fetch import (
     get_upstream_version,
 )
 
+CLUSTER_MODE_KEY = 'cluster-partition-handling'
+CLUSTER_MODE_FOR_INSTALL = 'ignore'
 PACKAGES = ['rabbitmq-server', 'python3-amqplib', 'lockfile-progs',
             'python3-croniter']
 
@@ -1089,6 +1091,11 @@ def assess_cluster_status(*args):
         return (
             'blocked',
             'Unable to determine if the rabbitmq service is up')
+    if leader_get(CLUSTER_MODE_KEY) != config(CLUSTER_MODE_KEY):
+        return (
+            'waiting',
+            'Not reached target {} mode'.format(CLUSTER_MODE_KEY))
+
     # we're active - so just return the 'active' state, but if 'active'
     # is returned, then it is ignored by the assess_status system.
     return 'active', "message is ignored"
diff --git a/hooks/rabbitmq_context.py b/hooks/rabbitmq_context.py
index 7eec5f5b..1d0087e2 100644
--- a/hooks/rabbitmq_context.py
+++ b/hooks/rabbitmq_context.py
@@ -19,6 +19,7 @@ import pwd
 import re
 import sys
 
+import rabbit_utils
 import ssl_utils
 
 from charmhelpers.contrib.ssl.service import ServiceCA
@@ -28,6 +29,7 @@ from charmhelpers.core.hookenv import (
     open_port,
     close_port,
     config,
+    leader_get,
     log,
     service_name,
     relation_ids,
@@ -185,7 +187,8 @@ class RabbitMQClusterContext(object):
 
     def __call__(self):
         ctxt = {'cluster_partition_handling':
-                config('cluster-partition-handling'),
+                (leader_get(rabbit_utils.CLUSTER_MODE_KEY) or
+                 rabbit_utils.CLUSTER_MODE_FOR_INSTALL),
                 'mnesia_table_loading_retry_timeout':
                 config('mnesia-table-loading-retry-timeout'),
                 'mnesia_table_loading_retry_limit':
diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py
index d477dcfc..5e614018 100755
--- a/hooks/rabbitmq_server_relations.py
+++ b/hooks/rabbitmq_server_relations.py
@@ -132,6 +132,11 @@ INITIAL_CLIENT_UPDATE_KEY = 'initial_client_update_done'
 def install():
     pre_install_hooks()
     add_source(config('source'), config('key'))
+    if is_leader() and not leader_get(rabbit.CLUSTER_MODE_KEY):
+        log("Setting {} to {} for installation phase.".format(
+            rabbit.CLUSTER_MODE_KEY,
+            rabbit.CLUSTER_MODE_FOR_INSTALL))
+        leader_set({rabbit.CLUSTER_MODE_KEY: rabbit.CLUSTER_MODE_FOR_INSTALL})
     rabbit.install_or_upgrade_packages()
 
 
@@ -426,6 +431,7 @@ def cluster_joined(relation_id=None):
 
 
 @hooks.hook('cluster-relation-changed')
+@rabbit.restart_on_change(rabbit.restart_map())
 def cluster_changed(relation_id=None, remote_unit=None):
     # Future travelers beware ordering is significant
     rdata = relation_get(rid=relation_id, unit=remote_unit)
@@ -463,6 +469,17 @@ def cluster_changed(relation_id=None, remote_unit=None):
     # Local rabbit maybe clustered now so check and inform clients if
     # needed.
     update_clients()
+    if is_leader():
+        if (leader_get(rabbit.CLUSTER_MODE_KEY) !=
+                config(rabbit.CLUSTER_MODE_KEY)):
+            log("Informing peers via leaderdb to change {} to {}".format(
+                rabbit.CLUSTER_MODE_KEY,
+                config(rabbit.CLUSTER_MODE_KEY)))
+            leader_set({
+                rabbit.CLUSTER_MODE_KEY: config(
+                    rabbit.CLUSTER_MODE_KEY)})
+            rabbit.ConfigRenderer(
+                rabbit.CONFIG_FILES()).write_all()
 
     if not is_leader() and is_relation_made('nrpe-external-master'):
         update_nrpe_checks()
@@ -783,6 +800,7 @@ def leader_elected():
 
 
 @hooks.hook('leader-settings-changed')
+@rabbit.restart_on_change(rabbit.restart_map())
 def leader_settings_changed():
 
     if is_unit_paused_set():
@@ -804,6 +822,8 @@ def leader_settings_changed():
     for rid in relation_ids('cluster'):
         relation_set(relation_id=rid, relation_settings={'cookie': cookie})
     update_clients()
+    rabbit.ConfigRenderer(
+        rabbit.CONFIG_FILES()).write_all()
 
 
 def pre_install_hooks():
diff --git a/unit_tests/test_rabbit_utils.py b/unit_tests/test_rabbit_utils.py
index b07f60cd..0166144e 100644
--- a/unit_tests/test_rabbit_utils.py
+++ b/unit_tests/test_rabbit_utils.py
@@ -1004,6 +1004,7 @@ class UtilsTests(CharmTestCase):
             '-p', 'test'
         )
 
+    @mock.patch.object(rabbit_utils, 'leader_get')
     @mock.patch.object(rabbit_utils, 'is_partitioned')
     @mock.patch.object(rabbit_utils, 'wait_app')
     @mock.patch.object(rabbit_utils, 'check_cluster_memberships')
@@ -1014,12 +1015,15 @@ class UtilsTests(CharmTestCase):
     def test_assess_cluster_status(
             self, rabbitmq_is_installed, is_unit_paused_set,
             is_sufficient_peers, clustered, check_cluster_memberships,
-            wait_app, is_partitioned):
+            wait_app, is_partitioned, leader_get):
         is_partitioned.return_value = False
         self.relation_ids.return_value = ["cluster:1"]
         self.related_units.return_value = ["rabbitmq-server/1"]
         _min = 3
-        self.config.return_value = _min
+        _config = {
+            'min-cluster-size': _min,
+            'cluster-partition-handling': 'autoheal'}
+        self.config.side_effect = lambda key: _config.get(key)
 
         # Paused
         is_unit_paused_set.return_value = True
@@ -1074,7 +1078,16 @@ class UtilsTests(CharmTestCase):
             "RabbitMQ is partitioned")
         self.assertEqual(_expected, rabbit_utils.assess_cluster_status())
 
+        # CLUSTER_MODE_KEY not at target
+        is_partitioned.return_value = False
+        leader_get.return_value = 'ignore'
+        _expected = (
+            "waiting",
+            "Not reached target cluster-partition-handling mode")
+        self.assertEqual(_expected, rabbit_utils.assess_cluster_status())
+
         # All OK
+        leader_get.return_value = 'autoheal'
         wait_app.return_value = True
         is_partitioned.return_value = False
         _expected = (
diff --git a/unit_tests/test_rabbitmq_context.py b/unit_tests/test_rabbitmq_context.py
index 7be96398..f01baf2b 100644
--- a/unit_tests/test_rabbitmq_context.py
+++ b/unit_tests/test_rabbitmq_context.py
@@ -110,9 +110,11 @@ class TestRabbitMQSSLContext(unittest.TestCase):
 
 class TestRabbitMQClusterContext(unittest.TestCase):
 
+    @mock.patch.object(rabbitmq_context, 'leader_get')
     @mock.patch.object(rabbitmq_context, 'cmp_pkgrevno')
     @mock.patch("rabbitmq_context.config")
-    def test_context_ssl_off(self, config, mock_cmp_pkgrevno):
+    def test_context_ssl_off(self, config, mock_cmp_pkgrevno, mock_leader_get):
+        mock_leader_get.return_value = 'ignore'
         config_data = {'cluster-partition-handling': 'ignore',
                        'connection-backlog': 200,
                        'mnesia-table-loading-retry-timeout': 25000,
@@ -131,15 +133,18 @@ class TestRabbitMQClusterContext(unittest.TestCase):
             })
 
         config.assert_has_calls(
-            [mock.call("cluster-partition-handling"),
mock.call("mnesia-table-loading-retry-timeout"), + [mock.call("mnesia-table-loading-retry-timeout"), mock.call("mnesia-table-loading-retry-limit"), mock.call("connection-backlog")], mock.call('queue-master-locator')) + mock_leader_get.assert_called_once_with("cluster-partition-handling") + @mock.patch.object(rabbitmq_context, 'leader_get') @mock.patch.object(rabbitmq_context, 'cmp_pkgrevno') @mock.patch("rabbitmq_context.config") - def test_queue_master_locator_min_masters(self, config, mock_cmp_pkgrevno): + def test_queue_master_locator_min_masters(self, config, mock_cmp_pkgrevno, + mock_leader_get): + mock_leader_get.return_value = 'ignore' config_data = {'cluster-partition-handling': 'ignore', 'connection-backlog': 200, 'mnesia-table-loading-retry-timeout': 25000, @@ -157,13 +162,16 @@ class TestRabbitMQClusterContext(unittest.TestCase): 'queue_master_locator': 'min-masters', }) - config.assert_has_calls([mock.call("cluster-partition-handling"), - mock.call("connection-backlog")], + config.assert_has_calls([mock.call("connection-backlog")], mock.call('queue-master-locator')) + mock_leader_get.assert_called_once_with("cluster-partition-handling") + @mock.patch.object(rabbitmq_context, 'leader_get') @mock.patch.object(rabbitmq_context, 'cmp_pkgrevno') @mock.patch("rabbitmq_context.config") - def test_rabbit_server_3pt6(self, config, mock_cmp_pkgrevno): + def test_rabbit_server_3pt6(self, config, mock_cmp_pkgrevno, + mock_leader_get): + mock_leader_get.return_value = 'ignore' config_data = {'cluster-partition-handling': 'ignore', 'connection-backlog': 200, 'mnesia-table-loading-retry-timeout': 25000, @@ -181,11 +189,41 @@ class TestRabbitMQClusterContext(unittest.TestCase): }) config.assert_has_calls( - [mock.call("cluster-partition-handling"), - mock.call("mnesia-table-loading-retry-timeout"), + [mock.call("mnesia-table-loading-retry-timeout"), mock.call("mnesia-table-loading-retry-limit"), mock.call("connection-backlog")]) assert mock.call('queue-master-locator') not in config.mock_calls + mock_leader_get.assert_called_once_with("cluster-partition-handling") + + @mock.patch.object(rabbitmq_context, 'leader_get') + @mock.patch.object(rabbitmq_context, 'cmp_pkgrevno') + @mock.patch("rabbitmq_context.config") + def test_partition_handling(self, config, mock_cmp_pkgrevno, + mock_leader_get): + mock_leader_get.return_value = 'ignore' + config_data = {'cluster-partition-handling': 'autoheal', + 'connection-backlog': 200, + 'mnesia-table-loading-retry-timeout': 25000, + 'mnesia-table-loading-retry-limit': 12, + 'queue-master-locator': 'min-masters'} + config.side_effect = config_data.get + mock_cmp_pkgrevno.return_value = -1 + + self.assertEqual( + rabbitmq_context.RabbitMQClusterContext().__call__(), { + 'cluster_partition_handling': "ignore", + 'connection_backlog': 200, + 'mnesia_table_loading_retry_timeout': 25000, + 'mnesia_table_loading_retry_limit': 12, + }) + mock_leader_get.return_value = None + self.assertEqual( + rabbitmq_context.RabbitMQClusterContext().__call__(), { + 'cluster_partition_handling': "ignore", + 'connection_backlog': 200, + 'mnesia_table_loading_retry_timeout': 25000, + 'mnesia_table_loading_retry_limit': 12, + }) class TestRabbitMQEnvContext(unittest.TestCase):