From 6a66fd16124c60d056fc710c1f47df6855c8ba1e Mon Sep 17 00:00:00 2001 From: Felipe Reyes Date: Wed, 24 May 2017 12:22:37 -0400 Subject: [PATCH] Set clustered atrribute in the cluster's leader When forming the cluster the leader node (defined by leader_node_hostname) is the one that won't join a cluster, all the other peers will join this leader, this prevented the leader from setting the clustered attribute in the cluster relation. This patch makes all the units to run the cluster_with() function and cluster_with() skips 'rabbitmqctl join_cluster' for the nodes already present in the running section of 'rabbitmqctl cluster_status' and sets the clustered attribute if it hasn't been set already. Also cluster_with() is called within the upgrade-charm hook to allow existing deployments set the attribute if needed. Change-Id: I234046de279e1c28f7ad640c2cd7c641e864525b Closes-Bug: 1691510 --- hooks/rabbit_utils.py | 29 +++++++++++++++++-------- hooks/rabbitmq_server_relations.py | 9 ++++++-- tests/basic_deployment.py | 22 +++++++++++++++++++ unit_tests/test_rabbit_utils.py | 34 +++++++++++++++++++++++++++++- 4 files changed, 82 insertions(+), 12 deletions(-) diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py index b5f310bb..a94985db 100644 --- a/hooks/rabbit_utils.py +++ b/hooks/rabbit_utils.py @@ -45,6 +45,7 @@ from charmhelpers.contrib.network.ip import ( ) from charmhelpers.core.hookenv import ( + relation_id, relation_ids, related_units, relations_for_id, @@ -62,6 +63,7 @@ from charmhelpers.core.hookenv import ( network_get_primary_address, is_leader, leader_get, + local_unit, ) from charmhelpers.core.host import ( @@ -418,15 +420,24 @@ def join_cluster(node): def cluster_with(): log('Clustering with new node') - if clustered(): - log('Node is already clustered, skipping') - return False - # check the leader and try to cluster with it node = leader_node() if node: if node in running_nodes(): log('Host already clustered with %s.' % node) + + cluster_rid = relation_id('cluster', local_unit()) + is_clustered = relation_get(attribute='clustered', rid=cluster_rid) + + log('am I clustered?: %s' % bool(is_clustered), level=DEBUG) + if not is_clustered: + # NOTE(freyes): this node needs to be marked as clustered, it's + # part of the cluster according to 'rabbitmqctl cluster_status' + # (LP: #1691510) + relation_set(relation_id=cluster_rid, + clustered=get_unit_hostname(), + timestamp=time.time()) + return False # NOTE: The primary problem rabbitmq has clustering is when # more than one node attempts to cluster at the same time. @@ -1052,8 +1063,8 @@ def cluster_ready(): """ min_size = config('min-cluster-size') units = 1 - for relation_id in relation_ids('cluster'): - units += len(related_units(relation_id)) + for rid in relation_ids('cluster'): + units += len(related_units(rid)) if not min_size: min_size = units @@ -1063,10 +1074,10 @@ def cluster_ready(): if not clustered(): return False clustered_units = 1 - for relation_id in relation_ids('cluster'): - for remote_unit in related_units(relation_id): + for rid in relation_ids('cluster'): + for remote_unit in related_units(rid): if not relation_get(attribute='clustered', - rid=relation_id, + rid=rid, unit=remote_unit): log("{} is not yet clustered".format(remote_unit), DEBUG) diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py index eed5e162..ce456c74 100755 --- a/hooks/rabbitmq_server_relations.py +++ b/hooks/rabbitmq_server_relations.py @@ -305,9 +305,10 @@ def cluster_changed(relation_id=None, remote_unit=None): 'rabbitmq cluster config.', level=INFO) return - # cluster with node? + # NOTE(freyes): all the nodes need to marked as 'clustered' (LP: #1691510) + rabbit.cluster_with() + if not is_leader(): - rabbit.cluster_with() update_nrpe_checks() @@ -586,6 +587,10 @@ def upgrade_charm(): new = os.path.join('var/lib/rabbitmq', 'nagios.passwd') shutil.move(old, new) + # NOTE(freyes): cluster_with() will take care of marking the node as + # 'clustered' for existing deployments (LP: #1691510). + rabbit.cluster_with() + MAN_PLUGIN = 'rabbitmq_management' diff --git a/tests/basic_deployment.py b/tests/basic_deployment.py index 7c0d5ea3..57e95563 100644 --- a/tests/basic_deployment.py +++ b/tests/basic_deployment.py @@ -313,6 +313,28 @@ class RmqBasicDeployment(OpenStackAmuletDeployment): u.log.info('OK\n') + def test_103_clustered_attribute(self): + """Verify the 'clustered' attribute was set in the 'cluster' relation + for all the units""" + + for unit in self.d.sentry['rabbitmq-server']: + rid, code = unit.run('relation-ids cluster') + print('unit: %s , code: %s , output: %s' % (unit.info['unit_name'], + code, rid)) + assert code == 0 + + clustered, code = unit.run('relation-get -r %s clustered %s' + % (rid, unit.info['unit_name'])) + print('unit: %s , code: %s , output: %s' % (unit.info['unit_name'], + code, clustered)) + assert code == 0 + + hostname, code = unit.run('hostname') + print('unit: %s , code: %s , output: %s' % (unit.info['unit_name'], + code, hostname)) + assert code == 0 + assert hostname == clustered + def test_200_rmq_cinder_amqp_relation(self): """Verify the rabbitmq-server:cinder amqp relation data""" u.log.debug('Checking rmq:cinder amqp relation data...') diff --git a/unit_tests/test_rabbit_utils.py b/unit_tests/test_rabbit_utils.py index e4c3fde8..db4f6b19 100644 --- a/unit_tests/test_rabbit_utils.py +++ b/unit_tests/test_rabbit_utils.py @@ -253,6 +253,9 @@ class UtilsTests(CharmTestCase): 'rabbit@juju-devel7-machine-11'], stderr=-2) + @mock.patch('rabbit_utils.relation_get') + @mock.patch('rabbit_utils.relation_id') + @mock.patch('rabbit_utils.peer_retrieve') @mock.patch('rabbit_utils.subprocess.check_call') @mock.patch('rabbit_utils.subprocess.check_output') @mock.patch('rabbit_utils.time') @@ -263,11 +266,14 @@ class UtilsTests(CharmTestCase): def test_cluster_with_clustered(self, mock_cmp_pkgrevno, mock_clustered, mock_leader_node, mock_running_nodes, mock_time, mock_check_output, - mock_check_call): + mock_check_call, mock_peer_retrieve, + mock_relation_id, mock_relation_get): mock_clustered.return_value = True + mock_peer_retrieve.return_value = 'juju-devel7-machine-11' mock_leader_node.return_value = 'rabbit@juju-devel7-machine-11' mock_running_nodes.return_value = ['rabbit@juju-devel5-machine-19', 'rabbit@juju-devel7-machine-11'] + mock_relation_id.return_value = 'cluster:1' rabbit_utils.cluster_with() self.assertEqual(0, mock_check_output.call_count) @@ -289,6 +295,32 @@ class UtilsTests(CharmTestCase): rabbit_utils.cluster_with() self.assertEqual(0, mock_check_output.call_count) + @mock.patch('time.time') + @mock.patch('rabbit_utils.relation_set') + @mock.patch('rabbit_utils.get_unit_hostname') + @mock.patch('rabbit_utils.relation_get') + @mock.patch('rabbit_utils.relation_id') + @mock.patch('rabbit_utils.running_nodes') + @mock.patch('rabbit_utils.peer_retrieve') + def test_cluster_with_single_node(self, mock_peer_retrieve, + mock_running_nodes, mock_relation_id, + mock_relation_get, + mock_get_unit_hostname, + mock_relation_set, mock_time): + + mock_peer_retrieve.return_value = 'localhost' + mock_running_nodes.return_value = ['rabbit@localhost'] + mock_relation_id.return_value = 'cluster:1' + mock_relation_get.return_value = False + mock_get_unit_hostname.return_value = 'localhost' + mock_time.return_value = 1234.1 + + self.assertFalse(rabbit_utils.cluster_with()) + + mock_relation_set.assert_called_with(relation_id='cluster:1', + clustered='localhost', + timestamp=1234.1) + @mock.patch('rabbit_utils.application_version_set') @mock.patch('rabbit_utils.get_upstream_version') def test_assess_status(self, mock_get_upstream_version,