Set clustered atrribute in the cluster's leader

When forming the cluster the leader node (defined by
leader_node_hostname) is the one that won't join a cluster, all the
other peers will join this leader, this prevented the leader from
setting the clustered attribute in the cluster relation.

This patch makes all the units to run the cluster_with() function
and cluster_with() skips 'rabbitmqctl join_cluster' for the nodes
already present in the running section of 'rabbitmqctl cluster_status'
and sets the clustered attribute if it hasn't been set already.

Also cluster_with() is called within the upgrade-charm hook to allow
existing deployments set the attribute if needed.

Change-Id: I234046de279e1c28f7ad640c2cd7c641e864525b
Closes-Bug: 1691510
This commit is contained in:
Felipe Reyes 2017-05-24 12:22:37 -04:00 committed by David Ames
parent d25c496c82
commit 6a66fd1612
4 changed files with 82 additions and 12 deletions

View File

@ -45,6 +45,7 @@ from charmhelpers.contrib.network.ip import (
)
from charmhelpers.core.hookenv import (
relation_id,
relation_ids,
related_units,
relations_for_id,
@ -62,6 +63,7 @@ from charmhelpers.core.hookenv import (
network_get_primary_address,
is_leader,
leader_get,
local_unit,
)
from charmhelpers.core.host import (
@ -418,15 +420,24 @@ def join_cluster(node):
def cluster_with():
log('Clustering with new node')
if clustered():
log('Node is already clustered, skipping')
return False
# check the leader and try to cluster with it
node = leader_node()
if node:
if node in running_nodes():
log('Host already clustered with %s.' % node)
cluster_rid = relation_id('cluster', local_unit())
is_clustered = relation_get(attribute='clustered', rid=cluster_rid)
log('am I clustered?: %s' % bool(is_clustered), level=DEBUG)
if not is_clustered:
# NOTE(freyes): this node needs to be marked as clustered, it's
# part of the cluster according to 'rabbitmqctl cluster_status'
# (LP: #1691510)
relation_set(relation_id=cluster_rid,
clustered=get_unit_hostname(),
timestamp=time.time())
return False
# NOTE: The primary problem rabbitmq has clustering is when
# more than one node attempts to cluster at the same time.
@ -1052,8 +1063,8 @@ def cluster_ready():
"""
min_size = config('min-cluster-size')
units = 1
for relation_id in relation_ids('cluster'):
units += len(related_units(relation_id))
for rid in relation_ids('cluster'):
units += len(related_units(rid))
if not min_size:
min_size = units
@ -1063,10 +1074,10 @@ def cluster_ready():
if not clustered():
return False
clustered_units = 1
for relation_id in relation_ids('cluster'):
for remote_unit in related_units(relation_id):
for rid in relation_ids('cluster'):
for remote_unit in related_units(rid):
if not relation_get(attribute='clustered',
rid=relation_id,
rid=rid,
unit=remote_unit):
log("{} is not yet clustered".format(remote_unit),
DEBUG)

View File

@ -305,9 +305,10 @@ def cluster_changed(relation_id=None, remote_unit=None):
'rabbitmq cluster config.', level=INFO)
return
# cluster with node?
# NOTE(freyes): all the nodes need to marked as 'clustered' (LP: #1691510)
rabbit.cluster_with()
if not is_leader():
rabbit.cluster_with()
update_nrpe_checks()
@ -586,6 +587,10 @@ def upgrade_charm():
new = os.path.join('var/lib/rabbitmq', 'nagios.passwd')
shutil.move(old, new)
# NOTE(freyes): cluster_with() will take care of marking the node as
# 'clustered' for existing deployments (LP: #1691510).
rabbit.cluster_with()
MAN_PLUGIN = 'rabbitmq_management'

View File

@ -313,6 +313,28 @@ class RmqBasicDeployment(OpenStackAmuletDeployment):
u.log.info('OK\n')
def test_103_clustered_attribute(self):
"""Verify the 'clustered' attribute was set in the 'cluster' relation
for all the units"""
for unit in self.d.sentry['rabbitmq-server']:
rid, code = unit.run('relation-ids cluster')
print('unit: %s , code: %s , output: %s' % (unit.info['unit_name'],
code, rid))
assert code == 0
clustered, code = unit.run('relation-get -r %s clustered %s'
% (rid, unit.info['unit_name']))
print('unit: %s , code: %s , output: %s' % (unit.info['unit_name'],
code, clustered))
assert code == 0
hostname, code = unit.run('hostname')
print('unit: %s , code: %s , output: %s' % (unit.info['unit_name'],
code, hostname))
assert code == 0
assert hostname == clustered
def test_200_rmq_cinder_amqp_relation(self):
"""Verify the rabbitmq-server:cinder amqp relation data"""
u.log.debug('Checking rmq:cinder amqp relation data...')

View File

@ -253,6 +253,9 @@ class UtilsTests(CharmTestCase):
'rabbit@juju-devel7-machine-11'],
stderr=-2)
@mock.patch('rabbit_utils.relation_get')
@mock.patch('rabbit_utils.relation_id')
@mock.patch('rabbit_utils.peer_retrieve')
@mock.patch('rabbit_utils.subprocess.check_call')
@mock.patch('rabbit_utils.subprocess.check_output')
@mock.patch('rabbit_utils.time')
@ -263,11 +266,14 @@ class UtilsTests(CharmTestCase):
def test_cluster_with_clustered(self, mock_cmp_pkgrevno, mock_clustered,
mock_leader_node, mock_running_nodes,
mock_time, mock_check_output,
mock_check_call):
mock_check_call, mock_peer_retrieve,
mock_relation_id, mock_relation_get):
mock_clustered.return_value = True
mock_peer_retrieve.return_value = 'juju-devel7-machine-11'
mock_leader_node.return_value = 'rabbit@juju-devel7-machine-11'
mock_running_nodes.return_value = ['rabbit@juju-devel5-machine-19',
'rabbit@juju-devel7-machine-11']
mock_relation_id.return_value = 'cluster:1'
rabbit_utils.cluster_with()
self.assertEqual(0, mock_check_output.call_count)
@ -289,6 +295,32 @@ class UtilsTests(CharmTestCase):
rabbit_utils.cluster_with()
self.assertEqual(0, mock_check_output.call_count)
@mock.patch('time.time')
@mock.patch('rabbit_utils.relation_set')
@mock.patch('rabbit_utils.get_unit_hostname')
@mock.patch('rabbit_utils.relation_get')
@mock.patch('rabbit_utils.relation_id')
@mock.patch('rabbit_utils.running_nodes')
@mock.patch('rabbit_utils.peer_retrieve')
def test_cluster_with_single_node(self, mock_peer_retrieve,
mock_running_nodes, mock_relation_id,
mock_relation_get,
mock_get_unit_hostname,
mock_relation_set, mock_time):
mock_peer_retrieve.return_value = 'localhost'
mock_running_nodes.return_value = ['rabbit@localhost']
mock_relation_id.return_value = 'cluster:1'
mock_relation_get.return_value = False
mock_get_unit_hostname.return_value = 'localhost'
mock_time.return_value = 1234.1
self.assertFalse(rabbit_utils.cluster_with())
mock_relation_set.assert_called_with(relation_id='cluster:1',
clustered='localhost',
timestamp=1234.1)
@mock.patch('rabbit_utils.application_version_set')
@mock.patch('rabbit_utils.get_upstream_version')
def test_assess_status(self, mock_get_upstream_version,