From 0c2f3e6b1b598df9a4dd5068b34b20bd38ef37fd Mon Sep 17 00:00:00 2001 From: David Ames Date: Thu, 20 Oct 2016 15:59:38 -0700 Subject: [PATCH] Fix DNS resolution problems There are two distinct DNS resolution problems. One, the nodename reverse DNS resolution. And two, resolution of peers. This attempts to fix both problems. Nodename is only required when rabbitmq runs multiple instances on the same host. Having this set complicates the DNS requirements. Removing this setting may simplify the DNS problems this charm has faced. For peer resolution force the use of hostname rather than dynamically attempting to resolve one from an IP. Set /etc/hosts with the local hostname and with each peer. Standardize the way an IPv4 or IPv6 address is selected throughout the charm. Standardize the way a hostname is selected throughout the charm. Partial-Bug: 1584902 Change-Id: I105eb2684e61a553a52c5a944e8c562945e2a6eb --- hooks/rabbit_utils.py | 69 +++++++++++++++-- hooks/rabbitmq_server_relations.py | 81 +++----------------- tests/basic_deployment.py | 18 ----- unit_tests/test_rabbit_utils.py | 6 +- unit_tests/test_rabbitmq_server_relations.py | 41 ++++++---- 5 files changed, 100 insertions(+), 115 deletions(-) diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py index 4d18a576..4659b439 100644 --- a/hooks/rabbit_utils.py +++ b/hooks/rabbit_utils.py @@ -21,6 +21,7 @@ import tempfile import random import time import socket +from collections import OrderedDict from rabbitmq_context import ( RabbitMQSSLContext, @@ -38,6 +39,11 @@ from charmhelpers.contrib.openstack.utils import ( is_unit_paused_set, ) +from charmhelpers.contrib.network.ip import ( + get_ipv6_addr, + get_address_in_network, +) + from charmhelpers.core.hookenv import ( relation_ids, related_units, @@ -49,6 +55,8 @@ from charmhelpers.core.hookenv import ( unit_get, relation_set, application_version_set, + config, + network_get_primary_address, ) from charmhelpers.core.host import ( @@ -66,9 +74,9 @@ from charmhelpers.contrib.peerstorage import ( peer_retrieve ) + from charmhelpers.fetch import get_upstream_version -from collections import OrderedDict PACKAGES = ['rabbitmq-server', 'python-amqplib', 'lockfile-progs'] @@ -337,7 +345,7 @@ def wait_app(): pid_file = run_dir + 'pid' else: pid_file = '/var/lib/rabbitmq/mnesia/rabbit@' \ - + get_local_nodename() + '.pid' + + socket.gethostname() + '.pid' log('Waiting for rabbitmq app to start: {}'.format(pid_file), DEBUG) try: rabbitmqctl('wait', pid_file) @@ -402,7 +410,7 @@ def cluster_with(): join_cluster(node) # NOTE: toggle the cluster relation to ensure that any peers # already clustered re-assess status correctly - relation_set(clustered=get_local_nodename()) + relation_set(clustered=get_unit_hostname()) return True except subprocess.CalledProcessError as e: status_set('blocked', 'Failed to cluster with %s. Exception: %s' @@ -674,12 +682,17 @@ def running_nodes(): @cached def leader_node(): - ''' Provide the leader node for clustering ''' + ''' Provide the leader node for clustering + + @returns leader node's hostname or None + ''' # Each rabbitmq node should join_cluster with the leader # to avoid split-brain clusters. - leader_node_ip = peer_retrieve('leader_node_ip') - if leader_node_ip: - return "rabbit@" + get_node_hostname(leader_node_ip) + leader_node_hostname = peer_retrieve('leader_node_hostname') + if leader_node_hostname: + return "rabbit@" + leader_node_hostname + else: + return None def get_node_hostname(ip_addr): @@ -857,3 +870,45 @@ def _pause_resume_helper(f, configs): f(assess_status_func(configs), services=services(), ports=None) + + +def get_unit_ip(amqp_relation=False): + """Return this unit's IP. + Future proof to allow for network spaces or other more complex addresss + selection. + + @raises Exception if prefer-ipv6 is configured but IPv6 unsupported. + @returns IPv6 or IPv4 address + """ + + fallback = get_host_ip(unit_get('private-address')) + if config('prefer-ipv6'): + assert_charm_supports_ipv6() + return get_ipv6_addr()[0] + elif amqp_relation: + if config('access-network'): + # NOTE(jamespage) + # override private-address settings if access-network is + # configured and an appropriate network interface is + # configured. + return get_address_in_network(config('access-network'), + fallback) + else: + # NOTE(jamespage) + # Try using network spaces if access-network is not + # configured, fallback to private address if not + # supported + try: + return network_get_primary_address('amqp') + except NotImplementedError: + return fallback + else: + return fallback + + +def get_unit_hostname(): + """Return this unit's hostname. + + @returns hostname + """ + return socket.gethostname() diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py index a4fcbb37..da112c85 100755 --- a/hooks/rabbitmq_server_relations.py +++ b/hooks/rabbitmq_server_relations.py @@ -19,7 +19,6 @@ import shutil import sys import subprocess import glob -import socket try: import yaml # flake8: noqa @@ -42,14 +41,9 @@ from charmhelpers.contrib.hahelpers.cluster import ( is_elected_leader ) from charmhelpers.contrib.openstack.utils import ( - get_host_ip, is_unit_paused_set, ) -from charmhelpers.contrib.network.ip import ( - get_ipv6_addr -) - import charmhelpers.contrib.storage.linux.ceph as ceph from charmhelpers.contrib.openstack.utils import save_script_rc from charmhelpers.contrib.hardening.harden import harden @@ -74,7 +68,6 @@ from charmhelpers.core.hookenv import ( service_name, local_unit, config, - unit_get, is_relation_made, Hooks, UnregisteredHookError, @@ -82,7 +75,6 @@ from charmhelpers.core.hookenv import ( charm_dir, status_set, unit_private_ip, - network_get_primary_address, ) from charmhelpers.core.host import ( cmp_pkgrevno, @@ -102,7 +94,6 @@ from charmhelpers.contrib.peerstorage import ( leader_get, ) -from charmhelpers.contrib.network.ip import get_address_in_network hooks = Hooks() @@ -116,7 +107,7 @@ SCRIPTS_DIR = '/usr/local/bin' STATS_CRONFILE = '/etc/cron.d/rabbitmq-stats' STATS_DATAFILE = os.path.join(RABBIT_DIR, 'data', '{}_queue_stats.dat' - ''.format(socket.gethostname())) + ''.format(rabbit.get_unit_hostname())) @hooks.hook('install.real') @@ -126,25 +117,6 @@ def install(): # NOTE(jamespage) install actually happens in config_changed hook -def configure_nodename(): - '''Set RABBITMQ_NODENAME to something that's resolvable by my peers''' - nodename = rabbit.get_local_nodename() - log('configuring nodename', level=INFO) - if (nodename and - rabbit.get_node_name() != 'rabbit@%s' % nodename): - log('forcing nodename=%s' % nodename, level=INFO) - # would like to have used the restart_on_change decorator, but - # need to stop it under current nodename prior to updating env - log('Stopping rabbitmq-server.') - service_stop('rabbitmq-server') - rabbit.update_rmq_env_conf(hostname='rabbit@%s' % nodename, - ipv6=config('prefer-ipv6')) - if not is_unit_paused_set(): - log('Starting rabbitmq-server.') - service_restart('rabbitmq-server') - rabbit.wait_app() - - def configure_amqp(username, vhost, admin=False): # get and update service password password = rabbit.get_rabbit_password(username) @@ -165,10 +137,7 @@ def configure_amqp(username, vhost, admin=False): @hooks.hook('amqp-relation-changed') def amqp_changed(relation_id=None, remote_unit=None): - if config('prefer-ipv6'): - host_addr = get_ipv6_addr()[0] - else: - host_addr = unit_get('private-address') + host_addr = rabbit.get_unit_ip() if not is_elected_leader('res_rabbitmq_vip'): # NOTE(jamespage) clear relation to deal with data being @@ -225,30 +194,9 @@ def amqp_changed(relation_id=None, remote_unit=None): queues[amqp]['username'], queues[amqp]['vhost']) - if config('prefer-ipv6'): - relation_settings['private-address'] = host_addr - else: - hostname = None - fallback = unit_get('private-address') - if config('access-network'): - # NOTE(jamespage) - # override private-address settings if access-network is - # configured and an appropriate network interface is - # configured. - hostname = get_address_in_network(config('access-network'), - fallback) - else: - # NOTE(jamespage) - # Try using network spaces if access-network is not - # configured, fallback to private address if not - # supported - try: - hostname = network_get_primary_address('amqp') - except NotImplementedError: - hostname = fallback - - relation_settings['hostname'] = \ - relation_settings['private-address'] = hostname + relation_settings['hostname'] = \ + relation_settings['private-address'] = \ + rabbit.get_unit_ip(amqp_relation=True) ssl_utils.configure_client_ssl(relation_settings) @@ -311,15 +259,10 @@ def is_sufficient_peers(): @hooks.hook('cluster-relation-joined') def cluster_joined(relation_id=None): relation_settings = { - 'hostname': rabbit.get_local_nodename(), + 'hostname': rabbit.get_unit_hostname(), + 'private-address': rabbit.get_unit_ip(), } - if config('prefer-ipv6'): - relation_settings['private-address'] = get_ipv6_addr()[0] - else: - relation_settings['private-address'] = get_host_ip( - unit_get('private-address')) - relation_set(relation_id=relation_id, relation_settings=relation_settings) @@ -329,8 +272,6 @@ def cluster_joined(relation_id=None): 'rabbitmq cluster config.') return - configure_nodename() - try: if not is_leader(): log('Not the leader, deferring cookie propagation to leader') @@ -353,6 +294,7 @@ def cluster_joined(relation_id=None): cookie = open(rabbit.COOKIE_PATH, 'r').read().strip() peer_store('cookie', cookie) peer_store('leader_node_ip', unit_private_ip()) + peer_store('leader_node_hostname', rabbit.get_unit_hostname()) @hooks.hook('cluster-relation-changed') @@ -670,8 +612,9 @@ MAN_PLUGIN = 'rabbitmq_management' @harden() def config_changed(): - if config('prefer-ipv6'): - rabbit.assert_charm_supports_ipv6() + # Update hosts with this unit's information + rabbit.update_hosts_file({rabbit.get_unit_ip(): + rabbit.get_unit_hostname()}) # Add archive source if provided add_source(config('source'), config('key')) @@ -690,8 +633,6 @@ def config_changed(): chown(RABBIT_DIR, rabbit.RABBIT_USER, rabbit.RABBIT_USER) chmod(RABBIT_DIR, 0o775) - configure_nodename() - if config('management_plugin') is True: rabbit.enable_plugin(MAN_PLUGIN) open_port(55672) diff --git a/tests/basic_deployment.py b/tests/basic_deployment.py index 9a8a5c5c..aa13cd73 100644 --- a/tests/basic_deployment.py +++ b/tests/basic_deployment.py @@ -380,24 +380,6 @@ class RmqBasicDeployment(OpenStackAmuletDeployment): u.log.info('OK\n') - def test_300_rmq_config(self): - """Verify the data in the rabbitmq conf file.""" - conf = '/etc/rabbitmq/rabbitmq-env.conf' - sentry_units = self._get_rmq_sentry_units() - for unit in sentry_units: - host_name = unit.file_contents('/etc/hostname').strip() - u.log.debug('Checking rabbitmq config file data on ' - '{} ({})...'.format(unit.info['unit_name'], - host_name)) - expected = { - 'RABBITMQ_NODENAME': 'rabbit@{}'.format(host_name) - } - - file_contents = unit.file_contents(conf) - u.validate_sectionless_conf(file_contents, expected) - - u.log.info('OK\n') - def test_400_rmq_cluster_running_nodes(self): """Verify that cluster status from each rmq juju unit shows every cluster node as a running member in that cluster.""" diff --git a/unit_tests/test_rabbit_utils.py b/unit_tests/test_rabbit_utils.py index 1651efdf..25177f2c 100644 --- a/unit_tests/test_rabbit_utils.py +++ b/unit_tests/test_rabbit_utils.py @@ -180,11 +180,9 @@ class UtilsTests(unittest.TestCase): 'juju-devel3-machine-13') mock_get_hostname.assert_called_with('192.168.20.50', fqdn=False) - @mock.patch('rabbit_utils.get_node_hostname') @mock.patch('rabbit_utils.peer_retrieve') - def test_leader_node(self, mock_peer_retrieve, mock_get_node_hostname): - mock_peer_retrieve.return_value = '192.168.20.50' - mock_get_node_hostname.return_value = 'juju-devel3-machine-15' + def test_leader_node(self, mock_peer_retrieve): + mock_peer_retrieve.return_value = 'juju-devel3-machine-15' self.assertEqual(rabbit_utils.leader_node(), 'rabbit@juju-devel3-machine-15') diff --git a/unit_tests/test_rabbitmq_server_relations.py b/unit_tests/test_rabbitmq_server_relations.py index 04f10ad8..e09503c3 100644 --- a/unit_tests/test_rabbitmq_server_relations.py +++ b/unit_tests/test_rabbitmq_server_relations.py @@ -38,20 +38,25 @@ class RelationUtil(TestCase): super(RelationUtil, self).setUp() @patch('rabbitmq_server_relations.peer_store_and_set') - @patch('rabbitmq_server_relations.get_ipv6_addr') @patch('rabbitmq_server_relations.config') @patch('rabbitmq_server_relations.relation_set') @patch('rabbitmq_server_relations.cmp_pkgrevno') @patch('rabbitmq_server_relations.is_clustered') @patch('rabbitmq_server_relations.ssl_utils.configure_client_ssl') - @patch('rabbitmq_server_relations.unit_get') + @patch('rabbitmq_server_relations.rabbit.get_unit_ip') @patch('rabbitmq_server_relations.relation_get') @patch('rabbitmq_server_relations.is_elected_leader') def test_amqp_changed_compare_versions_ha_queues( self, - is_elected_leader, relation_get, unit_get, configure_client_ssl, - is_clustered, cmp_pkgrevno, relation_set, mock_config, - mock_get_ipv6_addr, mock_peer_store_and_set): + is_elected_leader, + relation_get, + get_unit_ip, + configure_client_ssl, + is_clustered, + cmp_pkgrevno, + relation_set, + mock_config, + mock_peer_store_and_set): """ Compare version above and below 3.0.1. Make sure ha_queues is set correctly on each side. @@ -65,8 +70,7 @@ class RelationUtil(TestCase): mock_config.side_effect = config host_addr = "10.1.2.3" - unit_get.return_value = host_addr - mock_get_ipv6_addr.return_value = [host_addr] + get_unit_ip.return_value = host_addr is_elected_leader.return_value = True relation_get.return_value = {} is_clustered.return_value = False @@ -87,20 +91,25 @@ class RelationUtil(TestCase): relation_id=None) @patch('rabbitmq_server_relations.peer_store_and_set') - @patch('rabbitmq_server_relations.get_ipv6_addr') @patch('rabbitmq_server_relations.config') @patch('rabbitmq_server_relations.relation_set') @patch('rabbitmq_server_relations.cmp_pkgrevno') @patch('rabbitmq_server_relations.is_clustered') @patch('rabbitmq_server_relations.ssl_utils.configure_client_ssl') - @patch('rabbitmq_server_relations.unit_get') + @patch('rabbitmq_server_relations.rabbit.get_unit_ip') @patch('rabbitmq_server_relations.relation_get') @patch('rabbitmq_server_relations.is_elected_leader') def test_amqp_changed_compare_versions_ha_queues_prefer_ipv6( self, - is_elected_leader, relation_get, unit_get, configure_client_ssl, - is_clustered, cmp_pkgrevno, relation_set, mock_config, - mock_get_ipv6_addr, mock_peer_store_and_set): + is_elected_leader, + relation_get, + get_unit_ip, + configure_client_ssl, + is_clustered, + cmp_pkgrevno, + relation_set, + mock_config, + mock_peer_store_and_set): """ Compare version above and below 3.0.1. Make sure ha_queues is set correctly on each side. @@ -114,9 +123,7 @@ class RelationUtil(TestCase): mock_config.side_effect = config ipv6_addr = "2001:db8:1:0:f816:3eff:fed6:c140" - mock_get_ipv6_addr.return_value = [ipv6_addr] - host_addr = "10.1.2.3" - unit_get.return_value = host_addr + get_unit_ip.return_value = ipv6_addr is_elected_leader.return_value = True relation_get.return_value = {} is_clustered.return_value = False @@ -125,13 +132,15 @@ class RelationUtil(TestCase): rabbitmq_server_relations.amqp_changed(None, None) mock_peer_store_and_set.assert_called_with( relation_settings={'private-address': ipv6_addr, + 'hostname': ipv6_addr, 'ha_queues': True}, relation_id=None) cmp_pkgrevno.return_value = 1 rabbitmq_server_relations.amqp_changed(None, None) mock_peer_store_and_set.assert_called_with( - relation_settings={'private-address': ipv6_addr}, + relation_settings={'private-address': ipv6_addr, + 'hostname': ipv6_addr}, relation_id=None) @patch.object(rabbitmq_server_relations, 'is_leader')