diff --git a/hooks/install b/hooks/install
index 29ff6894..9be898b7 100755
--- a/hooks/install
+++ b/hooks/install
@@ -2,7 +2,7 @@
 # Wrapper to deal with newer Ubuntu versions that don't have py2 installed
 # by default.
 
-declare -a DEPS=('apt' 'netaddr' 'netifaces' 'pip' 'yaml' 'dnspython')
+declare -a DEPS=('apt' 'netaddr' 'netifaces' 'pip' 'yaml' 'dnspython' 'requests')
 
 check_and_install() {
     pkg="${1}-${2}"
diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py
index 96435987..ac3142b8 100644
--- a/hooks/rabbit_utils.py
+++ b/hooks/rabbit_utils.py
@@ -20,6 +20,7 @@ import glob
 import tempfile
 import time
 import socket
+
 from collections import OrderedDict
 
 from rabbitmq_context import (
@@ -220,27 +221,36 @@ def user_exists(user):
     for line in out.split('\n')[1:]:
         _user = line.split('\t')[0]
         if _user == user:
-            admin = line.split('\t')[1]
-            return True, (admin == '[administrator]')
-    return False, False
+            return True
+    return False
 
 
-def create_user(user, password, admin=False):
-    exists, is_admin = user_exists(user)
+def create_user(user, password, tags=None):
+    """Ensure a RabbitMQ user exists and carries exactly the given tags.
+
+    :param user: name of the RabbitMQ user
+    :param password: password used if the user has to be created
+    :param tags: iterable of tag names, e.g. ['administrator']; None or
+                 empty clears every tag from the user
+    """
+    # None rather than a mutable [] default shared between calls.
+    tags = list(tags or [])
+    exists = user_exists(user)
 
     if not exists:
         log('Creating new user (%s).' % user)
         rabbitmqctl('add_user', user, password)
 
-    if admin == is_admin:
-        return
+    if 'administrator' in tags:
+        log('Granting admin access to {}'.format(user))
 
-    if admin:
-        log('Granting user (%s) admin access.' % user)
-        rabbitmqctl('set_user_tags', user, 'administrator')
-    else:
-        log('Revoking user (%s) admin access.' % user)
-        rabbitmqctl('set_user_tags', user)
+    log('Adding tags [{}] to user {}'.format(
+        ', '.join(tags),
+        user
+    ))
+    # One argv entry per tag: a single space-joined string would reach
+    # rabbitmqctl as one tag name, and '' would be passed for no tags.
+    rabbitmqctl('set_user_tags', user, *tags)
 
 
 def grant_permissions(user, vhost):
diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py
index f066aab0..0cd29c45 100755
--- a/hooks/rabbitmq_server_relations.py
+++ b/hooks/rabbitmq_server_relations.py
@@ -29,6 +29,17 @@ except ImportError:
         subprocess.check_call(['apt-get', 'install', '-y', 'python3-yaml'])
     import yaml  # flake8: noqa
 
+try:
+    import requests  # flake8: noqa
+except ImportError:
+    if sys.version_info.major == 2:
+        subprocess.check_call(['apt-get', 'install', '-y',
+                               'python-requests'])
+    else:
+        subprocess.check_call(['apt-get', 'install', '-y',
+                               'python3-requests'])
+    import requests  # flake8: noqa
+
 import rabbit_utils as rabbit
 import ssl_utils
 
@@ -189,7 +200,10 @@ def configure_amqp(username, vhost, relation_id, admin=False):
 
     # update vhost
     rabbit.create_vhost(vhost)
-    rabbit.create_user(username, password, admin)
+    if admin:
+        rabbit.create_user(username, password, ['administrator'])
+    else:
+        rabbit.create_user(username, password)
     rabbit.grant_permissions(username, vhost)
 
     # NOTE(freyes): after rabbitmq-server 3.0 the method to define HA in the
@@ -584,10 +598,17 @@ def ceph_changed():
 @hooks.hook('nrpe-external-master-relation-changed')
 def update_nrpe_checks():
     if os.path.isdir(NAGIOS_PLUGINS):
-        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
+        rsync(os.path.join(charm_dir(), 'scripts',
                            'check_rabbitmq.py'),
               os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
-        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
+        rsync(os.path.join(charm_dir(), 'scripts',
                            'check_rabbitmq_queues.py'),
               os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_queues.py'))
+        if config('management_plugin'):
+            # Copied under the same NAGIOS_PLUGINS guard as the other
+            # plugins and independently of the stats cron job, because the
+            # cluster NRPE check only depends on management_plugin.
+            rsync(os.path.join(charm_dir(), 'scripts',
+                               'check_rabbitmq_cluster.py'),
+                  os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_cluster.py'))
     if config('stats_cron_schedule'):
@@ -612,7 +633,7 @@ def update_nrpe_checks():
     password = rabbit.get_rabbit_password(user, local=True)
 
     rabbit.create_vhost(vhost)
-    rabbit.create_user(user, password)
+    rabbit.create_user(user, password, ['monitoring'])
     rabbit.grant_permissions(user, vhost)
 
     nrpe_compat = nrpe.NRPE(hostname=hostname)
@@ -633,6 +654,19 @@ def update_nrpe_checks():
             check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
                 NAGIOS_PLUGINS, cmd, STATS_DATAFILE)
         )
+    if config('management_plugin'):
+        # add NRPE check
+        nrpe_compat.add_check(
+            shortname=rabbit.RABBIT_USER + '_cluster',
+            description='Check RabbitMQ Cluster',
+            check_cmd='{}/check_rabbitmq_cluster.py --port {} --user {} --password {}'.format(
+                NAGIOS_PLUGINS,
+                rabbit.get_managment_port(),
+                user,
+                password
+            )
+        )
+
     nrpe_compat.write()
 
 
diff --git a/scripts/check_rabbitmq_cluster.py b/scripts/check_rabbitmq_cluster.py
new file mode 100755
index 00000000..251da161
--- /dev/null
+++ b/scripts/check_rabbitmq_cluster.py
@@ -0,0 +1,136 @@
+#!/usr/bin/python
+"""
+Checks for RabbitMQ cluster partitions.
+
+Copyright (C) 2017 Canonical
+All Rights Reserved
+Author: James Hebden
+
+This Nagios check will use the HTTP management API
+to fetch cluster status, and check it for problems
+such as partitions and offline nodes.
+"""
+
+from optparse import OptionParser
+import json
+import requests
+import socket
+import sys
+
+# Seconds to wait for the management API before reporting UNKNOWN, so a
+# hung management API cannot stall the check indefinitely.
+REQUEST_TIMEOUT = 10
+
+
+def parse_options():
+    """Parse the command line and return the optparse options object."""
+    parser = OptionParser()
+    parser.add_option("--host", dest="host",
+                      help="RabbitMQ host to connect to [default=%default]",
+                      metavar="HOST", default="localhost")
+    # The check talks HTTP to the management plugin, which listens on
+    # 15672 by default; 5672 is the AMQP listener and never answers HTTP.
+    parser.add_option("--port", dest="port", type="int",
+                      help="port the RabbitMQ management API is running on "
+                           "[default=%default]",
+                      metavar="PORT", default=15672)
+    parser.add_option("-v", "--verbose", default=False, action="store_true",
+                      help="verbose run")
+    parser.add_option("-u", "--user", dest="user", default="guest",
+                      help="RabbitMQ user [default=%default]",
+                      metavar="USER")
+    parser.add_option("-p", "--password", dest="password", default="guest",
+                      help="RabbitMQ password [default=%default]",
+                      metavar="PASSWORD")
+    # NOTE: this option takes a value and any non-empty value enables TLS.
+    parser.add_option("-t", "--tls", dest="tls", default=False,
+                      help="Use TLS to talk to RabbitMQ? [default=%default]",
+                      metavar="TLS")
+    parser.add_option("-H", "--hostname",
+                      dest="hostname",
+                      default=socket.gethostname(),
+                      help="""Override hostname used when querying
+                      cluster status [default=%default]""")
+    parser.add_option("-R", "--rabbitname",
+                      dest="rabbitname",
+                      default="rabbit",
+                      help="""Override rabbit user ID used when querying
+                      cluster status [default=%default]""")
+    options, _args = parser.parse_args()
+    return options
+
+
+def main():
+    """Query the node's status and exit with a Nagios status code.
+
+    Exits 0 (OK) when no partitions are reported, 2 (CRITICAL) when the
+    node reports partitions, and 3 (UNKNOWN) when the API cannot be
+    reached or its answer cannot be interpreted.
+    """
+    options = parse_options()
+
+    if options.verbose:
+        # Format inside the call: print(...) % (...) only works as a
+        # Python 2 print statement and raises TypeError on Python 3.
+        print("Checking host: %s@%s:%d" % (
+            options.user,
+            options.host,
+            options.port
+        ))
+
+    proto = 'https' if options.tls else 'http'
+    query = '{0}://{1}:{2}/api/nodes/{3}@{4}'.format(
+        proto,
+        options.host,
+        options.port,
+        options.rabbitname,
+        options.hostname,
+    )
+
+    try:
+        # Credentials go through HTTP basic auth rather than the URL, so
+        # reserved characters (@ : / #) in them cannot corrupt the URL.
+        partition_data = requests.get(
+            query,
+            auth=(options.user, options.password),
+            timeout=REQUEST_TIMEOUT
+        ).text
+    except requests.RequestException as error:
+        print(
+            "ERROR: could not connect to cluster: {0}".format(
+                error
+            )
+        )
+        sys.exit(3)
+
+    if options.verbose:
+        print(partition_data)
+
+    try:
+        node_status = json.loads(partition_data)
+        partitions = len(node_status['partitions'])
+        cluster = len(node_status['cluster_links'])
+    except (ValueError, KeyError, TypeError):
+        # Not JSON, or JSON without the expected keys (e.g. an error
+        # document instead of a node description).
+        print(
+            "UNKNOWN: Could not parse cluster status data returned by RabbitMQ"
+        )
+        sys.exit(3)
+
+    # NOTE(review): the original also tested `cluster < 0`, which can never
+    # hold for a len(); confirm what link count should count as a failure
+    # before alerting on it.
+    if partitions > 0:
+        print(
+            "CRITICAL: %d partitions detected, %d nodes online."
+            % (partitions, cluster)
+        )
+        sys.exit(2)
+    else:
+        print("OK: No partitions detected")
+        sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()