Merge "Add Nagios check for cluster partitions"
This commit is contained in:
commit
4b74f45412
|
@ -2,7 +2,7 @@
|
|||
# Wrapper to deal with newer Ubuntu versions that don't have py2 installed
|
||||
# by default.
|
||||
|
||||
declare -a DEPS=('apt' 'netaddr' 'netifaces' 'pip' 'yaml' 'dnspython')
|
||||
declare -a DEPS=('apt' 'netaddr' 'netifaces' 'pip' 'yaml' 'dnspython' 'requests')
|
||||
|
||||
check_and_install() {
|
||||
pkg="${1}-${2}"
|
||||
|
|
|
@ -20,6 +20,7 @@ import glob
|
|||
import tempfile
|
||||
import time
|
||||
import socket
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from rabbitmq_context import (
|
||||
|
@ -220,27 +221,25 @@ def user_exists(user):
|
|||
for line in out.split('\n')[1:]:
|
||||
_user = line.split('\t')[0]
|
||||
if _user == user:
|
||||
admin = line.split('\t')[1]
|
||||
return True, (admin == '[administrator]')
|
||||
return False, False
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def create_user(user, password, admin=False):
|
||||
exists, is_admin = user_exists(user)
|
||||
def create_user(user, password, tags=[]):
|
||||
exists = user_exists(user)
|
||||
|
||||
if not exists:
|
||||
log('Creating new user (%s).' % user)
|
||||
rabbitmqctl('add_user', user, password)
|
||||
|
||||
if admin == is_admin:
|
||||
return
|
||||
if 'administrator' in tags:
|
||||
log('Granting admin access to {}'.format(user))
|
||||
|
||||
if admin:
|
||||
log('Granting user (%s) admin access.' % user)
|
||||
rabbitmqctl('set_user_tags', user, 'administrator')
|
||||
else:
|
||||
log('Revoking user (%s) admin access.' % user)
|
||||
rabbitmqctl('set_user_tags', user)
|
||||
log('Adding tags [{}] to user {}'.format(
|
||||
', '.join(tags),
|
||||
user
|
||||
))
|
||||
rabbitmqctl('set_user_tags', user, ' '.join(tags))
|
||||
|
||||
|
||||
def grant_permissions(user, vhost):
|
||||
|
|
|
@ -29,6 +29,17 @@ except ImportError:
|
|||
subprocess.check_call(['apt-get', 'install', '-y', 'python3-yaml'])
|
||||
import yaml # flake8: noqa
|
||||
|
||||
try:
|
||||
import requests # flake8: noqa
|
||||
except ImportError:
|
||||
if sys.version_info.major == 2:
|
||||
subprocess.check_call(['apt-get', 'install', '-y',
|
||||
'python-requests'])
|
||||
else:
|
||||
subprocess.check_call(['apt-get', 'install', '-y',
|
||||
'python3-requests'])
|
||||
import requests # flake8: noqa
|
||||
|
||||
import rabbit_utils as rabbit
|
||||
import ssl_utils
|
||||
|
||||
|
@ -189,7 +200,10 @@ def configure_amqp(username, vhost, relation_id, admin=False):
|
|||
|
||||
# update vhost
|
||||
rabbit.create_vhost(vhost)
|
||||
rabbit.create_user(username, password, admin)
|
||||
if admin:
|
||||
rabbit.create_user(username, password, ['administrator'])
|
||||
else:
|
||||
rabbit.create_user(username, password)
|
||||
rabbit.grant_permissions(username, vhost)
|
||||
|
||||
# NOTE(freyes): after rabbitmq-server 3.0 the method to define HA in the
|
||||
|
@ -584,10 +598,10 @@ def ceph_changed():
|
|||
@hooks.hook('nrpe-external-master-relation-changed')
|
||||
def update_nrpe_checks():
|
||||
if os.path.isdir(NAGIOS_PLUGINS):
|
||||
rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
|
||||
rsync(os.path.join(charm_dir(), 'scripts',
|
||||
'check_rabbitmq.py'),
|
||||
os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
|
||||
rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
|
||||
rsync(os.path.join(charm_dir(), 'scripts',
|
||||
'check_rabbitmq_queues.py'),
|
||||
os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_queues.py'))
|
||||
if config('stats_cron_schedule'):
|
||||
|
@ -598,6 +612,10 @@ def update_nrpe_checks():
|
|||
rsync(os.path.join(charm_dir(), 'scripts',
|
||||
'collect_rabbitmq_stats.sh'), script)
|
||||
write_file(STATS_CRONFILE, cronjob)
|
||||
if config('management_plugin'):
|
||||
rsync(os.path.join(charm_dir(), 'scripts',
|
||||
'check_rabbitmq_cluster.py'),
|
||||
os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_cluster.py'))
|
||||
elif os.path.isfile(STATS_CRONFILE):
|
||||
os.remove(STATS_CRONFILE)
|
||||
|
||||
|
@ -612,7 +630,7 @@ def update_nrpe_checks():
|
|||
password = rabbit.get_rabbit_password(user, local=True)
|
||||
|
||||
rabbit.create_vhost(vhost)
|
||||
rabbit.create_user(user, password)
|
||||
rabbit.create_user(user, password, ['monitoring'])
|
||||
rabbit.grant_permissions(user, vhost)
|
||||
|
||||
nrpe_compat = nrpe.NRPE(hostname=hostname)
|
||||
|
@ -633,6 +651,19 @@ def update_nrpe_checks():
|
|||
check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
|
||||
NAGIOS_PLUGINS, cmd, STATS_DATAFILE)
|
||||
)
|
||||
if config('management_plugin'):
|
||||
# add NRPE check
|
||||
nrpe_compat.add_check(
|
||||
shortname=rabbit.RABBIT_USER + '_cluster',
|
||||
description='Check RabbitMQ Cluster',
|
||||
check_cmd='{}/check_rabbitmq_cluster.py --port {} --user {} --password {}'.format(
|
||||
NAGIOS_PLUGINS,
|
||||
rabbit.get_managment_port(),
|
||||
user,
|
||||
password
|
||||
)
|
||||
)
|
||||
|
||||
nrpe_compat.write()
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
#!/usr/bin/python
|
||||
"""
|
||||
Checks for RabbitMQ cluster partitions.
|
||||
|
||||
Copyright (C) 2017 Canonical
|
||||
All Rights Reserved
|
||||
Author: James Hebden
|
||||
|
||||
This Nagios check will use the HTTP management API
|
||||
to fetch cluster status, and check it for problems
|
||||
such as partitions and offline nodes.
|
||||
"""
|
||||
|
||||
from optparse import OptionParser
|
||||
import json
|
||||
import requests
|
||||
import socket
|
||||
import sys
|
||||
|
||||
# Seconds to wait for the management API. Without an explicit timeout
# requests.get() may block indefinitely and the check would never return.
REQUEST_TIMEOUT = 10

# Exit codes reported to Nagios by this check.
STATUS_OK = 0
STATUS_CRITICAL = 2
STATUS_UNKNOWN = 3


def parse_options(argv=None):
    """Parse the command line of the check.

    :param argv: list of arguments to parse; ``None`` makes optparse fall
                 back to ``sys.argv[1:]``.
    :returns: the optparse ``Values`` object holding ``host``, ``port``,
              ``verbose``, ``user``, ``password``, ``tls``, ``hostname`` and
              ``rabbitname``.
    """
    parser = OptionParser()
    parser.add_option("--host", dest="host",
                      help="RabbitMQ host to connect to [default=%default]",
                      metavar="HOST", default="localhost")
    parser.add_option("--port", dest="port", type="int",
                      help="port RabbitMQ is running on [default=%default]",
                      metavar="PORT", default=5672)
    parser.add_option("-v", "--verbose", default=False, action="store_true",
                      help="verbose run")
    parser.add_option("-u", "--user", dest="user", default="guest",
                      help="RabbitMQ user [default=%default]",
                      metavar="USER")
    parser.add_option("-p", "--password", dest="password", default="guest",
                      help="RabbitMQ password [default=%default]",
                      metavar="PASSWORD")
    # NOTE: --tls has no store_true action, so it consumes a value and any
    # non-empty value enables HTTPS. Kept as-is so existing command lines
    # keep working.
    parser.add_option("-t", "--tls", dest="tls", default=False,
                      help="Use TLS to talk to RabbitMQ? [default=%default]",
                      metavar="TLS")
    parser.add_option("-H", "--hostname",
                      dest="hostname",
                      default=socket.gethostname(),
                      help="Override hostname used when querying "
                           "cluster status [default=%default]")
    parser.add_option("-R", "--rabbitname",
                      dest="rabbitname",
                      default="rabbit",
                      help="Override rabbit user ID used when querying "
                           "cluster status [default=%default]")
    options, _args = parser.parse_args(argv)
    return options


def build_node_url(proto, host, port, rabbitname, hostname):
    """Return the management API URL of node ``rabbitname@hostname``.

    Credentials are deliberately not embedded in the URL; they are passed to
    requests separately so that special characters in the password cannot
    corrupt the URL.
    """
    return '{0}://{1}:{2}/api/nodes/{3}@{4}'.format(
        proto, host, port, rabbitname, hostname)


def evaluate_status(partition_data):
    """Turn the raw API response into a Nagios result.

    :param partition_data: response body returned by the management API.
    :returns: tuple ``(exit_code, message)``; ``STATUS_UNKNOWN`` when the
              body is not the expected JSON document, ``STATUS_CRITICAL``
              when at least one partition is listed, ``STATUS_OK`` otherwise.
    """
    try:
        # Decode once and reuse the document for both lookups.
        node = json.loads(partition_data)
        partitions = len(node['partitions'])
        cluster = len(node['cluster_links'])
    except (ValueError, KeyError, TypeError):
        # ValueError: not JSON; KeyError: field missing (e.g. an error
        # document); TypeError: unexpected JSON type.
        return (
            STATUS_UNKNOWN,
            "UNKNOWN: Could not parse cluster status data returned by RabbitMQ"
        )

    # len() is never negative, so the former "cluster < 0" test could never
    # fire; only the partition count decides the outcome.
    if partitions > 0:
        return (
            STATUS_CRITICAL,
            "CRITICAL: %d partitions detected, %d nodes online." % (
                partitions, cluster)
        )
    return (STATUS_OK, "OK: No partitions detected")


def main(argv=None):
    """Run the check and return the Nagios exit code.

    Queries ``/api/nodes/<rabbitname>@<hostname>`` on the management API,
    prints a one-line status and returns the matching exit code.
    """
    options = parse_options(argv)

    if options.verbose:
        # Format inside print(): "print(...) % x" only works as a Python 2
        # print statement and raises TypeError on Python 3.
        print("Checking host: %s@%s:%d" % (
            options.user,
            options.host,
            options.port
        ))

    proto = 'https' if options.tls else 'http'
    url = build_node_url(proto, options.host, options.port,
                         options.rabbitname, options.hostname)

    try:
        partition_data = requests.get(
            url,
            auth=(options.user, options.password),
            timeout=REQUEST_TIMEOUT
        ).text
    except requests.RequestException as error:
        # RequestException also covers timeouts, not just refused
        # connections.
        print(
            "ERROR: could not connect to cluster: {0}".format(
                error
            )
        )
        return STATUS_UNKNOWN

    if options.verbose:
        print(partition_data)

    status, message = evaluate_status(partition_data)
    print(message)
    return status


if __name__ == '__main__':
    sys.exit(main())
|
Loading…
Reference in New Issue