[gnuoy,r=james-page] Add support for nrpe

This commit is contained in:
Liam Young 2015-01-12 17:20:42 +00:00
commit 85c1946e73
12 changed files with 518 additions and 3 deletions

View File

@ -11,3 +11,4 @@ include:
- contrib.network.ip
- contrib.peerstorage
- contrib.python.packages
- contrib.charmsupport

View File

@ -292,3 +292,14 @@ options:
order for this charm to function correctly, the privacy extension must be
disabled and a non-temporary address must be configured/available on
your network interface.
nagios_context:
default: "juju"
type: string
description: |
Used by the nrpe-external-master subordinate charm.
A string that will be prepended to instance name to set the host name
in nagios. So for instance the hostname would be something like:
juju-myservice-0
If you're running multiple environments with the same services in them
this allows you to differentiate between them.

View File

@ -0,0 +1,308 @@
"""Compatibility with the nrpe-external-master charm"""
# Copyright 2012 Canonical Ltd.
#
# Authors:
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
import subprocess
import pwd
import grp
import os
import re
import shlex
import yaml
from charmhelpers.core.hookenv import (
config,
local_unit,
log,
relation_ids,
relation_set,
relations_of_type,
)
from charmhelpers.core.host import service
# This module adds compatibility with the nrpe-external-master and plain nrpe
# subordinate charms. To use it in your charm:
#
# 1. Update metadata.yaml
#
# provides:
# (...)
# nrpe-external-master:
# interface: nrpe-external-master
# scope: container
#
# and/or
#
# provides:
# (...)
# local-monitors:
# interface: local-monitors
# scope: container
#
# 2. Add the following to config.yaml
#
# nagios_context:
# default: "juju"
# type: string
# description: |
# Used by the nrpe subordinate charms.
# A string that will be prepended to instance name to set the host name
# in nagios. So for instance the hostname would be something like:
# juju-myservice-0
# If you're running multiple environments with the same services in them
# this allows you to differentiate between them.
# nagios_servicegroups:
# default: ""
# type: string
# description: |
# A comma-separated list of nagios servicegroups.
# If left empty, the nagios_context will be used as the servicegroup
#
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
#
# 4. Update your hooks.py with something like this:
#
# from charmsupport.nrpe import NRPE
# (...)
# def update_nrpe_config():
# nrpe_compat = NRPE()
# nrpe_compat.add_check(
# shortname = "myservice",
# description = "Check MyService",
# check_cmd = "check_http -w 2 -c 10 http://localhost"
# )
# nrpe_compat.add_check(
# "myservice_other",
# "Check for widget failures",
# check_cmd = "/srv/myapp/scripts/widget_check"
# )
# nrpe_compat.write()
#
# def config_changed():
# (...)
# update_nrpe_config()
#
# def nrpe_external_master_relation_changed():
# update_nrpe_config()
#
# def local_monitors_relation_changed():
# update_nrpe_config()
#
# 5. ln -s hooks.py nrpe-external-master-relation-changed
# ln -s hooks.py local-monitors-relation-changed
class CheckException(Exception):
pass
class Check(object):
shortname_re = '[A-Za-z0-9-_]+$'
service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
use active-service
host_name {nagios_hostname}
service_description {nagios_hostname}[{shortname}] """
"""{description}
check_command check_nrpe!{command}
servicegroups {nagios_servicegroup}
}}
""")
def __init__(self, shortname, description, check_cmd):
super(Check, self).__init__()
# XXX: could be better to calculate this from the service name
if not re.match(self.shortname_re, shortname):
raise CheckException("shortname must match {}".format(
Check.shortname_re))
self.shortname = shortname
self.command = "check_{}".format(shortname)
# Note: a set of invalid characters is defined by the
# Nagios server config
# The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
self.description = description
self.check_cmd = self._locate_cmd(check_cmd)
def _locate_cmd(self, check_cmd):
search_path = (
'/usr/lib/nagios/plugins',
'/usr/local/lib/nagios/plugins',
)
parts = shlex.split(check_cmd)
for path in search_path:
if os.path.exists(os.path.join(path, parts[0])):
command = os.path.join(path, parts[0])
if len(parts) > 1:
command += " " + " ".join(parts[1:])
return command
log('Check command not found: {}'.format(parts[0]))
return ''
def write(self, nagios_context, hostname, nagios_servicegroups=None):
nrpe_check_file = '/etc/nagios/nrpe.d/{}.cfg'.format(
self.command)
with open(nrpe_check_file, 'w') as nrpe_check_config:
nrpe_check_config.write("# check {}\n".format(self.shortname))
nrpe_check_config.write("command[{}]={}\n".format(
self.command, self.check_cmd))
if not os.path.exists(NRPE.nagios_exportdir):
log('Not writing service config as {} is not accessible'.format(
NRPE.nagios_exportdir))
else:
self.write_service_config(nagios_context, hostname,
nagios_servicegroups)
def write_service_config(self, nagios_context, hostname,
nagios_servicegroups=None):
for f in os.listdir(NRPE.nagios_exportdir):
if re.search('.*{}.cfg'.format(self.command), f):
os.remove(os.path.join(NRPE.nagios_exportdir, f))
if not nagios_servicegroups:
nagios_servicegroups = nagios_context
templ_vars = {
'nagios_hostname': hostname,
'nagios_servicegroup': nagios_servicegroups,
'description': self.description,
'shortname': self.shortname,
'command': self.command,
}
nrpe_service_text = Check.service_template.format(**templ_vars)
nrpe_service_file = '{}/service__{}_{}.cfg'.format(
NRPE.nagios_exportdir, hostname, self.command)
with open(nrpe_service_file, 'w') as nrpe_service_config:
nrpe_service_config.write(str(nrpe_service_text))
def run(self):
subprocess.call(self.check_cmd)
class NRPE(object):
nagios_logdir = '/var/log/nagios'
nagios_exportdir = '/var/lib/nagios/export'
nrpe_confdir = '/etc/nagios/nrpe.d'
def __init__(self, hostname=None):
super(NRPE, self).__init__()
self.config = config()
self.nagios_context = self.config['nagios_context']
if 'nagios_servicegroups' in self.config:
self.nagios_servicegroups = self.config['nagios_servicegroups']
else:
self.nagios_servicegroups = 'juju'
self.unit_name = local_unit().replace('/', '-')
if hostname:
self.hostname = hostname
else:
self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
self.checks = []
def add_check(self, *args, **kwargs):
self.checks.append(Check(*args, **kwargs))
def write(self):
try:
nagios_uid = pwd.getpwnam('nagios').pw_uid
nagios_gid = grp.getgrnam('nagios').gr_gid
except:
log("Nagios user not set up, nrpe checks not updated")
return
if not os.path.exists(NRPE.nagios_logdir):
os.mkdir(NRPE.nagios_logdir)
os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
nrpe_monitors = {}
monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
for nrpecheck in self.checks:
nrpecheck.write(self.nagios_context, self.hostname,
self.nagios_servicegroups)
nrpe_monitors[nrpecheck.shortname] = {
"command": nrpecheck.command,
}
service('restart', 'nagios-nrpe-server')
for rid in relation_ids("local-monitors"):
relation_set(relation_id=rid, monitors=yaml.dump(monitors))
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
"""
Query relation with nrpe subordinate, return the nagios_host_context
:param str relation_name: Name of relation nrpe sub joined to
"""
for rel in relations_of_type(relation_name):
if 'nagios_hostname' in rel:
return rel['nagios_host_context']
def get_nagios_hostname(relation_name='nrpe-external-master'):
"""
Query relation with nrpe subordinate, return the nagios_hostname
:param str relation_name: Name of relation nrpe sub joined to
"""
for rel in relations_of_type(relation_name):
if 'nagios_hostname' in rel:
return rel['nagios_hostname']
def get_nagios_unit_name(relation_name='nrpe-external-master'):
"""
Return the nagios unit name prepended with host_context if needed
:param str relation_name: Name of relation nrpe sub joined to
"""
host_context = get_nagios_hostcontext(relation_name)
if host_context:
unit = "%s:%s" % (host_context, local_unit())
else:
unit = local_unit()
return unit
def add_init_service_checks(nrpe, services, unit_name):
"""
Add checks for each service in list
:param NRPE nrpe: NRPE object to add check to
:param list services: List of services to check
:param str unit_name: Unit name to use in check description
"""
for svc in services:
upstart_init = '/etc/init/%s.conf' % svc
sysv_init = '/etc/init.d/%s' % svc
if os.path.exists(upstart_init):
nrpe.add_check(
shortname=svc,
description='process check {%s}' % unit_name,
check_cmd='check_upstart_job %s' % svc
)
elif os.path.exists(sysv_init):
cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
cron_file = ('*/5 * * * * root '
'/usr/local/lib/nagios/plugins/check_exit_status.pl '
'-s /etc/init.d/%s status > '
'/var/lib/nagios/service-check-%s.txt\n' % (svc,
svc)
)
f = open(cronpath, 'w')
f.write(cron_file)
f.close()
nrpe.add_check(
shortname=svc,
description='process check {%s}' % unit_name,
check_cmd='check_status_file.py -f '
'/var/lib/nagios/service-check-%s.txt' % svc,
)

View File

@ -0,0 +1,159 @@
'''
Functions for managing volumes in juju units. One volume is supported per unit.
Subordinates may have their own storage, provided it is on its own partition.
Configuration stanzas::
volume-ephemeral:
type: boolean
default: true
description: >
If false, a volume is mounted as sepecified in "volume-map"
If true, ephemeral storage will be used, meaning that log data
will only exist as long as the machine. YOU HAVE BEEN WARNED.
volume-map:
type: string
default: {}
description: >
YAML map of units to device names, e.g:
"{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
Service units will raise a configure-error if volume-ephemeral
is 'true' and no volume-map value is set. Use 'juju set' to set a
value and 'juju resolved' to complete configuration.
Usage::
from charmsupport.volumes import configure_volume, VolumeConfigurationError
from charmsupport.hookenv import log, ERROR
def post_mount_hook():
stop_service('myservice')
def post_mount_hook():
start_service('myservice')
if __name__ == '__main__':
try:
configure_volume(before_change=pre_mount_hook,
after_change=post_mount_hook)
except VolumeConfigurationError:
log('Storage could not be configured', ERROR)
'''
# XXX: Known limitations
# - fstab is neither consulted nor updated
import os
from charmhelpers.core import hookenv
from charmhelpers.core import host
import yaml
MOUNT_BASE = '/srv/juju/volumes'
class VolumeConfigurationError(Exception):
'''Volume configuration data is missing or invalid'''
pass
def get_config():
'''Gather and sanity-check volume configuration data'''
volume_config = {}
config = hookenv.config()
errors = False
if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
volume_config['ephemeral'] = True
else:
volume_config['ephemeral'] = False
try:
volume_map = yaml.safe_load(config.get('volume-map', '{}'))
except yaml.YAMLError as e:
hookenv.log("Error parsing YAML volume-map: {}".format(e),
hookenv.ERROR)
errors = True
if volume_map is None:
# probably an empty string
volume_map = {}
elif not isinstance(volume_map, dict):
hookenv.log("Volume-map should be a dictionary, not {}".format(
type(volume_map)))
errors = True
volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
if volume_config['device'] and volume_config['ephemeral']:
# asked for ephemeral storage but also defined a volume ID
hookenv.log('A volume is defined for this unit, but ephemeral '
'storage was requested', hookenv.ERROR)
errors = True
elif not volume_config['device'] and not volume_config['ephemeral']:
# asked for permanent storage but did not define volume ID
hookenv.log('Ephemeral storage was requested, but there is no volume '
'defined for this unit.', hookenv.ERROR)
errors = True
unit_mount_name = hookenv.local_unit().replace('/', '-')
volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)
if errors:
return None
return volume_config
def mount_volume(config):
if os.path.exists(config['mountpoint']):
if not os.path.isdir(config['mountpoint']):
hookenv.log('Not a directory: {}'.format(config['mountpoint']))
raise VolumeConfigurationError()
else:
host.mkdir(config['mountpoint'])
if os.path.ismount(config['mountpoint']):
unmount_volume(config)
if not host.mount(config['device'], config['mountpoint'], persist=True):
raise VolumeConfigurationError()
def unmount_volume(config):
if os.path.ismount(config['mountpoint']):
if not host.umount(config['mountpoint'], persist=True):
raise VolumeConfigurationError()
def managed_mounts():
'''List of all mounted managed volumes'''
return filter(lambda mount: mount[0].startswith(MOUNT_BASE), host.mounts())
def configure_volume(before_change=lambda: None, after_change=lambda: None):
'''Set up storage (or don't) according to the charm's volume configuration.
Returns the mount point or "ephemeral". before_change and after_change
are optional functions to be called if the volume configuration changes.
'''
config = get_config()
if not config:
hookenv.log('Failed to read volume configuration', hookenv.CRITICAL)
raise VolumeConfigurationError()
if config['ephemeral']:
if os.path.ismount(config['mountpoint']):
before_change()
unmount_volume(config)
after_change()
return 'ephemeral'
else:
# persistent storage
if os.path.ismount(config['mountpoint']):
mounts = dict(managed_mounts())
if mounts.get(config['mountpoint']) != config['device']:
before_change()
unmount_volume(config)
mount_volume(config)
after_change()
else:
before_change()
mount_volume(config)
after_change()
return config['mountpoint']

View File

@ -53,6 +53,7 @@ UBUNTU_OPENSTACK_RELEASE = OrderedDict([
('saucy', 'havana'),
('trusty', 'icehouse'),
('utopic', 'juno'),
('vivid', 'kilo'),
])
@ -64,6 +65,7 @@ OPENSTACK_CODENAMES = OrderedDict([
('2013.2', 'havana'),
('2014.1', 'icehouse'),
('2014.2', 'juno'),
('2015.1', 'kilo'),
])
# The ugly duckling
@ -84,6 +86,7 @@ SWIFT_CODENAMES = OrderedDict([
('2.0.0', 'juno'),
('2.1.0', 'juno'),
('2.2.0', 'juno'),
('2.2.1', 'kilo'),
])
DEFAULT_LOOPBACK_SIZE = '5G'
@ -289,6 +292,9 @@ def configure_installation_source(rel):
'juno': 'trusty-updates/juno',
'juno/updates': 'trusty-updates/juno',
'juno/proposed': 'trusty-proposed/juno',
'kilo': 'trusty-updates/kilo',
'kilo/updates': 'trusty-updates/kilo',
'kilo/proposed': 'trusty-proposed/kilo',
}
try:

View File

@ -64,9 +64,16 @@ CLOUD_ARCHIVE_POCKETS = {
'trusty-juno/updates': 'trusty-updates/juno',
'trusty-updates/juno': 'trusty-updates/juno',
'juno/proposed': 'trusty-proposed/juno',
'juno/proposed': 'trusty-proposed/juno',
'trusty-juno/proposed': 'trusty-proposed/juno',
'trusty-proposed/juno': 'trusty-proposed/juno',
# Kilo
'kilo': 'trusty-updates/kilo',
'trusty-kilo': 'trusty-updates/kilo',
'trusty-kilo/updates': 'trusty-updates/kilo',
'trusty-updates/kilo': 'trusty-updates/kilo',
'kilo/proposed': 'trusty-proposed/kilo',
'trusty-kilo/proposed': 'trusty-proposed/kilo',
'trusty-proposed/kilo': 'trusty-proposed/kilo',
}
# The order of this list is very important. Handlers should be listed in from

View File

@ -116,6 +116,8 @@ from charmhelpers.contrib.network.ip import (
from charmhelpers.contrib.openstack.context import ADDRESS_TYPES
from charmhelpers.contrib.charmsupport import nrpe
hooks = Hooks()
CONFIGS = register_configs()
@ -131,8 +133,9 @@ def install():
if os.path.isdir(_files):
for f in os.listdir(_files):
f = os.path.join(_files, f)
log('Installing %s to /usr/bin' % f)
shutil.copy2(f, '/usr/bin')
if os.path.isfile(f):
log('Installing %s to /usr/bin' % f)
shutil.copy2(f, '/usr/bin')
[open_port(port) for port in determine_ports()]
log('Disabling services into db relation joined')
disable_services()
@ -166,6 +169,7 @@ def config_changed():
for r_id in relation_ids('identity-service'):
identity_joined(rid=r_id)
[cluster_joined(rid) for rid in relation_ids('cluster')]
update_nrpe_config()
@hooks.hook('amqp-relation-joined')
@ -775,6 +779,7 @@ def upgrade_charm():
for r_id in relation_ids('cloud-compute'):
for unit in related_units(r_id):
compute_changed(r_id, unit)
update_nrpe_config()
# remote_restart is defaulted to true as nova-cells may have started the
@ -849,6 +854,18 @@ def neutron_api_relation_broken():
quantum_joined(rid=rid)
@hooks.hook('nrpe-external-master-relation-joined',
'nrpe-external-master-relation-changed')
def update_nrpe_config():
# python-dbus is used by check_upstart_job
apt_install('python-dbus')
hostname = nrpe.get_nagios_hostname()
current_unit = nrpe.get_nagios_unit_name()
nrpe_setup = nrpe.NRPE(hostname=hostname)
nrpe.add_init_service_checks(nrpe_setup, services(), current_unit)
nrpe_setup.write()
@hooks.hook('memcache-relation-joined',
'memcache-relation-departed',
'memcache-relation-changed',

View File

@ -0,0 +1 @@
nova_cc_hooks.py

View File

@ -0,0 +1 @@
nova_cc_hooks.py

View File

@ -7,6 +7,9 @@ description: |
categories:
- openstack
provides:
nrpe-external-master:
interface: nrpe-external-master
scope: container
cloud-controller:
interface: nova
requires:

View File

@ -64,6 +64,7 @@ TO_PATCH = [
'migrate_nova_database',
'migrate_neutron_database',
'uuid',
'update_nrpe_config',
]