[bradm] Sync charmhelpers nrpe support, and add nrpe checks

This commit is contained in:
Brad Marshall 2015-02-12 09:49:44 +10:00
parent 0c57fcc521
commit 9c3ca6e743
16 changed files with 1447 additions and 14 deletions

View File

@ -9,3 +9,4 @@ include:
- contrib.network.ip
- contrib.openstack.utils
- contrib.python.packages
- contrib.charmsupport
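The contrib.charmsupport include added above is what pulls charmhelpers/contrib/charmsupport/nrpe.py (and volumes.py) into the charm on the next helper sync. As a rough sketch only, assuming the stock charm_helpers_sync.py tool and that this config file is named charm-helpers.yaml (neither the tool path nor the file name is shown in this commit), the sync would be regenerated with something like:

# Re-pull the modules listed under include: into the charm tree
python charm_helpers_sync.py -c charm-helpers.yaml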

99
files/nrpe/check_corosync_rings Executable file
View File

@ -0,0 +1,99 @@
#!/usr/bin/perl
#
# check_corosync_rings
#
# Copyright © 2011 Phil Garner, Sysnix Consultants Limited
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors: Phil Garner - phil@sysnix.com & Peter Mottram peter@sysnix.com
#
# v0.1 05/01/2011
# v0.2 31/10/2011 - additional crit when closing the file handle and additional
# comments added
#
# NOTE:- Requires Perl 5.8 or higher & the Perl Module Nagios::Plugin
# Nagios user will need sudo access - suggest adding the line below to
# sudoers.
# nagios ALL=(ALL) NOPASSWD: /usr/sbin/corosync-cfgtool -s
#
# In sudoers if requiretty is on (off state is default)
# you will also need to add the line below
# Defaults:nagios !requiretty
#
use warnings;
use strict;
use Nagios::Plugin;
# Lines below may need changing if corosync-cfgtool or sudo is installed in a
# different location.
my $sudo = '/usr/bin/sudo';
my $cfgtool = '/usr/sbin/corosync-cfgtool -s';
# Now set up the plugin
my $np = Nagios::Plugin->new(
shortname => 'check_cororings',
version => '0.2',
usage => "Usage: %s <ARGS>\n\t\t--help for help\n",
license => "License - GPL v3 see code for more details",
url => "http://www.sysnix.com",
blurb =>
"\tNagios plugin that checks the status of corosync rings, requires Perl \t5.8+ and CPAN modules Nagios::Plugin.",
);
#Args
$np->add_arg(
spec => 'rings|r=s',
help =>
'How many rings should be running (optional); sends CRITICAL if an incorrect number of rings is found.',
required => 0,
);
$np->getopts;
my $found = 0;
my $fh;
my $rings = $np->opts->rings;
# Run cfgtool, spin through the output and get the info needed
open( $fh, "$sudo $cfgtool |" )
or $np->nagios_exit( CRITICAL, "Running corosync-cfgtool failed" );
foreach my $line (<$fh>) {
if ( $line =~ m/status\s*=\s*(\S.+)/ ) {
my $status = $1;
if ( $status =~ m/^ring (\d+) active with no faults/ ) {
$np->add_message( OK, "ring $1 OK" );
}
else {
$np->add_message( CRITICAL, $status );
}
$found++;
}
}
close($fh) or $np->nagios_exit( CRITICAL, "Running corosync-cfgtool failed" );
# Check we found some rings and apply -r arg if needed
if ( $found == 0 ) {
$np->nagios_exit( CRITICAL, "No Rings Found" );
}
elsif ( defined $rings && $rings != $found ) {
$np->nagios_exit( CRITICAL, "Expected $rings rings but found $found" );
}
$np->nagios_exit( $np->check_messages() );
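For orientation, the status regexes above key on the per-ring lines printed by corosync-cfgtool -s. An illustrative sample of the output the plugin expects to parse (node ID and address are made-up values, and formatting varies slightly between corosync versions):

$ sudo /usr/sbin/corosync-cfgtool -s
Printing ring status.
Local node ID 1084752430
RING ID 0
        id      = 10.0.3.141
        status  = ring 0 active with no faults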

201
files/nrpe/check_crm Executable file
View File

@ -0,0 +1,201 @@
#!/usr/bin/perl
#
# check_crm_v0_7
#
# Copyright © 2013 Philip Garner, Sysnix Consultants Limited
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors: Phil Garner - phil@sysnix.com & Peter Mottram - peter@sysnix.com
#
# v0.1 09/01/2011
# v0.2 11/01/2011
# v0.3 22/08/2011 - bug fix and changes suggested by Vadym Chepkov
# v0.4 23/08/2011 - update for spelling and anchor regex capture (Vadym Chepkov)
# v0.5 29/09/2011 - Add standby warn/crit suggested by Sönke Martens & removal
# of 'our' to 'my' to completely avoid problems with ePN
# v0.6 14/03/2013 - Change from \w+ to \S+ in stopped check to cope with
# servers that have non-word characters in their names.
# Suggested by Igal Baevsky.
# v0.7 01/09/2013 - Still in testing, not yet fully verified. Adds optional
# constraints check (Boris Wesslowski). Adds fail count
# threshold ( Zoran Bosnjak & Marko Hrastovec )
#
# NOTES: Requires Perl 5.8 or higher & the Perl Module Nagios::Plugin
# Nagios user will need sudo access - suggest adding the line below to
# sudoers
# nagios ALL=(ALL) NOPASSWD: /usr/sbin/crm_mon -1 -r -f
#
# if you want to check for location constraints (-c) also add
# nagios ALL=(ALL) NOPASSWD: /usr/sbin/crm configure show
#
# In sudoers if requiretty is on (off state is default)
# you will also need to add the line below
# Defaults:nagios !requiretty
#
use warnings;
use strict;
use Nagios::Plugin;
# Lines below may need changing if crm_mon or sudo installed in a
# different location.
my $sudo = '/usr/bin/sudo';
my $crm_mon = '/usr/sbin/crm_mon -1 -r -f';
my $crm_configure_show = '/usr/sbin/crm configure show';
my $np = Nagios::Plugin->new(
shortname => 'check_crm',
version => '0.7',
usage => "Usage: %s <ARGS>\n\t\t--help for help\n",
);
$np->add_arg(
spec => 'warning|w',
help =>
'If failed nodes, stopped resources or standby nodes are detected, send WARNING instead of CRITICAL (the default), as long as there are no other errors and there is quorum',
required => 0,
);
$np->add_arg(
spec => 'standbyignore|s',
help => 'Ignore any node(s) in standby; by default standby nodes send CRITICAL',
required => 0,
);
$np->add_arg(
spec => 'constraint|constraints|c',
help => 'Also check the configuration for location constraints (caused by migrations) and warn if there are any. Requires additional privileges; see notes',
required => 0,
);
$np->add_arg(
spec => 'failcount|failcounts|f=i',
help => 'resource fail count to start warning on [default = 1].',
required => 0,
default => 1,
);
$np->getopts;
my $ConstraintsFlag = $np->opts->constraint;
my @standby;
# Check for -w option: send WARNING instead of CRITICAL if it is set
my $warn_or_crit = 'CRITICAL';
$warn_or_crit = 'WARNING' if $np->opts->warning;
my $fh;
open( $fh, "$sudo $crm_mon |" )
or $np->nagios_exit( CRITICAL, "Running $sudo $crm_mon has failed" );
foreach my $line (<$fh>) {
if ( $line =~ m/Connection to cluster failed\:(.*)/i ) {
# Check Cluster connected
$np->nagios_exit( CRITICAL, "Connection to cluster FAILED: $1" );
}
elsif ( $line =~ m/Current DC:/ ) {
# Check for Quorum
if ( $line =~ m/partition with quorum$/ ) {
# Assume cluster is OK - we only add warn/crit after here
$np->add_message( OK, "Cluster OK" );
}
else {
$np->add_message( CRITICAL, "No Quorum" );
}
}
elsif ( $line =~ m/^offline:\s*\[\s*(\S.*?)\s*\]/i ) {
# Count offline nodes
my @offline = split( /\s+/, $1 );
my $numoffline = scalar @offline;
$np->add_message( $warn_or_crit, ": $numoffline Nodes Offline" );
}
elsif ( $line =~ m/^node\s+(\S.*):\s*standby/i ) {
# Check for standby nodes (suggested by Sönke Martens)
# See later in code for message created from this
push @standby, $1;
}
elsif ( $line =~ m/\s*(\S+)\s+\(\S+\)\:\s+Stopped/ ) {
# Check Resources Stopped
$np->add_message( $warn_or_crit, ": $1 Stopped" );
}
elsif ( $line =~ m/\s*stopped\:\s*\[(.*)\]/i ) {
# Check Master/Slave stopped
$np->add_message( $warn_or_crit, ": $1 Stopped" );
}
elsif ( $line =~ m/^Failed actions\:/ ) {
# Check Failed Actions
$np->add_message( CRITICAL,
": FAILED actions detected or not cleaned up" );
}
elsif ( $line =~ m/\s*(\S+?)\s+ \(.*\)\:\s+\w+\s+\w+\s+\(unmanaged\)\s+/i )
{
# Check Unmanaged
$np->add_message( CRITICAL, ": $1 unmanaged FAILED" );
}
elsif ( $line =~ m/\s*(\S+?)\s+ \(.*\)\:\s+not installed/i ) {
# Check for errors
$np->add_message( CRITICAL, ": $1 not installed" );
}
elsif ( $line =~ m/\s*(\S+?):.*fail-count=(\d+)/i ) {
if ( $2 >= $np->opts->failcount ) {
# Check for resource Fail count (suggested by Vadym Chepkov)
$np->add_message( WARNING, ": $1 failure detected, fail-count=$2" );
}
}
}
# If any nodes were found in standby and -s was not used, send warn/crit
if ( scalar @standby > 0 && !$np->opts->standbyignore ) {
$np->add_message( $warn_or_crit,
": " . join( ', ', @standby ) . " in Standby" );
}
close($fh) or $np->nagios_exit( CRITICAL, "Running $crm_mon FAILED" );
# if -c flag set check configuration for constraints
if ($ConstraintsFlag) {
open( $fh, "$sudo $crm_configure_show|" )
or $np->nagios_exit( CRITICAL,
"Running $sudo $crm_configure_show has failed" );
foreach my $line (<$fh>) {
if ( $line =~ m/location cli-(prefer|standby)-\S+\s+(\S+)/ ) {
$np->add_message( WARNING,
": $2 blocking location constraint detected" );
}
}
close($fh)
or $np->nagios_exit( CRITICAL, "Running $crm_configure_show FAILED" );
}
$np->nagios_exit( $np->check_messages() );
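Likewise, each branch above matches one style of line from sudo crm_mon -1 -r -f. A short illustrative fragment of the kind of output being parsed, written to mirror the regexes (host and resource names are invented, and exact layout differs between pacemaker versions):

Current DC: node1 - partition with quorum
Online: [ node1 node2 ]
res_test_vip    (ocf::heartbeat:IPaddr2):       Started node1
Migration summary:
* Node node1:
   res_test_vip: migration-threshold=1000000 fail-count=1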

32
files/nrpe/check_haproxy.sh Executable file
View File

@ -0,0 +1,32 @@
#!/bin/bash
#--------------------------------------------
# This file is managed by Juju
#--------------------------------------------
#
# Copyright 2009,2012 Canonical Ltd.
# Author: Tom Haddon
CRITICAL=0
NOTACTIVE=''
LOGFILE=/var/log/nagios/check_haproxy.log
AUTH=$(grep -r "stats auth" /etc/haproxy | head -1 | awk '{print $4}')
for appserver in $(grep ' server' /etc/haproxy/haproxy.cfg | awk '{print $2}');
do
output=$(/usr/lib/nagios/plugins/check_http -a ${AUTH} -I 127.0.0.1 -p 8888 --regex="class=\"(active|backup)(2|3).*${appserver}" -e ' 200 OK')
if [ $? != 0 ]; then
date >> $LOGFILE
echo $output >> $LOGFILE
/usr/lib/nagios/plugins/check_http -a ${AUTH} -I 127.0.0.1 -p 8888 -v | grep $appserver >> $LOGFILE 2>&1
CRITICAL=1
NOTACTIVE="${NOTACTIVE} $appserver"
fi
done
if [ $CRITICAL = 1 ]; then
echo "CRITICAL:${NOTACTIVE}"
exit 2
fi
echo "OK: All haproxy instances looking good"
exit 0
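Both haproxy checks added here (this one and the queue-depth check that follows) assume haproxy exposes an authenticated stats endpoint on 127.0.0.1:8888 and that a "stats auth" line is present under /etc/haproxy. As a sketch only, the expected listener would look roughly like this (port, URI and credentials are illustrative, not values taken from this charm):

listen stats 127.0.0.1:8888
    mode http
    stats enable
    stats uri /
    stats auth monitor:password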

View File

@ -0,0 +1,30 @@
#!/bin/bash
#--------------------------------------------
# This file is managed by Juju
#--------------------------------------------
#
# Copyright 2009,2012 Canonical Ltd.
# Author: Tom Haddon
# These should be config options at some stage
CURRQthrsh=0
MAXQthrsh=100
AUTH=$(grep -r "stats auth" /etc/haproxy | head -1 | awk '{print $4}')
HAPROXYSTATS=$(/usr/lib/nagios/plugins/check_http -a ${AUTH} -I 127.0.0.1 -p 8888 -u '/;csv' -v)
for BACKEND in $(echo $HAPROXYSTATS| xargs -n1 | grep BACKEND | awk -F , '{print $1}')
do
CURRQ=$(echo "$HAPROXYSTATS" | grep $BACKEND | grep BACKEND | cut -d , -f 3)
MAXQ=$(echo "$HAPROXYSTATS" | grep $BACKEND | grep BACKEND | cut -d , -f 4)
if [[ $CURRQ -gt $CURRQthrsh || $MAXQ -gt $MAXQthrsh ]] ; then
echo "CRITICAL: queue depth for $BACKEND - CURRENT:$CURRQ MAX:$MAXQ"
exit 2
fi
done
echo "OK: All haproxy queue depths looking good"
exit 0

View File

@ -0,0 +1,15 @@
# Copyright 2014-2015 Canonical Limited.
#
# This file is part of charm-helpers.
#
# charm-helpers is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version 3 as
# published by the Free Software Foundation.
#
# charm-helpers is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.

View File

@ -0,0 +1,324 @@
# Copyright 2014-2015 Canonical Limited.
#
# This file is part of charm-helpers.
#
# charm-helpers is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version 3 as
# published by the Free Software Foundation.
#
# charm-helpers is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
"""Compatibility with the nrpe-external-master charm"""
# Copyright 2012 Canonical Ltd.
#
# Authors:
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
import subprocess
import pwd
import grp
import os
import re
import shlex
import yaml
from charmhelpers.core.hookenv import (
config,
local_unit,
log,
relation_ids,
relation_set,
relations_of_type,
)
from charmhelpers.core.host import service
# This module adds compatibility with the nrpe-external-master and plain nrpe
# subordinate charms. To use it in your charm:
#
# 1. Update metadata.yaml
#
# provides:
# (...)
# nrpe-external-master:
# interface: nrpe-external-master
# scope: container
#
# and/or
#
# provides:
# (...)
# local-monitors:
# interface: local-monitors
# scope: container
#
# 2. Add the following to config.yaml
#
# nagios_context:
# default: "juju"
# type: string
# description: |
# Used by the nrpe subordinate charms.
# A string that will be prepended to instance name to set the host name
# in nagios. So for instance the hostname would be something like:
# juju-myservice-0
# If you're running multiple environments with the same services in them
# this allows you to differentiate between them.
# nagios_servicegroups:
# default: ""
# type: string
# description: |
# A comma-separated list of nagios servicegroups.
# If left empty, the nagios_context will be used as the servicegroup
#
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
#
# 4. Update your hooks.py with something like this:
#
# from charmsupport.nrpe import NRPE
# (...)
# def update_nrpe_config():
# nrpe_compat = NRPE()
# nrpe_compat.add_check(
# shortname = "myservice",
# description = "Check MyService",
# check_cmd = "check_http -w 2 -c 10 http://localhost"
# )
# nrpe_compat.add_check(
# "myservice_other",
# "Check for widget failures",
# check_cmd = "/srv/myapp/scripts/widget_check"
# )
# nrpe_compat.write()
#
# def config_changed():
# (...)
# update_nrpe_config()
#
# def nrpe_external_master_relation_changed():
# update_nrpe_config()
#
# def local_monitors_relation_changed():
# update_nrpe_config()
#
# 5. ln -s hooks.py nrpe-external-master-relation-changed
# ln -s hooks.py local-monitors-relation-changed
class CheckException(Exception):
pass
class Check(object):
shortname_re = '[A-Za-z0-9-_]+$'
service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
use active-service
host_name {nagios_hostname}
service_description {nagios_hostname}[{shortname}] """
"""{description}
check_command check_nrpe!{command}
servicegroups {nagios_servicegroup}
}}
""")
def __init__(self, shortname, description, check_cmd):
super(Check, self).__init__()
# XXX: could be better to calculate this from the service name
if not re.match(self.shortname_re, shortname):
raise CheckException("shortname must match {}".format(
Check.shortname_re))
self.shortname = shortname
self.command = "check_{}".format(shortname)
# Note: a set of invalid characters is defined by the
# Nagios server config
# The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
self.description = description
self.check_cmd = self._locate_cmd(check_cmd)
def _locate_cmd(self, check_cmd):
search_path = (
'/usr/lib/nagios/plugins',
'/usr/local/lib/nagios/plugins',
)
parts = shlex.split(check_cmd)
for path in search_path:
if os.path.exists(os.path.join(path, parts[0])):
command = os.path.join(path, parts[0])
if len(parts) > 1:
command += " " + " ".join(parts[1:])
return command
log('Check command not found: {}'.format(parts[0]))
return ''
def write(self, nagios_context, hostname, nagios_servicegroups=None):
nrpe_check_file = '/etc/nagios/nrpe.d/{}.cfg'.format(
self.command)
with open(nrpe_check_file, 'w') as nrpe_check_config:
nrpe_check_config.write("# check {}\n".format(self.shortname))
nrpe_check_config.write("command[{}]={}\n".format(
self.command, self.check_cmd))
if not os.path.exists(NRPE.nagios_exportdir):
log('Not writing service config as {} is not accessible'.format(
NRPE.nagios_exportdir))
else:
self.write_service_config(nagios_context, hostname,
nagios_servicegroups)
def write_service_config(self, nagios_context, hostname,
nagios_servicegroups=None):
for f in os.listdir(NRPE.nagios_exportdir):
if re.search('.*{}.cfg'.format(self.command), f):
os.remove(os.path.join(NRPE.nagios_exportdir, f))
if not nagios_servicegroups:
nagios_servicegroups = nagios_context
templ_vars = {
'nagios_hostname': hostname,
'nagios_servicegroup': nagios_servicegroups,
'description': self.description,
'shortname': self.shortname,
'command': self.command,
}
nrpe_service_text = Check.service_template.format(**templ_vars)
nrpe_service_file = '{}/service__{}_{}.cfg'.format(
NRPE.nagios_exportdir, hostname, self.command)
with open(nrpe_service_file, 'w') as nrpe_service_config:
nrpe_service_config.write(str(nrpe_service_text))
def run(self):
subprocess.call(self.check_cmd)
class NRPE(object):
nagios_logdir = '/var/log/nagios'
nagios_exportdir = '/var/lib/nagios/export'
nrpe_confdir = '/etc/nagios/nrpe.d'
def __init__(self, hostname=None):
super(NRPE, self).__init__()
self.config = config()
self.nagios_context = self.config['nagios_context']
if 'nagios_servicegroups' in self.config:
self.nagios_servicegroups = self.config['nagios_servicegroups']
else:
self.nagios_servicegroups = 'juju'
self.unit_name = local_unit().replace('/', '-')
if hostname:
self.hostname = hostname
else:
self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
self.checks = []
def add_check(self, *args, **kwargs):
self.checks.append(Check(*args, **kwargs))
def write(self):
try:
nagios_uid = pwd.getpwnam('nagios').pw_uid
nagios_gid = grp.getgrnam('nagios').gr_gid
except:
log("Nagios user not set up, nrpe checks not updated")
return
if not os.path.exists(NRPE.nagios_logdir):
os.mkdir(NRPE.nagios_logdir)
os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
nrpe_monitors = {}
monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
for nrpecheck in self.checks:
nrpecheck.write(self.nagios_context, self.hostname,
self.nagios_servicegroups)
nrpe_monitors[nrpecheck.shortname] = {
"command": nrpecheck.command,
}
service('restart', 'nagios-nrpe-server')
for rid in relation_ids("local-monitors"):
relation_set(relation_id=rid, monitors=yaml.dump(monitors))
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
"""
Query relation with nrpe subordinate, return the nagios_host_context
:param str relation_name: Name of relation nrpe sub joined to
"""
for rel in relations_of_type(relation_name):
if 'nagios_host_context' in rel:
return rel['nagios_host_context']
def get_nagios_hostname(relation_name='nrpe-external-master'):
"""
Query relation with nrpe subordinate, return the nagios_hostname
:param str relation_name: Name of relation nrpe sub joined to
"""
for rel in relations_of_type(relation_name):
if 'nagios_hostname' in rel:
return rel['nagios_hostname']
def get_nagios_unit_name(relation_name='nrpe-external-master'):
"""
Return the nagios unit name prepended with host_context if needed
:param str relation_name: Name of relation nrpe sub joined to
"""
host_context = get_nagios_hostcontext(relation_name)
if host_context:
unit = "%s:%s" % (host_context, local_unit())
else:
unit = local_unit()
return unit
def add_init_service_checks(nrpe, services, unit_name):
"""
Add checks for each service in list
:param NRPE nrpe: NRPE object to add check to
:param list services: List of services to check
:param str unit_name: Unit name to use in check description
"""
for svc in services:
upstart_init = '/etc/init/%s.conf' % svc
sysv_init = '/etc/init.d/%s' % svc
if os.path.exists(upstart_init):
nrpe.add_check(
shortname=svc,
description='process check {%s}' % unit_name,
check_cmd='check_upstart_job %s' % svc
)
elif os.path.exists(sysv_init):
cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
cron_file = ('*/5 * * * * root '
'/usr/local/lib/nagios/plugins/check_exit_status.pl '
'-s /etc/init.d/%s status > '
'/var/lib/nagios/service-check-%s.txt\n' % (svc,
svc)
)
f = open(cronpath, 'w')
f.write(cron_file)
f.close()
nrpe.add_check(
shortname=svc,
description='process check {%s}' % unit_name,
check_cmd='check_status_file.py -f '
'/var/lib/nagios/service-check-%s.txt' % svc,
)
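To make the write() path above concrete: for a hypothetical check registered with shortname 'haproxy_servers' and check_cmd 'check_haproxy.sh', the class would emit an NRPE command definition and, if the export directory exists, a Nagios service stanza along these lines. Paths come from the class constants above; the hostname, description and servicegroup shown are assumed examples that depend on nagios_context, the unit name and the add_check() call:

# /etc/nagios/nrpe.d/check_haproxy_servers.cfg
# check haproxy_servers
command[check_haproxy_servers]=/usr/local/lib/nagios/plugins/check_haproxy.sh

# /var/lib/nagios/export/service__juju-hacluster-0_check_haproxy_servers.cfg
define service {
    use active-service
    host_name juju-hacluster-0
    service_description juju-hacluster-0[haproxy_servers] Check HAProxy
    check_command check_nrpe!check_haproxy_servers
    servicegroups juju
}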

View File

@ -0,0 +1,175 @@
# Copyright 2014-2015 Canonical Limited.
#
# This file is part of charm-helpers.
#
# charm-helpers is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version 3 as
# published by the Free Software Foundation.
#
# charm-helpers is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
'''
Functions for managing volumes in juju units. One volume is supported per unit.
Subordinates may have their own storage, provided it is on its own partition.
Configuration stanzas::
volume-ephemeral:
type: boolean
default: true
description: >
If false, a volume is mounted as specified in "volume-map".
If true, ephemeral storage will be used, meaning that log data
will only exist as long as the machine. YOU HAVE BEEN WARNED.
volume-map:
type: string
default: {}
description: >
YAML map of units to device names, e.g:
"{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
Service units will raise a configure-error if volume-ephemeral
is 'false' and no volume-map value is set. Use 'juju set' to set a
value and 'juju resolved' to complete configuration.
Usage::
from charmsupport.volumes import configure_volume, VolumeConfigurationError
from charmsupport.hookenv import log, ERROR
def pre_mount_hook():
stop_service('myservice')
def post_mount_hook():
start_service('myservice')
if __name__ == '__main__':
try:
configure_volume(before_change=pre_mount_hook,
after_change=post_mount_hook)
except VolumeConfigurationError:
log('Storage could not be configured', ERROR)
'''
# XXX: Known limitations
# - fstab is neither consulted nor updated
import os
from charmhelpers.core import hookenv
from charmhelpers.core import host
import yaml
MOUNT_BASE = '/srv/juju/volumes'
class VolumeConfigurationError(Exception):
'''Volume configuration data is missing or invalid'''
pass
def get_config():
'''Gather and sanity-check volume configuration data'''
volume_config = {}
config = hookenv.config()
errors = False
if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
volume_config['ephemeral'] = True
else:
volume_config['ephemeral'] = False
try:
volume_map = yaml.safe_load(config.get('volume-map', '{}'))
except yaml.YAMLError as e:
hookenv.log("Error parsing YAML volume-map: {}".format(e),
hookenv.ERROR)
errors = True
# Fall back to an empty map so the checks below do not raise NameError
volume_map = {}
if volume_map is None:
# probably an empty string
volume_map = {}
elif not isinstance(volume_map, dict):
hookenv.log("Volume-map should be a dictionary, not {}".format(
type(volume_map)))
errors = True
volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
if volume_config['device'] and volume_config['ephemeral']:
# asked for ephemeral storage but also defined a volume ID
hookenv.log('A volume is defined for this unit, but ephemeral '
'storage was requested', hookenv.ERROR)
errors = True
elif not volume_config['device'] and not volume_config['ephemeral']:
# asked for permanent storage but did not define volume ID
hookenv.log('Persistent storage was requested, but there is no volume '
'defined for this unit.', hookenv.ERROR)
errors = True
unit_mount_name = hookenv.local_unit().replace('/', '-')
volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)
if errors:
return None
return volume_config
def mount_volume(config):
if os.path.exists(config['mountpoint']):
if not os.path.isdir(config['mountpoint']):
hookenv.log('Not a directory: {}'.format(config['mountpoint']))
raise VolumeConfigurationError()
else:
host.mkdir(config['mountpoint'])
if os.path.ismount(config['mountpoint']):
unmount_volume(config)
if not host.mount(config['device'], config['mountpoint'], persist=True):
raise VolumeConfigurationError()
def unmount_volume(config):
if os.path.ismount(config['mountpoint']):
if not host.umount(config['mountpoint'], persist=True):
raise VolumeConfigurationError()
def managed_mounts():
'''List of all mounted managed volumes'''
return filter(lambda mount: mount[0].startswith(MOUNT_BASE), host.mounts())
def configure_volume(before_change=lambda: None, after_change=lambda: None):
'''Set up storage (or don't) according to the charm's volume configuration.
Returns the mount point or "ephemeral". before_change and after_change
are optional functions to be called if the volume configuration changes.
'''
config = get_config()
if not config:
hookenv.log('Failed to read volume configuration', hookenv.CRITICAL)
raise VolumeConfigurationError()
if config['ephemeral']:
if os.path.ismount(config['mountpoint']):
before_change()
unmount_volume(config)
after_change()
return 'ephemeral'
else:
# persistent storage
if os.path.ismount(config['mountpoint']):
mounts = dict(managed_mounts())
if mounts.get(config['mountpoint']) != config['device']:
before_change()
unmount_volume(config)
mount_volume(config)
after_change()
else:
before_change()
mount_volume(config)
after_change()
return config['mountpoint']

View File

@ -191,11 +191,11 @@ def mkdir(path, owner='root', group='root', perms=0o555, force=False):
def write_file(path, content, owner='root', group='root', perms=0o444):
"""Create or overwrite a file with the contents of a string"""
"""Create or overwrite a file with the contents of a byte string."""
log("Writing file {} {}:{} {:o}".format(path, owner, group, perms))
uid = pwd.getpwnam(owner).pw_uid
gid = grp.getgrnam(group).gr_gid
with open(path, 'w') as target:
with open(path, 'wb') as target:
os.fchown(target.fileno(), uid, gid)
os.fchmod(target.fileno(), perms)
target.write(content)
@ -305,11 +305,11 @@ def restart_on_change(restart_map, stopstart=False):
ceph_client_changed function.
"""
def wrap(f):
def wrapped_f(*args):
def wrapped_f(*args, **kwargs):
checksums = {}
for path in restart_map:
checksums[path] = file_hash(path)
f(*args)
f(*args, **kwargs)
restarts = []
for path in restart_map:
if checksums[path] != file_hash(path):
@ -361,7 +361,7 @@ def list_nics(nic_type):
ip_output = (line for line in ip_output if line)
for line in ip_output:
if line.split()[1].startswith(int_type):
matched = re.search('.*: (bond[0-9]+\.[0-9]+)@.*', line)
matched = re.search('.*: (' + int_type + r'[0-9]+\.[0-9]+)@.*', line)
if matched:
interface = matched.groups()[0]
else:

View File

@ -26,25 +26,31 @@ from subprocess import check_call
from charmhelpers.core.hookenv import (
log,
DEBUG,
ERROR,
)
def create(sysctl_dict, sysctl_file):
"""Creates a sysctl.conf file from a YAML associative array
:param sysctl_dict: a dict of sysctl options eg { 'kernel.max_pid': 1337 }
:type sysctl_dict: dict
:param sysctl_dict: a YAML-formatted string of sysctl options eg "{ 'kernel.max_pid': 1337 }"
:type sysctl_dict: str
:param sysctl_file: path to the sysctl file to be saved
:type sysctl_file: str or unicode
:returns: None
"""
sysctl_dict = yaml.load(sysctl_dict)
try:
sysctl_dict_parsed = yaml.safe_load(sysctl_dict)
except yaml.YAMLError:
log("Error parsing YAML sysctl_dict: {}".format(sysctl_dict),
level=ERROR)
return
with open(sysctl_file, "w") as fd:
for key, value in sysctl_dict.items():
for key, value in sysctl_dict_parsed.items():
fd.write("{}={}\n".format(key, value))
log("Updating sysctl_file: %s values: %s" % (sysctl_file, sysctl_dict),
log("Updating sysctl_file: %s values: %s" % (sysctl_file, sysctl_dict_parsed),
level=DEBUG)
check_call(["sysctl", "-p", sysctl_file])

View File

@ -21,7 +21,7 @@ from charmhelpers.core import hookenv
def render(source, target, context, owner='root', group='root',
perms=0o444, templates_dir=None):
perms=0o444, templates_dir=None, encoding='UTF-8'):
"""
Render a template.
@ -64,5 +64,5 @@ def render(source, target, context, owner='root', group='root',
level=hookenv.ERROR)
raise e
content = template.render(context)
host.mkdir(os.path.dirname(target), owner, group)
host.write_file(target, content, owner, group, perms)
host.mkdir(os.path.dirname(target), owner, group, perms=0o755)
host.write_file(target, content.encode(encoding), owner, group, perms)

View File

@ -0,0 +1,477 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2014-2015 Canonical Limited.
#
# This file is part of charm-helpers.
#
# charm-helpers is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version 3 as
# published by the Free Software Foundation.
#
# charm-helpers is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
#
#
# Authors:
# Kapil Thangavelu <kapil.foss@gmail.com>
#
"""
Intro
-----
A simple way to store state in units. This provides a key value
storage with support for versioned, transactional operation,
and can calculate deltas from previous values to simplify unit logic
when processing changes.
Hook Integration
----------------
There are several extant frameworks for hook execution, including
- charmhelpers.core.hookenv.Hooks
- charmhelpers.core.services.ServiceManager
The storage classes are framework agnostic, one simple integration is
via the HookData contextmanager. It will record the current hook
execution environment (including relation data, config data, etc.),
setup a transaction and allow easy access to the changes from
previously seen values. One consequence of the integration is the
reservation of particular keys ('rels', 'unit', 'env', 'config',
'charm_revisions') for their respective values.
Here's a fully worked integration example using hookenv.Hooks::
from charmhelper.core import hookenv, unitdata
hook_data = unitdata.HookData()
db = unitdata.kv()
hooks = hookenv.Hooks()
@hooks.hook
def config_changed():
# Print all changes to configuration from previously seen
# values.
for changed, (prev, cur) in hook_data.conf.items():
print('config changed', changed,
'previous value', prev,
'current value', cur)
# Get some unit-specific bookkeeping
if not db.get('pkg_key'):
key = urllib.urlopen('https://example.com/pkg_key').read()
db.set('pkg_key', key)
# Directly access all charm config as a mapping.
conf = db.getrange('config', True)
# Directly access all relation data as a mapping
rels = db.getrange('rels', True)
if __name__ == '__main__':
with hook_data():
hooks.execute(sys.argv)
A more basic integration is via the hook_scope context manager which simply
manages transaction scope (and records hook name, and timestamp)::
>>> from unitdata import kv
>>> db = kv()
>>> with db.hook_scope('install'):
... # do work, in transactional scope.
... db.set('x', 1)
>>> db.get('x')
1
Usage
-----
Values are automatically json de/serialized to preserve basic typing
and complex data struct capabilities (dicts, lists, ints, booleans, etc).
Individual values can be manipulated via get/set::
>>> kv.set('y', True)
>>> kv.get('y')
True
# We can set complex values (dicts, lists) as a single key.
>>> kv.set('config', {'a': 1, 'b': True})
# Also supports returning dictionaries as a record which
# provides attribute access.
>>> config = kv.get('config', record=True)
>>> config.b
True
Groups of keys can be manipulated with update/getrange::
>>> kv.update({'z': 1, 'y': 2}, prefix="gui.")
>>> kv.getrange('gui.', strip=True)
{'z': 1, 'y': 2}
When updating values, it's very helpful to understand which values
have actually changed and how they have changed. The storage
provides a delta method to provide for this::
>>> data = {'debug': True, 'option': 2}
>>> delta = kv.delta(data, 'config.')
>>> delta.debug.previous
None
>>> delta.debug.current
True
>>> delta
{'debug': (None, True), 'option': (None, 2)}
Note the delta method does not persist the actual change; it needs to
be explicitly saved via the 'update' method::
>>> kv.update(data, 'config.')
Values modified in the context of a hook scope retain historical values
associated to the hookname.
>>> with db.hook_scope('config-changed'):
... db.set('x', 42)
>>> db.gethistory('x')
[(1, u'x', 1, u'install', u'2015-01-21T16:49:30.038372'),
(2, u'x', 42, u'config-changed', u'2015-01-21T16:49:30.038786')]
"""
import collections
import contextlib
import datetime
import json
import os
import pprint
import sqlite3
import sys
__author__ = 'Kapil Thangavelu <kapil.foss@gmail.com>'
class Storage(object):
"""Simple key value database for local unit state within charms.
Modifications are automatically committed at hook exit. That's
currently regardless of exit code.
To support dicts, lists, integers, floats, and booleans, values
are automatically json encoded/decoded.
"""
def __init__(self, path=None):
self.db_path = path
if path is None:
self.db_path = os.path.join(
os.environ.get('CHARM_DIR', ''), '.unit-state.db')
self.conn = sqlite3.connect('%s' % self.db_path)
self.cursor = self.conn.cursor()
self.revision = None
self._closed = False
self._init()
def close(self):
if self._closed:
return
self.flush(False)
self.cursor.close()
self.conn.close()
self._closed = True
def _scoped_query(self, stmt, params=None):
if params is None:
params = []
return stmt, params
def get(self, key, default=None, record=False):
self.cursor.execute(
*self._scoped_query(
'select data from kv where key=?', [key]))
result = self.cursor.fetchone()
if not result:
return default
if record:
return Record(json.loads(result[0]))
return json.loads(result[0])
def getrange(self, key_prefix, strip=False):
stmt = "select key, data from kv where key like '%s%%'" % key_prefix
self.cursor.execute(*self._scoped_query(stmt))
result = self.cursor.fetchall()
if not result:
return None
if not strip:
key_prefix = ''
return dict([
(k[len(key_prefix):], json.loads(v)) for k, v in result])
def update(self, mapping, prefix=""):
for k, v in mapping.items():
self.set("%s%s" % (prefix, k), v)
def unset(self, key):
self.cursor.execute('delete from kv where key=?', [key])
if self.revision and self.cursor.rowcount:
self.cursor.execute(
'insert into kv_revisions values (?, ?, ?)',
[key, self.revision, json.dumps('DELETED')])
def set(self, key, value):
serialized = json.dumps(value)
self.cursor.execute(
'select data from kv where key=?', [key])
exists = self.cursor.fetchone()
# Skip mutations to the same value
if exists:
if exists[0] == serialized:
return value
if not exists:
self.cursor.execute(
'insert into kv (key, data) values (?, ?)',
(key, serialized))
else:
self.cursor.execute('''
update kv
set data = ?
where key = ?''', [serialized, key])
# Save
if not self.revision:
return value
self.cursor.execute(
'select 1 from kv_revisions where key=? and revision=?',
[key, self.revision])
exists = self.cursor.fetchone()
if not exists:
self.cursor.execute(
'''insert into kv_revisions (
revision, key, data) values (?, ?, ?)''',
(self.revision, key, serialized))
else:
self.cursor.execute(
'''
update kv_revisions
set data = ?
where key = ?
and revision = ?''',
[serialized, key, self.revision])
return value
def delta(self, mapping, prefix):
"""
return a delta containing values that have changed.
"""
previous = self.getrange(prefix, strip=True)
if not previous:
pk = set()
else:
pk = set(previous.keys())
ck = set(mapping.keys())
delta = DeltaSet()
# added
for k in ck.difference(pk):
delta[k] = Delta(None, mapping[k])
# removed
for k in pk.difference(ck):
delta[k] = Delta(previous[k], None)
# changed
for k in pk.intersection(ck):
c = mapping[k]
p = previous[k]
if c != p:
delta[k] = Delta(p, c)
return delta
@contextlib.contextmanager
def hook_scope(self, name=""):
"""Scope all future interactions to the current hook execution
revision."""
assert not self.revision
self.cursor.execute(
'insert into hooks (hook, date) values (?, ?)',
(name or sys.argv[0],
datetime.datetime.utcnow().isoformat()))
self.revision = self.cursor.lastrowid
try:
yield self.revision
self.revision = None
except:
self.flush(False)
self.revision = None
raise
else:
self.flush()
def flush(self, save=True):
if save:
self.conn.commit()
elif self._closed:
return
else:
self.conn.rollback()
def _init(self):
self.cursor.execute('''
create table if not exists kv (
key text,
data text,
primary key (key)
)''')
self.cursor.execute('''
create table if not exists kv_revisions (
key text,
revision integer,
data text,
primary key (key, revision)
)''')
self.cursor.execute('''
create table if not exists hooks (
version integer primary key autoincrement,
hook text,
date text
)''')
self.conn.commit()
def gethistory(self, key, deserialize=False):
self.cursor.execute(
'''
select kv.revision, kv.key, kv.data, h.hook, h.date
from kv_revisions kv,
hooks h
where kv.key=?
and kv.revision = h.version
''', [key])
if deserialize is False:
return self.cursor.fetchall()
return map(_parse_history, self.cursor.fetchall())
def debug(self, fh=sys.stderr):
self.cursor.execute('select * from kv')
pprint.pprint(self.cursor.fetchall(), stream=fh)
self.cursor.execute('select * from kv_revisions')
pprint.pprint(self.cursor.fetchall(), stream=fh)
def _parse_history(d):
return (d[0], d[1], json.loads(d[2]), d[3],
datetime.datetime.strptime(d[-1], "%Y-%m-%dT%H:%M:%S.%f"))
class HookData(object):
"""Simple integration for existing hook exec frameworks.
Records all unit information, and stores deltas for processing
by the hook.
Sample::
from charmhelper.core import hookenv, unitdata
changes = unitdata.HookData()
db = unitdata.kv()
hooks = hookenv.Hooks()
@hooks.hook
def config_changed():
# View all changes to configuration
for changed, (prev, cur) in changes.conf.items():
print('config changed', changed,
'previous value', prev,
'current value', cur)
# Get some unit-specific bookkeeping
if not db.get('pkg_key'):
key = urllib.urlopen('https://example.com/pkg_key').read()
db.set('pkg_key', key)
if __name__ == '__main__':
with changes():
hooks.execute(sys.argv)
"""
def __init__(self):
self.kv = kv()
self.conf = None
self.rels = None
@contextlib.contextmanager
def __call__(self):
from charmhelpers.core import hookenv
hook_name = hookenv.hook_name()
with self.kv.hook_scope(hook_name):
self._record_charm_version(hookenv.charm_dir())
delta_config, delta_relation = self._record_hook(hookenv)
yield self.kv, delta_config, delta_relation
def _record_charm_version(self, charm_dir):
# Record revisions. Charm revisions are meaningless
# to charm authors as they don't control the revision,
# so logic dependent on revision is not particularly
# useful; however, it is useful for debugging analysis.
charm_rev = open(
os.path.join(charm_dir, 'revision')).read().strip()
charm_rev = charm_rev or '0'
revs = self.kv.get('charm_revisions', [])
if charm_rev not in revs:
revs.append(charm_rev.strip() or '0')
self.kv.set('charm_revisions', revs)
def _record_hook(self, hookenv):
data = hookenv.execution_environment()
self.conf = conf_delta = self.kv.delta(data['conf'], 'config')
self.rels = rels_delta = self.kv.delta(data['rels'], 'rels')
self.kv.set('env', data['env'])
self.kv.set('unit', data['unit'])
self.kv.set('relid', data.get('relid'))
return conf_delta, rels_delta
class Record(dict):
__slots__ = ()
def __getattr__(self, k):
if k in self:
return self[k]
raise AttributeError(k)
class DeltaSet(Record):
__slots__ = ()
Delta = collections.namedtuple('Delta', ['previous', 'current'])
_KV = None
def kv():
global _KV
if _KV is None:
_KV = Storage()
return _KV

View File

@ -11,6 +11,7 @@ import ast
import shutil
import sys
import os
import glob
from base64 import b64decode
import maas as MAAS
@ -24,6 +25,7 @@ from charmhelpers.core.hookenv import (
related_units,
relation_ids,
relation_set,
relations_of_type,
unit_get,
config,
Hooks, UnregisteredHookError,
@ -56,6 +58,8 @@ from charmhelpers.contrib.hahelpers.cluster import (
from charmhelpers.contrib.openstack.utils import get_host_ip
from charmhelpers.contrib.charmsupport.nrpe import NRPE
hooks = Hooks()
COROSYNC_CONF = '/etc/corosync/corosync.conf'
@ -68,7 +72,8 @@ COROSYNC_CONF_FILES = [
COROSYNC_CONF
]
PACKAGES = ['corosync', 'pacemaker', 'python-netaddr', 'ipmitool']
PACKAGES = ['corosync', 'pacemaker', 'python-netaddr', 'ipmitool',
'libnagios-plugin-perl']
SUPPORTED_TRANSPORTS = ['udp', 'udpu', 'multicast', 'unicast']
@ -207,10 +212,13 @@ def config_changed():
configure_monitor_host()
configure_stonith()
update_nrpe_config()
@hooks.hook()
def upgrade_charm():
install()
update_nrpe_config()
def restart_corosync():
@ -582,6 +590,66 @@ def assert_charm_supports_ipv6():
"versions less than Trusty 14.04")
@hooks.hook('nrpe-external-master-relation-joined',
'nrpe-external-master-relation-changed')
def update_nrpe_config():
scripts_src = os.path.join(os.environ["CHARM_DIR"], "files",
"nrpe")
scripts_dst = "/usr/local/lib/nagios/plugins"
if not os.path.exists(scripts_dst):
os.makedirs(scripts_dst)
for fname in glob.glob(os.path.join(scripts_src, "*")):
if os.path.isfile(fname):
shutil.copy2(fname,
os.path.join(scripts_dst, os.path.basename(fname)))
sudoers_src = os.path.join(os.environ["CHARM_DIR"], "files",
"sudoers")
sudoers_dst = "/etc/sudoers.d"
for fname in glob.glob(os.path.join(sudoers_src, "*")):
if os.path.isfile(fname):
shutil.copy2(fname,
os.path.join(sudoers_dst, os.path.basename(fname)))
# Find out if nrpe set nagios_hostname
hostname = None
host_context = None
for rel in relations_of_type('nrpe-external-master'):
if 'nagios_hostname' in rel:
hostname = rel['nagios_hostname']
host_context = rel['nagios_host_context']
break
nrpe = NRPE(hostname=hostname)
apt_install('python-dbus')
if host_context:
current_unit = "%s:%s" % (host_context, local_unit())
else:
current_unit = local_unit()
# haproxy checks
nrpe.add_check(
shortname='haproxy_servers',
description='Check HAProxy {%s}' % current_unit,
check_cmd='check_haproxy.sh')
nrpe.add_check(
shortname='haproxy_queue',
description='Check HAProxy queue depth {%s}' % current_unit,
check_cmd='check_haproxy_queue_depth.sh')
# corosync/crm checks
nrpe.add_check(
shortname='corosync_rings',
description='Check Corosync rings {%s}' % current_unit,
check_cmd='check_corosync_rings')
nrpe.add_check(
shortname='crm_status',
description='Check crm status {%s}' % current_unit,
check_cmd='check_crm')
nrpe.write()
if __name__ == '__main__':
try:
hooks.execute(sys.argv)

View File

@ -0,0 +1 @@
hooks.py

View File

@ -0,0 +1 @@
hooks.py

View File

@ -14,6 +14,9 @@ provides:
ha:
interface: hacluster
scope: container
nrpe-external-master:
interface: nrpe-external-master
scope: container
peers:
hanode:
interface: hacluster
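The nrpe-external-master relation declared above is what the new update_nrpe_config() hook binds to; the checks only report into Nagios once an nrpe subordinate is related to this charm. An illustrative juju 1.x workflow, with service names assumed rather than taken from this commit:

juju deploy nrpe
juju add-relation hacluster:nrpe-external-master nrpe:nrpe-external-master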