#!/usr/bin/perl
#
# check_crm_v0_7
#
# Copyright © 2013 Philip Garner, Sysnix Consultants Limited
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors: Phil Garner - phil@sysnix.com & Peter Mottram - peter@sysnix.com
#
# v0.1 09/01/2011
# v0.2 11/01/2011
# v0.3 22/08/2011 - bug fix and changes suggested by Vadym Chepkov
# v0.4 23/08/2011 - update for spelling and anchor regex capture (Vadym Chepkov)
# v0.5 29/09/2011 - Add standby warn/crit suggested by Sönke Martens & removal
#                   of 'our' to 'my' to completely avoid problems with ePN
# v0.6 14/03/2013 - Change from \w+ to \S+ in stopped check to cope with
#                   Servers that have non word characters in. Suggested by
#                   Igal Baevsky.
# v0.7 01/09/2013 - In testing as still not fully tested. Adds optional
#                   constraints check (Boris Wesslowski). Adds fail count
#                   threshold ( Zoran Bosnjak & Marko Hrastovec )
#
# NOTE:- Requires Perl 5.8 or higher & either the Perl Module Nagios::Plugin
#        or Monitoring::Plugin, whichever is available for your system.
#        Nagios user will need sudo access - suggest adding line below to
#        sudoers
#            nagios ALL=(ALL) NOPASSWD: /usr/sbin/crm_mon -1 -r -f
#
#        if you want to check for location constraints (-c) also add
#            nagios ALL=(ALL) NOPASSWD: /usr/sbin/crm configure show
#
#        In sudoers if requiretty is on (off state is default)
#        you will also need to add the line below
#            Defaults:nagios !requiretty
#

use warnings;
use strict;

# Records which plugin framework was loaded: 'Monitoring' for
# Monitoring::Plugin or 'Nagios' for Nagios::Plugin. Later code uses this
# value to decide which class to instantiate.
my $plugin_provider = '';

# Try Monitoring::Plugin first. The module is loaded at run time inside an
# eval so that its absence is not fatal; the trailing "1;" makes the eval
# return true only when both the require and the import succeeded.
my $rc = eval {
    require Monitoring::Plugin;
    Monitoring::Plugin->import();
    1;
};

if ($rc) {
    $plugin_provider = 'Monitoring';
}
else {
    # Fall back to Nagios::Plugin. If that is missing as well, report the
    # problem in plugin output format with the UNKNOWN exit status (3)
    # instead of dying with a raw Perl "Can't locate ..." error.
    my $fallback_loaded = eval {
        require Nagios::Plugin;
        Nagios::Plugin->import();
        1;
    };

    if ( !$fallback_loaded ) {
        print "CHECK_CRM UNKNOWN - neither Monitoring::Plugin nor "
          . "Nagios::Plugin could be loaded\n";
        exit 3;
    }

    $plugin_provider = 'Nagios';
}

# Lines below may need changing if crm_mon or sudo installed in a
# different location.
#
# Each command string is run through sudo exactly as written here, so the
# sudoers entries described in the header must match these strings
# (including the arguments).

my $sudo               = '/usr/bin/sudo';
my $crm_mon            = '/usr/sbin/crm_mon -1 -r -f';
my $crm_configure_show = '/usr/sbin/crm configure show';

# Plugin object used for option handling, message collection and exit.
my $np;

# Both frameworks are given exactly the same constructor arguments, so pick
# the class name from $plugin_provider and construct the object once rather
# than duplicating the call per framework.
my $plugin_class =
  $plugin_provider eq 'Monitoring'
  ? 'Monitoring::Plugin'
  : 'Nagios::Plugin';

$np = $plugin_class->new(
    shortname => 'check_crm',
    version   => '0.7',
    usage     => "Usage: %s <ARGS>\n\t\t--help for help\n",
);

# Command line options.

# -w / --warning: downgrade node/resource/standby problems to WARNING.
$np->add_arg(
    spec => 'warning|w',
    help =>
'If failed Nodes, stopped Resources detected or Standby Nodes sends Warning instead of Critical (default) as long as there are no other errors and there is Quorum',
    required => 0,
);

# -s / --standbyignore: do not report nodes that are in standby.
$np->add_arg(
    spec     => 'standbyignore|s',
    help     => 'Ignore any node(s) in standby, by default sends Critical',
    required => 0,
);

# -c / --constraint(s): additionally inspect "crm configure show" output
# for cli-prefer / cli-standby location constraints.
$np->add_arg(
    spec => 'constraint|constraints|c',
    help =>
'Also check configuration for location constraints (caused by migrations) and warn if there are any. Requires additional privileges see notes',
    required => 0,
);

# -f / --failcount(s) N: integer fail-count at or above which a resource
# produces a WARNING.
$np->add_arg(
    spec     => 'failcount|failcounts|f=i',
    help     => 'resource fail count to start warning on [default = 1].',
    required => 0,
    default  => 1,
);

# Parse the command line using the options declared on $np.
$np->getopts;

# True when -c was given: the constraints check is run after the crm_mon
# check.
my $ConstraintsFlag = $np->opts->constraint;

# Names of nodes found in standby while parsing crm_mon output; reported
# in a single message once parsing has finished.
my @standby;

# Severity used for offline nodes, stopped resources and standby nodes:
# CRITICAL by default, WARNING when -w was given.
my $warn_or_crit = $np->opts->warning ? 'WARNING' : 'CRITICAL';

# Read handle for the external commands (reused by the constraints check).
my $fh;

# Run crm_mon via sudo and read its output through a pipe. The pipe mode is
# passed explicitly rather than being parsed out of the command string.
open( $fh, '-|', "$sudo $crm_mon" )
  or $np->nagios_exit( "CRITICAL", "Running $sudo $crm_mon has failed" );

# Set once a "Current DC:" line has been seen. Without that line nothing
# is known about quorum, and no OK message would ever be recorded.
my $quorum_line_seen = 0;

# Read line by line; each line is classified by the first pattern it
# matches, so the order of the branches matters.
while ( my $line = <$fh> ) {

    if ( $line =~ m/Connection to cluster failed\:(.*)/i ) {

        # Check Cluster connected - nothing else can be checked, exit now
        $np->nagios_exit( "CRITICAL", "Connection to cluster FAILED: $1" );
    }
    elsif ( $line =~ m/Current DC:/ ) {
        $quorum_line_seen = 1;

        # Check for Quorum
        if ( $line =~ m/partition with quorum$/ ) {

            # Assume cluster is OK - we only add warn/crit after here
            $np->add_message( "OK", "Cluster OK" );
        }
        else {
            $np->add_message( "CRITICAL", "No Quorum" );
        }
    }
    elsif ( $line =~ m/^offline:\s*\[\s*(\S.*?)\s*\]/i ) {

        # Count offline nodes (whitespace separated list inside [ ])
        my @offline    = split( /\s+/, $1 );
        my $numoffline = scalar @offline;
        $np->add_message( $warn_or_crit, ": $numoffline Nodes Offline" );
    }
    elsif ( $line =~ m/^node\s+(\S.*):\s*standby/i ) {

        # Check for standby nodes (suggested by Sönke Martens)
        # Only collected here; reported once after the loop
        push @standby, $1;
    }
    elsif ( $line =~ m/\s*(\S+)\s+\(\S+\)\:\s+Stopped/ ) {

        # Check Resources Stopped
        $np->add_message( $warn_or_crit, ": $1 Stopped" );
    }
    elsif ( $line =~ m/\s*stopped\:\s*\[(.*)\]/i ) {

        # Check Master/Slave stopped
        $np->add_message( $warn_or_crit, ": $1 Stopped" );
    }
    elsif ( $line =~ m/^Failed actions\:/ ) {

        # Check Failed Actions
        $np->add_message( "CRITICAL",
            ": FAILED actions detected or not cleaned up" );
    }
    elsif ( $line =~ m/\s*(\S+?)\s+ \(.*\)\:\s+\w+\s+\w+\s+\(unmanaged\)\s+/i )
    {

        # Check Unmanaged
        $np->add_message( "CRITICAL", ": $1 unmanaged FAILED" );
    }
    elsif ( $line =~ m/\s*(\S+?)\s+ \(.*\)\:\s+not installed/i ) {

        # Check for errors
        $np->add_message( "CRITICAL", ": $1 not installed" );
    }
    elsif ( $line =~ m/\s*(\S+?):.*fail-count=(\d+)/i ) {

        # Check for resource Fail count (suggested by Vadym Chepkov);
        # only counts at or above the -f threshold are reported
        if ( $2 >= $np->opts->failcount ) {
            $np->add_message( "WARNING",
                ": $1 failure detected, fail-count=$2" );
        }
    }
}

# Closing a piped handle returns false when the command exited non-zero.
close($fh) or $np->nagios_exit( "CRITICAL", "Running $crm_mon FAILED" );

# crm_mon ran but printed no "Current DC:" line: the quorum state could not
# be determined, so do not let the check fall through as OK with an empty
# message.
if ( !$quorum_line_seen ) {
    $np->add_message( "CRITICAL",
        "No 'Current DC' line in crm_mon output, quorum state unknown" );
}

# If found any Nodes in standby & no -s option used send warn/crit
if ( scalar @standby > 0 && !$np->opts->standbyignore ) {
    $np->add_message( $warn_or_crit,
        ": " . join( ', ', @standby ) . " in Standby" );
}

# if -c flag set check configuration for constraints
if ($ConstraintsFlag) {

    # Run "crm configure show" via sudo and read its output through a pipe.
    # The pipe mode is passed explicitly rather than being parsed out of
    # the command string.
    open( $fh, '-|', "$sudo $crm_configure_show" )
      or $np->nagios_exit( "CRITICAL",
        "Running $sudo $crm_configure_show has failed" );

    # A "location cli-prefer-..." or "location cli-standby-..." entry is
    # reported as a WARNING naming the word that follows the constraint id.
    while ( my $line = <$fh> ) {
        if ( $line =~ m/location cli-(prefer|standby)-\S+\s+(\S+)/ ) {
            $np->add_message( "WARNING",
                ": $2 blocking location constraint detected" );
        }
    }

    # Closing a piped handle returns false when the command exited non-zero.
    close($fh)
      or $np->nagios_exit( "CRITICAL", "Running $crm_configure_show FAILED" );
}

# Turn the collected messages into the final status and output text, then
# exit with the matching plugin return code.
$np->nagios_exit( $np->check_messages() );