#!/usr/bin/perl # # check_crm_v0_7 # # Copyright © 2013 Philip Garner, Sysnix Consultants Limited # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Authors: Phil Garner - phil@sysnix.com & Peter Mottram - peter@sysnix.com # # v0.1 09/01/2011 # v0.2 11/01/2011 # v0.3 22/08/2011 - bug fix and changes suggested by Vadym Chepkov # v0.4 23/08/2011 - update for spelling and anchor regex capture (Vadym Chepkov) # v0.5 29/09/2011 - Add standby warn/crit suggested by Sönke Martens & removal # of 'our' to 'my' to completely avoid problems with ePN # v0.6 14/03/2013 - Change from \w+ to \S+ in stopped check to cope with # Servers that have non word charachters in. Suggested by # Igal Baevsky. # v0.7 01/09/2013 - In testing as still not fully tested. Adds optional # constraints check (Boris Wesslowski). Adds fail count # threshold ( Zoran Bosnjak & Marko Hrastovec ) # # NOTE:- Requires Perl 5.8 or higher & either the Perl Module Nagios::Plugin # or Monitoring::Plugin, whichever is available for your system. # Nagios user will need sudo acces - suggest adding line below to # sudoers # nagios ALL=(ALL) NOPASSWD: /usr/sbin/crm_mon -1 -r -f # # if you want to check for location constraints (-c) also add # nagios ALL=(ALL) NOPASSWD: /usr/sbin/crm configure show # # In sudoers if requiretty is on (off state is default) # you will also need to add the line below # Defaults:nagios !requiretty # use warnings; use strict; my $plugin_provider=''; my $rc = eval { require Monitoring::Plugin; Monitoring::Plugin->import(); 1; }; if ($rc) { $plugin_provider = 'Monitoring'; } else{ require Nagios::Plugin; Nagios::Plugin->import(); $plugin_provider = 'Nagios'; } # Lines below may need changing if crm_mon or sudo installed in a # different location. my $sudo = '/usr/bin/sudo'; my $crm_mon = '/usr/sbin/crm_mon -1 -r -f'; my $crm_configure_show = '/usr/sbin/crm configure show'; my $np; if ($plugin_provider eq 'Monitoring') { $np = Monitoring::Plugin->new( shortname => 'check_crm', version => '0.7', usage => "Usage: %s \n\t\t--help for help\n", ); } else { $np = Nagios::Plugin->new( shortname => 'check_crm', version => '0.7', usage => "Usage: %s \n\t\t--help for help\n", ); } $np->add_arg( spec => 'warning|w', help => 'If failed Nodes, stopped Resources detected or Standby Nodes sends Warning instead of Critical (default) as long as there are no other errors and there is Quorum', required => 0, ); $np->add_arg( spec => 'standbyignore|s', help => 'Ignore any node(s) in standby, by default sends Critical', required => 0, ); $np->add_arg( spec => 'constraint|constraints|c', help => 'Also check configuration for location constraints (caused by migrations) and warn if there are any. Requires additional privileges see notes', required => 0, ); $np->add_arg( spec => 'failcount|failcounts|f=i', help => 'resource fail count to start warning on [default = 1].', required => 0, default => 1, ); $np->getopts; my $ConstraintsFlag = $np->opts->constraint; my @standby; # Check for -w option set warn if this is case instead of crit my $warn_or_crit = 'CRITICAL'; $warn_or_crit = 'WARNING' if $np->opts->warning; my $fh; open( $fh, "$sudo $crm_mon |" ) or $np->nagios_exit("CRITICAL", "Running $sudo $crm_mon has failed" ); foreach my $line (<$fh>) { if ( $line =~ m/Connection to cluster failed\:(.*)/i ) { # Check Cluster connected $np->nagios_exit("CRITICAL", "Connection to cluster FAILED: $1" ); } elsif ( $line =~ m/Current DC:/ ) { # Check for Quorum if ( $line =~ m/partition with quorum$/ ) { # Assume cluster is OK - we only add warn/crit after here $np->add_message("OK", "Cluster OK" ); } else { $np->add_message("CRITICAL", "No Quorum" ); } } elsif ( $line =~ m/^offline:\s*\[\s*(\S.*?)\s*\]/i ) { # Count offline nodes my @offline = split( /\s+/, $1 ); my $numoffline = scalar @offline; $np->add_message( $warn_or_crit, ": $numoffline Nodes Offline" ); } elsif ( $line =~ m/^node\s+(\S.*):\s*standby/i ) { # Check for standby nodes (suggested by Sönke Martens) # See later in code for message created from this push @standby, $1; } elsif ( $line =~ m/\s*(\S+)\s+\(\S+\)\:\s+Stopped/ ) { # Check Resources Stopped $np->add_message( $warn_or_crit, ": $1 Stopped" ); } elsif ( $line =~ m/\s*stopped\:\s*\[(.*)\]/i ) { # Check Master/Slave stopped $np->add_message( $warn_or_crit, ": $1 Stopped" ); } elsif ( $line =~ m/^Failed actions\:/ ) { # Check Failed Actions $np->add_message("CRITICAL", ": FAILED actions detected or not cleaned up" ); } elsif ( $line =~ m/\s*(\S+?)\s+ \(.*\)\:\s+\w+\s+\w+\s+\(unmanaged\)\s+/i ) { # Check Unmanaged $np->add_message("CRITICAL", ": $1 unmanaged FAILED" ); } elsif ( $line =~ m/\s*(\S+?)\s+ \(.*\)\:\s+not installed/i ) { # Check for errors $np->add_message("CRITICAL", ": $1 not installed" ); } elsif ( $line =~ m/\s*(\S+?):.*fail-count=(\d+)/i ) { if ( $2 >= $np->opts->failcount ) { # Check for resource Fail count (suggested by Vadym Chepkov) $np->add_message("WARNING", ": $1 failure detected, fail-count=$2" ); } } } # If found any Nodes in standby & no -s option used send warn/crit if ( scalar @standby > 0 && !$np->opts->standbyignore ) { $np->add_message( $warn_or_crit, ": " . join( ', ', @standby ) . " in Standby" ); } close($fh) or $np->nagios_exit("CRITICAL", "Running $crm_mon FAILED" ); # if -c flag set check configuration for constraints if ($ConstraintsFlag) { open( $fh, "$sudo $crm_configure_show|" ) or $np->nagios_exit("CRITICAL", "Running $sudo $crm_configure_show has failed" ); foreach my $line (<$fh>) { if ( $line =~ m/location cli-(prefer|standby)-\S+\s+(\S+)/ ) { $np->add_message("WARNING", ": $2 blocking location constraint detected" ); } } close($fh) or $np->nagios_exit("CRITICAL", "Running $crm_configure_show FAILED" ); } $np->nagios_exit( $np->check_messages() );