diff --git a/config.yaml b/config.yaml
index fa38e73..322e834 100644
--- a/config.yaml
+++ b/config.yaml
@@ -130,6 +130,20 @@ options:
     default: "-m -r 60 180 10 20"
     type: string
     description: String appended to nagios check
+  nagios-replication-check-params:
+    default: "replicated 15 2 1"
+    type: string
+    description: |
+      Space delimited parameters for check_swift_replicator_logs.sh.
+        search_pattern
+        interval_in_minutes
+        minimum_hits_before_warning
+        minimum_hits_before_critical
+      Default of "replicated 15 2 1" leads to warning alert when there
+      have not been at least 2 lines matching "replicated" in the last 15
+      minutes, and critical if there have been no matching lines in the
+      last 15 minutes.
+      Set to blank string "" to disable the check.
   nagios_context:
     default: "juju"
     type: string
diff --git a/files/nrpe-external-master/check_swift_replicator_logs.sh b/files/nrpe-external-master/check_swift_replicator_logs.sh
new file mode 100755
index 0000000..322c2e0
--- /dev/null
+++ b/files/nrpe-external-master/check_swift_replicator_logs.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# NRPE wrapper: runs check_timed_logs.pl against /var/log/syslog in -reverse mode.
+# Positional args (all optional): pattern, interval in minutes, warning minimum, critical minimum.
+
+pattern=${1:-replicated}
+interval=${2:-15}
+warn_min=${3:-2}
+crit_min=${4:-1}
+
+exec sudo -u root /usr/local/lib/nagios/plugins/check_timed_logs.pl -pattern "$pattern" -logfile /var/log/syslog -interval "$interval" -w "$warn_min" -c "$crit_min" -reverse 2>&1
diff --git a/files/nrpe-external-master/check_timed_logs.pl b/files/nrpe-external-master/check_timed_logs.pl
new file mode 100755
index 0000000..006331b
--- /dev/null
+++ b/files/nrpe-external-master/check_timed_logs.pl
@@ -0,0 +1,195 @@
+#!/usr/bin/perl
+##############################################################################
+#
+# NAME:      check_timed_logs.pl
+#
+# AUTHOR:    Gerd Radecke
+#
+# COMMENT:   Script searches a text file for the appearance of a given RegEx within a given time period.
+#            Using additional parameters you can adjust: Time string format,
+#            time string position, number of pattern matches required to be "successful".
+#
+# Return Values for NRPE:
+# OK - There are only 0 instances of $pattern in the last $interval minutes (0)
+# CRITICAL - There are $hits instances of \"$pattern\" in the last $interval minutes (2)
+# WARNING - There are $hits instances of \"$pattern\" in the last $interval minutes (1)
+# UNKNOWN - There were no files matching the passed filename, or the arguments were invalid (3)
+#
+# REQUIRES: perl-Time-Piece perl-File-ReadBackwards
+# ON RHEL-based systems you can run: yum install perl-Time-Piece perl-File-ReadBackwards
+#
+# CHANGELOG:
+# 1.0 2013-02-19 - initial version
+# 1.0.1 2013-02-27 - fixed false variable reference
+# 1.0.2 2013-10-07 - integrated threshold comparison fix by Christoph Tavan - thanks ;)
+# 1.0.3 2019-12-23 - Added --reverse flag to check for presence of lines within last $interval - drewn3ss
+# 1.0.4 2019-12-30 - Updated time_pattern default to match ubuntu syslog timepattern
+#
+##############################################################################
+
+
+use File::ReadBackwards; # EPEL RPM: perl-File-ReadBackwards.noarch
+use Getopt::Long;
+use Time::Piece; # RHEL package: perl-Time-Piece
+use File::Find;
+
+$ENV{"LC_ALL"} = "C";
+$time_pattern = '%b %e %H:%M:%S';
+$warning = 1;
+$critical = 1;
+$reverse = 0;
+
+$time_position = 0;
+$result = GetOptions (
+    "pattern=s"      => \$pattern,        # string e.g. "CRITICAL"
+    "logfile=s"      => \$logfile,        # string e.g. "/var/log/messages"
+    "interval=i"     => \$interval,       # int e.g. 30 for half an hour
+    "timepattern=s"  => \$time_pattern,   # string e.g. '%Y-%m-%d %H:%M:%S'
+    "timeposition=i" => \$time_position,  # int, each line is split into string on the space character, this provides the index of the first string block for the time
+    "warning|w=i"    => \$warning,        # int e.g. 3
+    "critical|c=i"   => \$critical,       # int e.g. 5
+    "debug|d|vv"     => \$debug,          # flag/boolean
+    "verbose|v"      => \$verbose,        # flag/boolean
+    "reverse|r"      => \$reverse,        # flag/boolean - should we report on absence of pattern rather than presence ("?" belongs to help only)
+    "help|h|?"       => \$usage           # flag/boolean - is help called?
+    );
+
+$usage = 1 unless $result; # GetOptions returns false on invalid options: show the usage text instead of running with a partial configuration
+if ($usage || !(defined($pattern) && $pattern ne "") || !(defined($logfile) && $logfile ne "") || !(defined($interval) && $interval > 0 )) {
+    print "\nUsage: $0
+    \t -pattern <regex pattern>
+    \t -logfile <path to log file>
+    \t -interval <minutes>
+    \t [-reverse] # report on absence of enough entries in the timeframe
+    \t [-timepattern <time pattern>]
+    \t [-warning|w <number of hits>] [-critical|c <number of hits>]
+    \t [-timeposition <index of first time field>] \n\n";
+    print "To allow for rotating logfiles, any file that matches the passed filename and was changed within the passed interval is checked. e.g. If you pass /var/log/applog, this could match /var/log/applog.0, /var/log/applog.old and so on. However, it does not handle compressed (e.g. gzip/bzip) files. \n\n";
+    print "Default time pattern is: %b %e %H:%M:%S => Dec 31 17:20:40\n";
+    print "Example Time patterns (from a RHEL system):
+    BSD/Syslog: %b %d %H:%M:%S => Dec 31 17:20:40
+    Apache Logs: %d/%b/%Y:%H:%M:%S (with -timeposition 3) => 31/Dec/2012:17:20:40
+    Websphere Logs: %d-%b-%Y %I:%M:%S %p => 31-Dec-2012 05:20:40 PM
+    Nagios logs: %s => 1361260238 (seconds since 01-01-1970) \n";
+    print "For a posix time format documentation check out: http://linux.die.net/man/3/strftime \n\n";
+    print "Default warning/critical threshold of pattern matches to find is: 1 -> unless you change this, you will only get OK or CRITICAL, but never WARNING\n\n";
+    print "Default time position is 0 \n";
+    print "\t Time Position: each line is split into an array of strings on the space character, this provides the index for the first time string.\n";
+    print "\t Note: If the line starts with the time, that means we start at index 0.\n\n";
+    print "The values for interval and warning/critical need to be larger than zero \n";
+    exit 3; # UNKNOWN - a bare exit would return 0 and report a misconfigured check as OK
+}
+
+my $now = localtime;
+
+$oldestDate = $now - $interval*60; # anything logged before this moment is outside the interval
+if ($debug) { print "Now: $now and tzoffset: ". ($now)->tzoffset ."\n"; }
+if ($debug) { print "Oldest date: $oldestDate and tzoffset: ". ($oldestDate)->tzoffset ."\n"; }
+
+
+$hits = 0; # number of matches for the regex within the log files will be counted in this variable
+$validFileNames = 0; # number of files that match the given filename
+my @dateFields = $time_pattern =~ / /g; # how many spaces do we have in our time pattern?
+my $dateFieldsCount = @dateFields; # count the number of spaces in the date format
+
+if ($debug) {
+    $verbose = 1; # if we debug, we want to have all information
+    print "Interval: $interval equals " . ($interval/1440) . " Fraction of days.\n";
+}
+
+
+($DIR) = $logfile =~ m{^(.*/)}; # greedy: everything up to and including the last slash is the directory to search
+$DIR = './' unless defined $DIR; # bare file name without any slash: search the current directory
+
+find(\&process, $DIR); # process() is called for every entry below $DIR and updates $hits / $validFileNames
+sub process {
+
+### note the following is done for each file that is found and matches the name and date criteria
+    if ($File::Find::name =~ m/$logfile/ && (-T)) { # match only files that are ASCII files (-T) and that contain the file name ($logfile is applied as a regex)
+        $validFileNames += 1;
+        if ($debug) { print "Found: $File::Find::name has age " . (-M) ." (in Fraction of days) \n"; }
+
+        # -M returns the last change date of the file in fraction of days. e.g. 24 hours ago -> 1, 6 hours ago -> 0.25
+        if ((-M) < ($interval/1440)) { # match only files whose last change (-M) is within the change interval
+            # perldoc defines -M : Script start time minus file modification time, in days.
+
+            $LOGS = File::ReadBackwards->new($File::Find::name) or
+                die "Can't read file: $File::Find::name\n";
+
+            while (defined($line = $LOGS->readline) ) { # newest line first, so we can stop at the first line that is too old
+                my @fields = split ' ', $line; # split the line into an array, split on ' '(space)
+                $dateString = ""; # reset the datestring for each line
+                for ($i=0; $i <= $dateFieldsCount; $i++) {
+                    $dateString .= $fields[$time_position + $i] . " "; # concatenate all date strings into one parseable string
+                }
+                $dateString =~ s/^\s+|\s+$//g ; # remove both leading and trailing whitespace - perl 6 will have a trim() function, until then - regex !
+                $dateString =~ s/<|>|\]|\[//g ; # remove brackets
+                #if ($debug) { print "Datestring: $dateString \n";} # this is only needed if you are unsure which strings of the array are part of your datestring
+
+                my $dt = Time::Piece->strptime($dateString, $time_pattern); # parse string into Time::Piece object (dies if the string does not fit the pattern)
+                my $dt_tzadjusted = ($dt - $now->tzoffset); # TIME::PIECE assumes the parsed dates will be UTC, we need to adjust to the local tz offset
+
+                # some date formats don't have the year information e.g. Dec 31 15:50:57 -> the year would automatically be parsed to 1970,
+                # which is probably never correct. We will correct this to this or last year
+                if ($dt->year == 1970) {
+                    $dt = $dt->add_years($now->year - 1970); # We cannot set the year directly. So we add the number of years that have passed since 1970.
+                    $dt_tzadjusted = ($dt - $now->tzoffset);
+                    # NOTE: If $now is January 1st and we're looking at log files from the end of last year, we will add too many years
+                    # hence if the date is now in the future, we subtract one year again.
+                    if ($dt_tzadjusted > $now) {
+                        $dt = $dt->add_years(-1);
+                        $dt_tzadjusted = ($dt - $now->tzoffset);
+                    }
+                }
+
+                if ($dt_tzadjusted > $oldestDate) { # is the date bigger=>newer than the oldest date we want to look at?
+                    if ($line =~ m/$pattern/){ # if the line contains the regex pattern
+                        if ($debug) {print $dt . " => "; }
+                        if ($verbose) { print $line; }
+                        $hits++; # increase by 1 hit
+                    }
+                }
+                else{
+                    last; #if the date is older than the oldest we still care about, leave this loop -> go to the next file if available
+                }
+            }
+
+            $LOGS->close; # close the File::ReadBackwards object itself; the bareword LOGS handle was never opened
+        }
+
+    }
+}## the find sub process ends here
+
+
+
+if (!$reverse) {
+    if ($hits >= ($critical + 0)) {
+        print "CRITICAL - There are $hits instances of \"$pattern\" in the last $interval minutes\n";
+        exit 2; }
+    if ($hits >= ($warning + 0)) {
+        print "WARNING - There are $hits instances of \"$pattern\" in the last $interval minutes\n";
+        exit 1; }
+    if ($validFileNames == 0) {
+        print "UNKNOWN - There were no files matching the passed filename: \"$logfile\"\n";
+        exit 3; }
+    else {
+        print "OK - There are only $hits instances of \"$pattern\" in the last $interval minutes - Warning threshold is $warning\n";
+        exit 0;
+    }
+} else {
+    if ($validFileNames == 0) { # must come first: without any log file $hits stays 0 and would always be reported as CRITICAL
+        print "UNKNOWN - There were no files matching the passed filename: \"$logfile\"\n";
+        exit 3; }
+    if ($hits < ($critical + 0)) {
+        print "CRITICAL - There are only $hits instances of \"$pattern\" in the last $interval minutes\n";
+        exit 2; }
+    if ($hits < ($warning + 0)) {
+        print "WARNING - There are only $hits instances of \"$pattern\" in the last $interval minutes\n";
+        exit 1; }
+    else {
+        print "OK - There are at least $hits instances of \"$pattern\" in the last $interval minutes - Warning threshold is $warning\n";
+        exit 0;
+    }
+
+}
+
diff --git a/files/sudo/swift-storage b/files/sudo/swift-storage
index e05f7cc..6ab3110 100644
--- a/files/sudo/swift-storage
+++ b/files/sudo/swift-storage
@@ -1 +1,2 @@
 nagios ALL=(swift) NOPASSWD:/usr/bin/swift-init status *
+nagios ALL=NOPASSWD:/usr/local/lib/nagios/plugins/check_timed_logs.pl *
diff --git a/hooks/swift_storage_hooks.py b/hooks/swift_storage_hooks.py
index f09a900..68600fe 100755
--- a/hooks/swift_storage_hooks.py
+++ b/hooks/swift_storage_hooks.py
@@ -385,6 +385,12 @@ def update_nrpe_config():
rsync(os.path.join(os.getenv('CHARM_DIR'), 'files', 'nrpe-external-master', 'check_swift_storage.py'), os.path.join(NAGIOS_PLUGINS, 'check_swift_storage.py')) + rsync(os.path.join(os.getenv('CHARM_DIR'), 'files', 'nrpe-external-master', + 'check_timed_logs.pl'), + os.path.join(NAGIOS_PLUGINS, 'check_timed_logs.pl')) + rsync(os.path.join(os.getenv('CHARM_DIR'), 'files', 'nrpe-external-master', + 'check_swift_replicator_logs.sh'), + os.path.join(NAGIOS_PLUGINS, 'check_swift_replicator_logs.sh')) rsync(os.path.join(os.getenv('CHARM_DIR'), 'files', 'nrpe-external-master', 'check_swift_service'), os.path.join(NAGIOS_PLUGINS, 'check_swift_service')) @@ -405,6 +411,16 @@ def update_nrpe_config(): check_cmd='check_swift_storage.py {}'.format( config('nagios-check-params')) ) + if config('nagios-replication-check-params'): + nrpe_setup.add_check( + shortname='swift_replicator_health', + description='Check swift object replicator log reporting', + check_cmd='check_swift_replicator_logs.sh {}'.format( + config('nagios-replication-check-params')) + ) + else: + nrpe_setup.remove_check(shortname='swift_replicator_health') + nrpe.add_init_service_checks(nrpe_setup, SWIFT_SVCS, current_unit) nrpe_setup.write() diff --git a/lib/swift_storage_utils.py b/lib/swift_storage_utils.py index 271f00e..e9f11f9 100644 --- a/lib/swift_storage_utils.py +++ b/lib/swift_storage_utils.py @@ -112,6 +112,8 @@ PACKAGES = [ 'python-psutil', 'ufw', 'xfsprogs', + 'libfile-readbackwards-perl', + 'libtime-piece-perl', ] PY3_PACKAGES = [ diff --git a/unit_tests/test_swift_storage_relations.py b/unit_tests/test_swift_storage_relations.py index 726f31b..a5408f7 100644 --- a/unit_tests/test_swift_storage_relations.py +++ b/unit_tests/test_swift_storage_relations.py @@ -176,7 +176,8 @@ class SwiftStorageRelationsTests(CharmTestCase): self.apt_install.assert_called_with( ['gdisk', 'lvm2', 'swift', 'swift-account', 'swift-container', 'swift-object', 'python-jinja2', - 'python-psutil', 'ufw', 'xfsprogs'], + 
'python-psutil', 'ufw', 'xfsprogs', + 'libfile-readbackwards-perl', 'libtime-piece-perl'], fatal=True) self.assertTrue(self.update_nrpe_config.called) self.assertTrue(mock_ensure_devs_tracked.called) diff --git a/unit_tests/test_swift_storage_utils.py b/unit_tests/test_swift_storage_utils.py index 7d4f91d..1f1f7f6 100644 --- a/unit_tests/test_swift_storage_utils.py +++ b/unit_tests/test_swift_storage_utils.py @@ -567,7 +567,8 @@ class SwiftStorageUtilsTests(CharmTestCase): options=dpkg_opts, packages=['gdisk', 'lvm2', 'swift', 'swift-account', 'swift-container', 'swift-object', 'python-jinja2', - 'python-psutil', 'ufw', 'xfsprogs'], + 'python-psutil', 'ufw', 'xfsprogs', + 'libfile-readbackwards-perl', 'libtime-piece-perl'], fatal=True ) self.assertTrue(mock_remove_old_packages.called) @@ -600,6 +601,7 @@ class SwiftStorageUtilsTests(CharmTestCase): options=dpkg_opts, packages=['gdisk', 'lvm2', 'swift', 'swift-account', 'swift-container', 'swift-object', 'ufw', 'xfsprogs', + 'libfile-readbackwards-perl', 'libtime-piece-perl', 'python3-jinja2', 'python3-psutil', 'python3-six', 'python3-swift'], fatal=True