diff --git a/files/nagios/check_ceph_osd_services.py b/files/nagios/check_ceph_osd_services.py index 7f53b2d7..cff66541 100755 --- a/files/nagios/check_ceph_osd_services.py +++ b/files/nagios/check_ceph_osd_services.py @@ -6,9 +6,11 @@ import os import sys +from datetime import datetime, timedelta CRON_CHECK_TMPFILE = 'ceph-osd-checks' NAGIOS_HOME = '/var/lib/nagios' +CACHE_MAX_AGE = timedelta(minutes=10) STATE_OK = 0 STATE_WARNING = 1 @@ -31,6 +33,16 @@ def run_main(): print("File '{}' doesn't exist".format(_tmp_file)) return STATE_UNKNOWN + try: + s = os.stat(_tmp_file) + if datetime.now() - datetime.fromtimestamp(s.st_mtime) > CACHE_MAX_AGE: + print("Status file is older than {}".format(CACHE_MAX_AGE)) + return STATE_CRITICAL + except Exception as e: + print("Something went wrong grabbing stats for the file: {}".format( + str(e))) + return STATE_UNKNOWN + try: with open(_tmp_file, 'rt') as f: lines = f.readlines() @@ -38,12 +50,6 @@ def run_main(): print("Something went wrong reading the file: {}".format(str(e))) return STATE_UNKNOWN - # now remove the file in case the next check fails. - try: - os.remove(_tmp_file) - except Exception: - pass - if not lines: print("checked status file is empty: {}".format(_tmp_file)) return STATE_UNKNOWN