Nagios: Update ceph_health check

The ceph_health check in Nagios incorrectly sets both the warning and
critical levels to 0. For the ceph_health_status metric, a value of 0
indicates the cluster is healthy, while 1 indicates a warning and
2 indicates an error state. The Nagios check for ceph_health is
updated to reflect these values.

Change-Id: Iffe80f1c34f6edee6370dd7e707e5f55f83f1ec1
This commit is contained in:
Steve Wilkerson 2018-11-06 14:51:40 -06:00
parent fca344900f
commit ba22b0e726
1 changed file with 1 addition and 1 deletion

View File

@@ -453,7 +453,7 @@ conf:
command_line: $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_ntp_clock_skew_high' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- NTP clock skew is more than 2 seconds.' --ok_message 'OK- NTP clock skew is less than 2 seconds.'
- check_ceph_health:
command_name: check_ceph_health
command_line: $USER1$/check_exporter_health_metric.py --exporter_api 'http://$HOSTADDRESS$:9283/metrics' --health_metric ceph_health_status --critical 0 --warning 0
command_line: $USER1$/check_exporter_health_metric.py --exporter_api 'http://$HOSTADDRESS$:9283/metrics' --health_metric ceph_health_status --critical 2 --warning 1
- check_prometheus_hosts:
command_name: check_prometheus_hosts
command_line: $USER1$/check_update_prometheus_hosts.py --prometheus_api $USER2$ --object_file_loc /opt/nagios/etc/objects/prometheus_discovery_objects.cfg