Don't return Critical when ceph is in warning state.
The current implementation returns Critical when Ceph is in a warning state, apart from a few known conditions that are considered operational tasks. However, this causes many alarms. This patch changes the behavior to report Warning when Ceph is in HEALTH_WARN. If known operational tasks exceed their thresholds, Critical is returned. Change-Id: I7a330189da8f0ba9168cedb534823c5e8f4795ba
This commit is contained in:
parent
7a362ff0a5
commit
35c8e40e83
14
config.yaml
14
config.yaml
|
@ -189,17 +189,21 @@ options:
|
||||||
type: float
|
type: float
|
||||||
description: "Threshold for degraded ratio (0.1 = 10%)"
|
description: "Threshold for degraded ratio (0.1 = 10%)"
|
||||||
nagios_misplaced_thresh:
|
nagios_misplaced_thresh:
|
||||||
default: 10.0
|
default: 1.0
|
||||||
type: float
|
type: float
|
||||||
description: "Threshold for misplaced ratio (0.1 = 10%)"
|
description: "Threshold for misplaced ratio (0.1 = 10%)"
|
||||||
nagios_recovery_rate:
|
nagios_recovery_rate:
|
||||||
default: '1'
|
default: '1'
|
||||||
type: string
|
type: string
|
||||||
description: Recovery rate below which we consider recovery to be stalled
|
description: |
|
||||||
nagios_ignore_nodeepscub:
|
Recovery rate (in objects/s) below which we consider recovery
|
||||||
default: False
|
to be stalled.
|
||||||
|
nagios_raise_nodeepscrub:
|
||||||
|
default: True
|
||||||
type: boolean
|
type: boolean
|
||||||
description: Whether to ignore the nodeep-scrub flag
|
description: |
|
||||||
|
Whether to report Critical instead of Warning when the nodeep-scrub
|
||||||
|
flag is set.
|
||||||
use-direct-io:
|
use-direct-io:
|
||||||
type: boolean
|
type: boolean
|
||||||
default: True
|
default: True
|
||||||
|
|
|
@ -102,10 +102,6 @@ def check_ceph_status(args):
|
||||||
:returns string, describing the status of the ceph cluster.
|
:returns string, describing the status of the ceph cluster.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
ignorable = (r'\d+ pgs (?:backfill|degraded|recovery_wait|stuck unclean)|'
|
|
||||||
'recovery \d+\/\d+ objects (?:degraded|misplaced)')
|
|
||||||
if args.ignore_nodeepscrub:
|
|
||||||
ignorable = ignorable + '|nodeep-scrub flag\(s\) set'
|
|
||||||
status_critical = False
|
status_critical = False
|
||||||
if args.status_file:
|
if args.status_file:
|
||||||
check_file_freshness(args.status_file)
|
check_file_freshness(args.status_file)
|
||||||
|
@ -136,41 +132,60 @@ def check_ceph_status(args):
|
||||||
luminous = False
|
luminous = False
|
||||||
|
|
||||||
if overall_status != 'HEALTH_OK':
|
if overall_status != 'HEALTH_OK':
|
||||||
# Health is not OK, check if any lines are not in our list of OK
|
# Health is not OK, collect status message(s) and
|
||||||
# any lines that don't match, check is critical
|
# decide whether to return warning or critical
|
||||||
|
status_critical = False
|
||||||
status_msg = []
|
status_msg = []
|
||||||
if luminous:
|
if luminous:
|
||||||
status_messages = [x['summary']['message'] for x in status_data['health'].get('checks').values()]
|
status_messages = [x['summary']['message']
|
||||||
|
for x in
|
||||||
|
status_data['health'].get('checks').values()]
|
||||||
else:
|
else:
|
||||||
status_messages = [x['summary'] for x in status_data['health']['summary']]
|
status_messages = [x['summary']
|
||||||
|
for x in
|
||||||
|
status_data['health']['summary']]
|
||||||
for status in status_messages:
|
for status in status_messages:
|
||||||
if not re.match(ignorable, status):
|
status_msg.append(status)
|
||||||
|
# Check if the nodeep-scrub flag is set and whether it should raise an error
|
||||||
|
if args.raise_nodeepscrub:
|
||||||
|
if re.match("nodeep-scrub flag", status):
|
||||||
|
status_critical = True
|
||||||
|
if overall_status == 'HEALTH_CRITICAL' or \
|
||||||
|
overall_status == 'HEALTH_ERR':
|
||||||
|
# HEALTH_ERR, report critical
|
||||||
|
status_critical = True
|
||||||
|
else:
|
||||||
|
# HEALTH_WARN
|
||||||
|
# Check the threshold for a list of operational tasks,
|
||||||
|
# and return CRITICAL if exceeded
|
||||||
|
degraded_ratio = float(status_data['pgmap'].get('degraded_ratio',
|
||||||
|
0.0))
|
||||||
|
if degraded_ratio > args.degraded_thresh:
|
||||||
status_critical = True
|
status_critical = True
|
||||||
status_msg.append(status)
|
if degraded_ratio > 0:
|
||||||
# If we got this far, then the status is not OK but the status lines
|
status_msg.append("Degraded ratio: {}".format(degraded_ratio))
|
||||||
# are all in our list of things we consider to be operational tasks.
|
misplaced_ratio = float(status_data['pgmap'].get('misplaced_ratio',
|
||||||
# Check the thresholds and return CRITICAL if exceeded,
|
0.0))
|
||||||
# otherwise there's something not accounted for and we want to know
|
if misplaced_ratio > args.misplaced_thresh:
|
||||||
# about it with a WARN alert.
|
status_critical = True
|
||||||
degraded_ratio = status_data['pgmap'].get('degraded_ratio', 0.0)
|
if misplaced_ratio > 0:
|
||||||
if degraded_ratio > args.degraded_thresh:
|
status_msg.append("Misplaced ratio: {}".
|
||||||
status_critical = True
|
format(misplaced_ratio))
|
||||||
status_msg.append("Degraded ratio: {}".format(degraded_ratio))
|
recovering = float(status_data['pgmap'].
|
||||||
misplaced_ratio = status_data['pgmap'].get('misplaced_ratio', 0.0)
|
get('recovering_objects_per_sec', 0.0))
|
||||||
if misplaced_ratio > args.misplaced_thresh:
|
if (degraded_ratio > 0 or misplaced_ratio > 0) \
|
||||||
status_critical = True
|
and recovering > 0 \
|
||||||
status_msg.append("Misplaced ratio: {}".format(misplaced_ratio))
|
and recovering < args.recovery_rate:
|
||||||
recovering = status_data['pgmap'].get('recovering_objects_per_sec',
|
status_critical = True
|
||||||
0.0)
|
if recovering > 0:
|
||||||
if recovering < args.recovery_rate:
|
status_msg.append("Recovering objects/s {}".format(recovering))
|
||||||
status_critical = True
|
|
||||||
status_msg.append("Recovering objects/sec {}".format(recovering))
|
|
||||||
if status_critical:
|
if status_critical:
|
||||||
msg = 'CRITICAL: ceph health: "{} {}"'.format(
|
msg = 'CRITICAL: ceph health: "{} {}"'.format(
|
||||||
overall_status,
|
overall_status,
|
||||||
", ".join(status_msg))
|
", ".join(status_msg))
|
||||||
raise CriticalError(msg)
|
raise CriticalError(msg)
|
||||||
if overall_status == 'HEALTH_WARN':
|
else:
|
||||||
|
# overall_status == 'HEALTH_WARN':
|
||||||
msg = "WARNING: {}".format(", ".join(status_msg))
|
msg = "WARNING: {}".format(", ".join(status_msg))
|
||||||
raise WarnError(msg)
|
raise WarnError(msg)
|
||||||
message = "All OK"
|
message = "All OK"
|
||||||
|
@ -187,21 +202,21 @@ def parse_args(args):
|
||||||
'user account does not have rights for the Ceph '
|
'user account does not have rights for the Ceph '
|
||||||
'config files.')
|
'config files.')
|
||||||
parser.add_argument('--degraded_thresh', dest='degraded_thresh',
|
parser.add_argument('--degraded_thresh', dest='degraded_thresh',
|
||||||
default=1, type=float,
|
default=1.0, type=float,
|
||||||
help="Threshold for degraded ratio (0.1 = 10%)")
|
help="Threshold for degraded ratio (0.1 = 10%)")
|
||||||
parser.add_argument('--misplaced_thresh', dest='misplaced_thresh',
|
parser.add_argument('--misplaced_thresh', dest='misplaced_thresh',
|
||||||
default=10, type=float,
|
default=1.0, type=float,
|
||||||
help="Threshold for misplaced ratio (0.1 = 10%)")
|
help="Threshold for misplaced ratio (0.1 = 10%)")
|
||||||
parser.add_argument('--recovery_rate', dest='recovery_rate',
|
parser.add_argument('--recovery_rate', dest='recovery_rate',
|
||||||
default=1, type=int,
|
default=1, type=int,
|
||||||
help="Recovery rate below which we consider recovery "
|
help="Recovery rate (in objects/s) below which we"
|
||||||
"to be stalled")
|
"consider recovery to be stalled")
|
||||||
parser.add_argument('--ignore_nodeepscrub', dest='ignore_nodeepscrub',
|
parser.add_argument('--raise_nodeepscrub', dest='raise_nodeepscrub',
|
||||||
default=False, action='store_true',
|
default=False, action='store_true',
|
||||||
help="Whether to ignore the nodeep-scrub flag. If "
|
help="Whether to raise an error for the nodeep-scrub"
|
||||||
"the nodeep-scrub flag is set, the check returns "
|
"flag. If the nodeep-scrub flag is set,"
|
||||||
"warning if this param is passed, otherwise "
|
"the check returns critical if this param is"
|
||||||
"returns critical.")
|
"passed, otherwise it returns warning.")
|
||||||
return parser.parse_args(args)
|
return parser.parse_args(args)
|
||||||
|
|
||||||
|
|
||||||
|
@ -218,7 +233,7 @@ def main(args):
|
||||||
exitcode = 'critical'
|
exitcode = 'critical'
|
||||||
except WarnError as msg:
|
except WarnError as msg:
|
||||||
print(msg)
|
print(msg)
|
||||||
exitcode = 'critical'
|
exitcode = 'warning'
|
||||||
except:
|
except:
|
||||||
print("%s raised unknown exception '%s'" % ('check_ceph_status',
|
print("%s raised unknown exception '%s'" % ('check_ceph_status',
|
||||||
sys.exc_info()[0]))
|
sys.exc_info()[0]))
|
||||||
|
|
|
@ -748,8 +748,8 @@ def update_nrpe_config():
|
||||||
config('nagios_degraded_thresh'),
|
config('nagios_degraded_thresh'),
|
||||||
config('nagios_misplaced_thresh'),
|
config('nagios_misplaced_thresh'),
|
||||||
config('nagios_recovery_rate'))
|
config('nagios_recovery_rate'))
|
||||||
if config('nagios_ignore_nodeepscub'):
|
if config('nagios_raise_nodeepscrub'):
|
||||||
check_cmd = check_cmd + ' --ignore_nodeepscrub'
|
check_cmd = check_cmd + ' --raise_nodeepscrub'
|
||||||
nrpe_setup.add_check(
|
nrpe_setup.add_check(
|
||||||
shortname="ceph",
|
shortname="ceph",
|
||||||
description='Check Ceph health {{{}}}'.format(current_unit),
|
description='Check Ceph health {{{}}}'.format(current_unit),
|
||||||
|
|
|
@ -0,0 +1,147 @@
|
||||||
|
{
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"health": {
|
||||||
|
"checks": {
|
||||||
|
"OSD_DOWN": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "3 osds down"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"OSD_HOST_DOWN": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "1 host (3 osds) down"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"OBJECT_MISPLACED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "9883/43779 objects misplaced (22.575%)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"PG_DEGRADED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "Degraded data redundancy: 14001/43779 objects degraded (31.981%), 32 pgs degraded"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"POOL_APP_NOT_ENABLED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "application not enabled on 1 pool(s)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"TOO_FEW_PGS": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "too few PGs per OSD (7 < min 30)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"status": "HEALTH_WARN"
|
||||||
|
},
|
||||||
|
"election_epoch": 5,
|
||||||
|
"quorum": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"quorum_names": [
|
||||||
|
"juju-460e0f-11"
|
||||||
|
],
|
||||||
|
"monmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"modified": "2018-11-07 14:17:12.324408",
|
||||||
|
"created": "2018-11-07 14:17:12.324408",
|
||||||
|
"features": {
|
||||||
|
"persistent": [
|
||||||
|
"kraken",
|
||||||
|
"luminous"
|
||||||
|
],
|
||||||
|
"optional": []
|
||||||
|
},
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"rank": 0,
|
||||||
|
"name": "juju-460e0f-11",
|
||||||
|
"addr": "192.168.100.81:6789/0",
|
||||||
|
"public_addr": "192.168.100.81:6789/0"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"osdmap": {
|
||||||
|
"osdmap": {
|
||||||
|
"epoch": 72,
|
||||||
|
"num_osds": 9,
|
||||||
|
"num_up_osds": 6,
|
||||||
|
"num_in_osds": 9,
|
||||||
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 16
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "active+undersized+degraded",
|
||||||
|
"count": 16
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+undersized+degraded+remapped+backfill_wait",
|
||||||
|
"count": 14
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+undersized+degraded+remapped+backfilling",
|
||||||
|
"count": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"num_pgs": 32,
|
||||||
|
"num_pools": 1,
|
||||||
|
"num_objects": 14593,
|
||||||
|
"data_bytes": 61169729807,
|
||||||
|
"bytes_used": 14540595200,
|
||||||
|
"bytes_avail": 14889525248,
|
||||||
|
"bytes_total": 29430120448,
|
||||||
|
"degraded_objects": 14001,
|
||||||
|
"degraded_total": 43779,
|
||||||
|
"degraded_ratio": 0.319811,
|
||||||
|
"misplaced_objects": 9883,
|
||||||
|
"misplaced_total": 43779,
|
||||||
|
"misplaced_ratio": 0.225748
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
|
},
|
||||||
|
"mgrmap": {
|
||||||
|
"epoch": 5,
|
||||||
|
"active_gid": 14097,
|
||||||
|
"active_name": "juju-460e0f-11",
|
||||||
|
"active_addr": "192.168.100.81:6800/204",
|
||||||
|
"available": true,
|
||||||
|
"standbys": [],
|
||||||
|
"modules": [
|
||||||
|
"balancer",
|
||||||
|
"restful",
|
||||||
|
"status"
|
||||||
|
],
|
||||||
|
"available_modules": [
|
||||||
|
"balancer",
|
||||||
|
"dashboard",
|
||||||
|
"influx",
|
||||||
|
"localpool",
|
||||||
|
"prometheus",
|
||||||
|
"restful",
|
||||||
|
"selftest",
|
||||||
|
"status",
|
||||||
|
"zabbix"
|
||||||
|
],
|
||||||
|
"services": {}
|
||||||
|
},
|
||||||
|
"servicemap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"modified": "0.000000",
|
||||||
|
"services": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,118 @@
|
||||||
|
{
|
||||||
|
"health": {
|
||||||
|
"health": {
|
||||||
|
"health_services": [
|
||||||
|
{
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"name": "juju-460e0f-12",
|
||||||
|
"kb_total": 1829760,
|
||||||
|
"kb_used": 835072,
|
||||||
|
"kb_avail": 994688,
|
||||||
|
"avail_percent": 54,
|
||||||
|
"last_updated": "2018-11-07 18:46:32.308592",
|
||||||
|
"store_stats": {
|
||||||
|
"bytes_total": 15678387,
|
||||||
|
"bytes_sst": 0,
|
||||||
|
"bytes_log": 420953,
|
||||||
|
"bytes_misc": 15257434,
|
||||||
|
"last_updated": "0.000000"
|
||||||
|
},
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timechecks": {
|
||||||
|
"epoch": 3,
|
||||||
|
"round": 0,
|
||||||
|
"round_status": "finished"
|
||||||
|
},
|
||||||
|
"summary": [
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_ERR",
|
||||||
|
"summary": "6 pgs are stuck inactive for more than 300 seconds"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": "7 pgs peering"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": "6 pgs stuck inactive"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": "6 pgs stuck unclean"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"overall_status": "HEALTH_ERR",
|
||||||
|
"detail": []
|
||||||
|
},
|
||||||
|
"fsid": "68a9ca14-e297-11e8-843c-00163e64b0c0",
|
||||||
|
"election_epoch": 3,
|
||||||
|
"quorum": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"quorum_names": [
|
||||||
|
"juju-460e0f-12"
|
||||||
|
],
|
||||||
|
"monmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"fsid": "68a9ca14-e297-11e8-843c-00163e64b0c0",
|
||||||
|
"modified": "2018-11-07 14:17:27.659064",
|
||||||
|
"created": "2018-11-07 14:17:27.659064",
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"rank": 0,
|
||||||
|
"name": "juju-460e0f-12",
|
||||||
|
"addr": "192.168.100.26:6789\/0"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"osdmap": {
|
||||||
|
"osdmap": {
|
||||||
|
"epoch": 28,
|
||||||
|
"num_osds": 9,
|
||||||
|
"num_up_osds": 9,
|
||||||
|
"num_in_osds": 9,
|
||||||
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "creating",
|
||||||
|
"count": 113
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+clean",
|
||||||
|
"count": 64
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "activating",
|
||||||
|
"count": 8
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "peering",
|
||||||
|
"count": 7
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"version": 7831,
|
||||||
|
"num_pgs": 192,
|
||||||
|
"data_bytes": 1790967809,
|
||||||
|
"bytes_used": 9995157504,
|
||||||
|
"bytes_avail": 9157476352,
|
||||||
|
"bytes_total": 19152633856,
|
||||||
|
"write_bytes_sec": 89844495,
|
||||||
|
"read_op_per_sec": 0,
|
||||||
|
"write_op_per_sec": 21
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,147 @@
|
||||||
|
{
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"health": {
|
||||||
|
"checks": {
|
||||||
|
"OBJECT_MISPLACED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "1560/12264 objects misplaced (12.720%)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"PG_AVAILABILITY": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "Reduced data availability: 27 pgs inactive, 30 pgs peering"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"POOL_APP_NOT_ENABLED": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "application not enabled on 1 pool(s)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"TOO_FEW_PGS": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "too few PGs per OSD (21 < min 30)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"status": "HEALTH_WARN"
|
||||||
|
},
|
||||||
|
"election_epoch": 5,
|
||||||
|
"quorum": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"quorum_names": [
|
||||||
|
"juju-460e0f-11"
|
||||||
|
],
|
||||||
|
"monmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"modified": "2018-11-07 14:17:12.324408",
|
||||||
|
"created": "2018-11-07 14:17:12.324408",
|
||||||
|
"features": {
|
||||||
|
"persistent": [
|
||||||
|
"kraken",
|
||||||
|
"luminous"
|
||||||
|
],
|
||||||
|
"optional": []
|
||||||
|
},
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"rank": 0,
|
||||||
|
"name": "juju-460e0f-11",
|
||||||
|
"addr": "192.168.100.81:6789/0",
|
||||||
|
"public_addr": "192.168.100.81:6789/0"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"osdmap": {
|
||||||
|
"osdmap": {
|
||||||
|
"epoch": 118,
|
||||||
|
"num_osds": 9,
|
||||||
|
"num_up_osds": 9,
|
||||||
|
"num_in_osds": 9,
|
||||||
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 15
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "unknown",
|
||||||
|
"count": 65
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "peering",
|
||||||
|
"count": 31
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "activating",
|
||||||
|
"count": 17
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "activating+remapped",
|
||||||
|
"count": 15
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"num_pgs": 128,
|
||||||
|
"num_pools": 1,
|
||||||
|
"num_objects": 4088,
|
||||||
|
"data_bytes": 17187733578,
|
||||||
|
"bytes_used": 14360064000,
|
||||||
|
"bytes_avail": 15023263744,
|
||||||
|
"bytes_total": 29383327744,
|
||||||
|
"unknown_pgs_ratio": 0.507812,
|
||||||
|
"inactive_pgs_ratio": 0.492188,
|
||||||
|
"misplaced_objects": 1560,
|
||||||
|
"misplaced_total": 12264,
|
||||||
|
"misplaced_ratio": 0.127202,
|
||||||
|
"recovering_objects_per_sec": 14,
|
||||||
|
"recovering_bytes_per_sec": 60779755,
|
||||||
|
"recovering_keys_per_sec": 0,
|
||||||
|
"num_objects_recovered": 113,
|
||||||
|
"num_bytes_recovered": 471859200,
|
||||||
|
"num_keys_recovered": 0,
|
||||||
|
"read_bytes_sec": 0,
|
||||||
|
"write_bytes_sec": 244132150,
|
||||||
|
"read_op_per_sec": 0,
|
||||||
|
"write_op_per_sec": 116
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
|
},
|
||||||
|
"mgrmap": {
|
||||||
|
"epoch": 5,
|
||||||
|
"active_gid": 14097,
|
||||||
|
"active_name": "juju-460e0f-11",
|
||||||
|
"active_addr": "192.168.100.81:6800/204",
|
||||||
|
"available": true,
|
||||||
|
"standbys": [],
|
||||||
|
"modules": [
|
||||||
|
"balancer",
|
||||||
|
"restful",
|
||||||
|
"status"
|
||||||
|
],
|
||||||
|
"available_modules": [
|
||||||
|
"balancer",
|
||||||
|
"dashboard",
|
||||||
|
"influx",
|
||||||
|
"localpool",
|
||||||
|
"prometheus",
|
||||||
|
"restful",
|
||||||
|
"selftest",
|
||||||
|
"status",
|
||||||
|
"zabbix"
|
||||||
|
],
|
||||||
|
"services": {}
|
||||||
|
},
|
||||||
|
"servicemap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"modified": "0.000000",
|
||||||
|
"services": {}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,177 +1,202 @@
|
||||||
{
|
{
|
||||||
"health": {
|
|
||||||
"health": {
|
"health": {
|
||||||
"health_services": [
|
"health": {
|
||||||
{
|
"health_services": [
|
||||||
"mons": [
|
{
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-21-lxd-0",
|
||||||
|
"kb_total": 334602320,
|
||||||
|
"kb_used": 2127960,
|
||||||
|
"kb_avail": 315454468,
|
||||||
|
"avail_percent": 94,
|
||||||
|
"last_updated": "2018-11-08 09:47:09.932189",
|
||||||
|
"store_stats": {
|
||||||
|
"bytes_total": 34880542,
|
||||||
|
"bytes_sst": 0,
|
||||||
|
"bytes_log": 1647123,
|
||||||
|
"bytes_misc": 33233419,
|
||||||
|
"last_updated": "0.000000"
|
||||||
|
},
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-24-lxd-0",
|
||||||
|
"kb_total": 334602320,
|
||||||
|
"kb_used": 2128116,
|
||||||
|
"kb_avail": 315454312,
|
||||||
|
"avail_percent": 94,
|
||||||
|
"last_updated": "2018-11-08 09:47:16.418007",
|
||||||
|
"store_stats": {
|
||||||
|
"bytes_total": 36811676,
|
||||||
|
"bytes_sst": 0,
|
||||||
|
"bytes_log": 3574345,
|
||||||
|
"bytes_misc": 33237331,
|
||||||
|
"last_updated": "0.000000"
|
||||||
|
},
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-25-lxd-0",
|
||||||
|
"kb_total": 334602320,
|
||||||
|
"kb_used": 2128860,
|
||||||
|
"kb_avail": 315453568,
|
||||||
|
"avail_percent": 94,
|
||||||
|
"last_updated": "2018-11-08 09:47:21.198816",
|
||||||
|
"store_stats": {
|
||||||
|
"bytes_total": 37388424,
|
||||||
|
"bytes_sst": 0,
|
||||||
|
"bytes_log": 4151569,
|
||||||
|
"bytes_misc": 33236855,
|
||||||
|
"last_updated": "0.000000"
|
||||||
|
},
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timechecks": {
|
||||||
|
"epoch": 14,
|
||||||
|
"round": 4480,
|
||||||
|
"round_status": "finished",
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-21-lxd-0",
|
||||||
|
"skew": 0.000000,
|
||||||
|
"latency": 0.000000,
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-24-lxd-0",
|
||||||
|
"skew": 0.000282,
|
||||||
|
"latency": 0.000989,
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "juju-c62a41-25-lxd-0",
|
||||||
|
"skew": -0.001223,
|
||||||
|
"latency": 0.000776,
|
||||||
|
"health": "HEALTH_OK"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"summary": [
|
||||||
{
|
{
|
||||||
"name": "node1",
|
"severity": "HEALTH_WARN",
|
||||||
"kb_total": 140956600,
|
"summary": "19 pgs backfill_wait"
|
||||||
"kb_used": 15916132,
|
|
||||||
"kb_avail": 117857208,
|
|
||||||
"avail_percent": 83,
|
|
||||||
"last_updated": "2017-05-17 03:23:11.248297",
|
|
||||||
"store_stats": {
|
|
||||||
"bytes_total": 140014259,
|
|
||||||
"bytes_sst": 0,
|
|
||||||
"bytes_log": 13670758,
|
|
||||||
"bytes_misc": 126343501,
|
|
||||||
"last_updated": "0.000000"
|
|
||||||
},
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "node2",
|
"severity": "HEALTH_WARN",
|
||||||
"kb_total": 70395920,
|
"summary": "4 pgs backfilling"
|
||||||
"kb_used": 10532504,
|
|
||||||
"kb_avail": 56264436,
|
|
||||||
"avail_percent": 79,
|
|
||||||
"last_updated": "2017-05-17 03:23:16.952673",
|
|
||||||
"store_stats": {
|
|
||||||
"bytes_total": 315512452,
|
|
||||||
"bytes_sst": 0,
|
|
||||||
"bytes_log": 21691698,
|
|
||||||
"bytes_misc": 293820754,
|
|
||||||
"last_updated": "0.000000"
|
|
||||||
},
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "juju-machine-85-lxc-10",
|
"severity": "HEALTH_WARN",
|
||||||
"kb_total": 131927524,
|
"summary": "1 pgs peering"
|
||||||
"kb_used": 79521024,
|
},
|
||||||
"kb_avail": 45954016,
|
{
|
||||||
"avail_percent": 34,
|
"severity": "HEALTH_WARN",
|
||||||
"last_updated": "2017-05-17 03:23:13.794034",
|
"summary": "24 pgs stuck unclean"
|
||||||
"store_stats": {
|
},
|
||||||
"bytes_total": 89036349,
|
{
|
||||||
"bytes_sst": 0,
|
"severity": "HEALTH_WARN",
|
||||||
"bytes_log": 21055337,
|
"summary": "recovery 17386\/112794 objects misplaced (15.414%)"
|
||||||
"bytes_misc": 67981012,
|
},
|
||||||
"last_updated": "0.000000"
|
{
|
||||||
},
|
"severity": "HEALTH_WARN",
|
||||||
"health": "HEALTH_OK"
|
"summary": "pool pool1 has many more objects per pg than average (too few pgs?)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": "nodeep-scrub flag(s) set"
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
}
|
"overall_status": "HEALTH_WARN",
|
||||||
]
|
"detail": []
|
||||||
},
|
},
|
||||||
"timechecks": {
|
"fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284",
|
||||||
"epoch": 280,
|
"election_epoch": 14,
|
||||||
"round": 19874,
|
"quorum": [
|
||||||
"round_status": "finished",
|
0,
|
||||||
"mons": [
|
1,
|
||||||
{
|
2
|
||||||
"name": "node1",
|
|
||||||
"skew": "0.000000",
|
|
||||||
"latency": "0.000000",
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "node2",
|
|
||||||
"skew": "-0.000000",
|
|
||||||
"latency": "0.000866",
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "juju-machine-85-lxc-10",
|
|
||||||
"skew": "-0.000000",
|
|
||||||
"latency": "0.018848",
|
|
||||||
"health": "HEALTH_OK"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"summary": [
|
|
||||||
{
|
|
||||||
"severity": "HEALTH_WARN",
|
|
||||||
"summary": "nodeep-scrub flag(s) set"
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
"overall_status": "HEALTH_WARN",
|
"quorum_names": [
|
||||||
"detail": []
|
"juju-c62a41-21-lxd-0",
|
||||||
},
|
"juju-c62a41-24-lxd-0",
|
||||||
"fsid": "some_fsid",
|
"juju-c62a41-25-lxd-0"
|
||||||
"election_epoch": 280,
|
],
|
||||||
"quorum": [
|
"monmap": {
|
||||||
0,
|
"epoch": 2,
|
||||||
1,
|
"fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284",
|
||||||
2
|
"modified": "2018-10-31 15:37:56.902830",
|
||||||
],
|
"created": "2018-10-31 15:37:40.288870",
|
||||||
"quorum_names": [
|
"mons": [
|
||||||
"node1",
|
{
|
||||||
"node2",
|
"rank": 0,
|
||||||
"juju-machine-85-lxc-10"
|
"name": "juju-c62a41-21-lxd-0",
|
||||||
],
|
"addr": "100.84.195.4:6789\/0"
|
||||||
"monmap": {
|
},
|
||||||
"epoch": 3,
|
{
|
||||||
"fsid": "some_fsid",
|
"rank": 1,
|
||||||
"modified": "2016-11-25 00:08:51.235813",
|
"name": "juju-c62a41-24-lxd-0",
|
||||||
"created": "0.000000",
|
"addr": "100.84.196.4:6789\/0"
|
||||||
"mons": [
|
},
|
||||||
{
|
{
|
||||||
"rank": 0,
|
"rank": 2,
|
||||||
"name": "node1",
|
"name": "juju-c62a41-25-lxd-0",
|
||||||
"addr": "10.24.0.15:6789/0"
|
"addr": "100.84.196.5:6789\/0"
|
||||||
},
|
}
|
||||||
{
|
]
|
||||||
"rank": 1,
|
},
|
||||||
"name": "node2",
|
|
||||||
"addr": "10.24.0.17:6789/0"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"rank": 2,
|
|
||||||
"name": "juju-machine-85-lxc-10",
|
|
||||||
"addr": "10.24.0.195:6789/0"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"osdmap": {
|
|
||||||
"osdmap": {
|
"osdmap": {
|
||||||
"epoch": 37820,
|
"osdmap": {
|
||||||
"num_osds": 46,
|
"epoch": 316,
|
||||||
"num_up_osds": 46,
|
"num_osds": 48,
|
||||||
"num_in_osds": 46,
|
"num_up_osds": 48,
|
||||||
"full": false,
|
"num_in_osds": 48,
|
||||||
"nearfull": false
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 22
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "active+clean",
|
||||||
|
"count": 3448
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+remapped+wait_backfill",
|
||||||
|
"count": 19
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "active+remapped+backfilling",
|
||||||
|
"count": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"state_name": "peering",
|
||||||
|
"count": 1
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"version": 141480,
|
||||||
|
"num_pgs": 3472,
|
||||||
|
"data_bytes": 157009583781,
|
||||||
|
"bytes_used": 487185850368,
|
||||||
|
"bytes_avail": 75282911256576,
|
||||||
|
"bytes_total": 75770097106944,
|
||||||
|
"misplaced_objects": 17386,
|
||||||
|
"misplaced_total": 112794,
|
||||||
|
"misplaced_ratio": 0.154139,
|
||||||
|
"recovering_objects_per_sec": 436,
|
||||||
|
"recovering_bytes_per_sec": 1832614589,
|
||||||
|
"recovering_keys_per_sec": 0,
|
||||||
|
"num_objects_recovered": 446,
|
||||||
|
"num_bytes_recovered": 1870659584,
|
||||||
|
"num_keys_recovered": 0
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
}
|
}
|
||||||
},
|
|
||||||
"pgmap": {
|
|
||||||
"pgs_by_state": [
|
|
||||||
{
|
|
||||||
"state_name": "active+clean",
|
|
||||||
"count": 1988
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"state_name": "active+remapped+wait_backfill",
|
|
||||||
"count": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"state_name": "active+remapped+backfilling",
|
|
||||||
"count": 1
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"version": 58873447,
|
|
||||||
"num_pgs": 1992,
|
|
||||||
"data_bytes": 35851846298041,
|
|
||||||
"bytes_used": 107730678743040,
|
|
||||||
"bytes_avail": 63413590548480,
|
|
||||||
"bytes_total": 171144269291520,
|
|
||||||
"degraded_objects": 0,
|
|
||||||
"degraded_total": 25759217,
|
|
||||||
"degraded_ratio": 0,
|
|
||||||
"recovering_objects_per_sec": 17,
|
|
||||||
"recovering_bytes_per_sec": 72552794,
|
|
||||||
"recovering_keys_per_sec": 0,
|
|
||||||
"read_bytes_sec": 23935944,
|
|
||||||
"write_bytes_sec": 7024503,
|
|
||||||
"op_per_sec": 5332
|
|
||||||
},
|
|
||||||
"mdsmap": {
|
|
||||||
"epoch": 1,
|
|
||||||
"up": 0,
|
|
||||||
"in": 0,
|
|
||||||
"max": 1,
|
|
||||||
"by_rank": []
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,102 @@
|
||||||
|
{
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"health": {
|
||||||
|
"checks": {
|
||||||
|
"OSDMAP_FLAGS": {
|
||||||
|
"severity": "HEALTH_WARN",
|
||||||
|
"summary": {
|
||||||
|
"message": "nodeep-scrub flag(s) set"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"status": "HEALTH_WARN"
|
||||||
|
},
|
||||||
|
"election_epoch": 5,
|
||||||
|
"quorum": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"quorum_names": [
|
||||||
|
"juju-460e0f-11"
|
||||||
|
],
|
||||||
|
"monmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||||
|
"modified": "2018-11-07 14:17:12.324408",
|
||||||
|
"created": "2018-11-07 14:17:12.324408",
|
||||||
|
"features": {
|
||||||
|
"persistent": [
|
||||||
|
"kraken",
|
||||||
|
"luminous"
|
||||||
|
],
|
||||||
|
"optional": []
|
||||||
|
},
|
||||||
|
"mons": [
|
||||||
|
{
|
||||||
|
"rank": 0,
|
||||||
|
"name": "juju-460e0f-11",
|
||||||
|
"addr": "192.168.100.81:6789/0",
|
||||||
|
"public_addr": "192.168.100.81:6789/0"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"osdmap": {
|
||||||
|
"osdmap": {
|
||||||
|
"epoch": 518,
|
||||||
|
"num_osds": 9,
|
||||||
|
"num_up_osds": 9,
|
||||||
|
"num_in_osds": 9,
|
||||||
|
"full": false,
|
||||||
|
"nearfull": false,
|
||||||
|
"num_remapped_pgs": 0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pgmap": {
|
||||||
|
"pgs_by_state": [
|
||||||
|
{
|
||||||
|
"state_name": "active+clean",
|
||||||
|
"count": 128
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"num_pgs": 128,
|
||||||
|
"num_pools": 1,
|
||||||
|
"num_objects": 14896,
|
||||||
|
"data_bytes": 62440603919,
|
||||||
|
"bytes_used": 14225776640,
|
||||||
|
"bytes_avail": 9450938368,
|
||||||
|
"bytes_total": 23676715008
|
||||||
|
},
|
||||||
|
"fsmap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"by_rank": []
|
||||||
|
},
|
||||||
|
"mgrmap": {
|
||||||
|
"epoch": 5,
|
||||||
|
"active_gid": 14097,
|
||||||
|
"active_name": "juju-460e0f-11",
|
||||||
|
"active_addr": "192.168.100.81:6800/204",
|
||||||
|
"available": true,
|
||||||
|
"standbys": [],
|
||||||
|
"modules": [
|
||||||
|
"balancer",
|
||||||
|
"restful",
|
||||||
|
"status"
|
||||||
|
],
|
||||||
|
"available_modules": [
|
||||||
|
"balancer",
|
||||||
|
"dashboard",
|
||||||
|
"influx",
|
||||||
|
"localpool",
|
||||||
|
"prometheus",
|
||||||
|
"restful",
|
||||||
|
"selftest",
|
||||||
|
"status",
|
||||||
|
"zabbix"
|
||||||
|
],
|
||||||
|
"services": {}
|
||||||
|
},
|
||||||
|
"servicemap": {
|
||||||
|
"epoch": 1,
|
||||||
|
"modified": "0.000000",
|
||||||
|
"services": {}
|
||||||
|
}
|
||||||
|
}
|
|
@ -48,7 +48,7 @@ CHARM_CONFIG = {'config-flags': '',
|
||||||
'nagios_degraded_thresh': '1',
|
'nagios_degraded_thresh': '1',
|
||||||
'nagios_misplaced_thresh': '10',
|
'nagios_misplaced_thresh': '10',
|
||||||
'nagios_recovery_rate': '1',
|
'nagios_recovery_rate': '1',
|
||||||
'nagios_ignore_nodeepscub': False,
|
'nagios_raise_nodeepscrub': True,
|
||||||
'disable-pg-max-object-skew': False}
|
'disable-pg-max-object-skew': False}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,7 @@ class NagiosTestCase(unittest.TestCase):
|
||||||
ceph_version = check_ceph_status.get_ceph_version()
|
ceph_version = check_ceph_status.get_ceph_version()
|
||||||
self.assertEqual(ceph_version, [10, 2, 9])
|
self.assertEqual(ceph_version, [10, 2, 9])
|
||||||
|
|
||||||
|
# All OK, pre-luminous
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
def test_health_ok(self, mock_ceph_version, mock_subprocess):
|
def test_health_ok(self, mock_ceph_version, mock_subprocess):
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
@ -42,6 +43,84 @@ class NagiosTestCase(unittest.TestCase):
|
||||||
check_output = check_ceph_status.check_ceph_status(args)
|
check_output = check_ceph_status.check_ceph_status(args)
|
||||||
self.assertRegex(check_output, r"^All OK$")
|
self.assertRegex(check_output, r"^All OK$")
|
||||||
|
|
||||||
|
# Warning, pre-luminous
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_warn(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_warn.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.WarnError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, health_critical status
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_err(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_crit.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, overall HEALTH_ERR
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_error.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, because misplaced ratio is too big
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit_misplaced(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_params.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--misplaced_thresh', '0.1'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, because recovery rate is too low
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit_recovery(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_params.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--recovery_rate', '400'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Warning, pre-luminous, deepscrub
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_warn_deepscrub(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.WarnError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, pre-luminous, deepscrub
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit_deepscrub(self, mock_ceph_version, mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [10, 2, 9]
|
||||||
|
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--raise_nodeepscrub'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# All OK, luminous
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
def test_health_ok_luminous(self, mock_ceph_version, mock_subprocess):
|
def test_health_ok_luminous(self, mock_ceph_version, mock_subprocess):
|
||||||
mock_ceph_version.return_value = [12, 2, 0]
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
@ -52,62 +131,80 @@ class NagiosTestCase(unittest.TestCase):
|
||||||
check_output = check_ceph_status.check_ceph_status(args)
|
check_output = check_ceph_status.check_ceph_status(args)
|
||||||
self.assertRegex(check_output, r"^All OK$")
|
self.assertRegex(check_output, r"^All OK$")
|
||||||
|
|
||||||
|
# Warning, luminous
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
def test_health_warn(self, mock_ceph_version, mock_subprocess):
|
def test_health_warn_luminous(self, mock_ceph_version, mock_subprocess):
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_warn.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
|
||||||
self.assertRaises(check_ceph_status.WarnError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_crit(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_crit.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
|
||||||
self.assertRaises(check_ceph_status.CriticalError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_crit_luminous(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [12, 2, 0]
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
with open('unit_tests/ceph_crit_luminous.json') as f:
|
with open('unit_tests/ceph_many_warnings_luminous.json') as f:
|
||||||
tree = f.read()
|
tree = f.read()
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
args = check_ceph_status.parse_args("")
|
||||||
self.assertRaises(check_ceph_status.CriticalError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_lotsdegraded(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_params.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
|
||||||
self.assertRaises(check_ceph_status.CriticalError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_nodeepscrub(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
|
|
||||||
self.assertRaises(check_ceph_status.CriticalError,
|
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
|
||||||
|
|
||||||
@patch('check_ceph_status.get_ceph_version')
|
|
||||||
def test_health_nodeepscrubok(self, mock_ceph_version, mock_subprocess):
|
|
||||||
mock_ceph_version.return_value = [10, 2, 9]
|
|
||||||
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
|
||||||
tree = f.read()
|
|
||||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
|
||||||
args = check_ceph_status.parse_args(['--ignore_nodeepscrub'])
|
|
||||||
self.assertRaises(check_ceph_status.WarnError,
|
self.assertRaises(check_ceph_status.WarnError,
|
||||||
lambda: check_ceph_status.check_ceph_status(args))
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, luminous, because of overall status
|
||||||
|
|
||||||
|
# Error, luminous, because misplaced ratio is too big
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_critical_misplaced_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_many_warnings_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--misplaced_thresh', '0.1'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, luminous, because degraded ratio is too big
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_critical_degraded_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_degraded_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--degraded_thresh', '0.1'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, luminous, because recovery rate is too low
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_critical_recovery_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_many_warnings_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--recovery_rate', '20'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Warning, luminous, deepscrub
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_warn_deepscrub_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_nodeepscrub_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args("")
|
||||||
|
self.assertRaises(check_ceph_status.WarnError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
||||||
|
# Error, luminous, deepscrub
|
||||||
|
@patch('check_ceph_status.get_ceph_version')
|
||||||
|
def test_health_crit_deepscrub_luminous(self,
|
||||||
|
mock_ceph_version,
|
||||||
|
mock_subprocess):
|
||||||
|
mock_ceph_version.return_value = [12, 2, 0]
|
||||||
|
with open('unit_tests/ceph_nodeepscrub_luminous.json') as f:
|
||||||
|
tree = f.read()
|
||||||
|
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||||
|
args = check_ceph_status.parse_args(['--raise_nodeepscrub'])
|
||||||
|
self.assertRaises(check_ceph_status.CriticalError,
|
||||||
|
lambda: check_ceph_status.check_ceph_status(args))
|
||||||
|
|
Loading…
Reference in New Issue