Merge "Update Nagios check for Luminous"
This commit is contained in:
commit
2aa99d2af9
|
@ -68,6 +68,23 @@ def check_file_freshness(filename, newer_than=3600):
|
|||
% (filename, time.ctime(mtime)))
|
||||
|
||||
|
||||
def get_ceph_version():
    """
    Uses CLI to get the ceph version, because the status output changes from
    Luminous onwards (12.2.0 or higher)

    Only the leading dotted numeric part of the version is used, so a
    packaging or pre-release suffix (for example ``12.2.4-10.el7cp``) does
    not break the parsing.

    :returns: list of integers, just the actual version number
    :raises: UnknownError if the ceph CLI cannot be run, or if its output
             does not contain a recognisable version number
    """
    try:
        out_string = subprocess.check_output(['ceph',
                                              '--version']).decode('UTF-8')
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable ceph binary; report it
        # as UNKNOWN instead of letting a raw traceback escape the check.
        raise UnknownError(
            "UNKNOWN: could not determine Ceph version, error: {}".format(e))
    # Expected output looks like:
    #   ceph version 12.2.2 (<sha1>) luminous (stable)
    # Match only the digits-and-dots prefix of the version token.
    match = re.search(r'\bversion\s+(\d+(?:\.\d+)*)', out_string)
    if match is None:
        raise UnknownError(
            "UNKNOWN: could not determine Ceph version, "
            "unexpected output: {!r}".format(out_string.strip()))
    return [int(x) for x in match.group(1).split('.')]
|
||||
|
||||
|
||||
def check_ceph_status(args):
|
||||
"""
|
||||
Used to check the status of a Ceph cluster. Uses the output of 'ceph
|
||||
|
@ -109,15 +126,27 @@ def check_ceph_status(args):
|
|||
required_keys = ['health', 'monmap', 'pgmap']
|
||||
if not all(key in status_data.keys() for key in required_keys):
|
||||
raise UnknownError('UNKNOWN: status data is incomplete')
|
||||
ceph_version = get_ceph_version()
|
||||
if ceph_version[0] >= 12 and ceph_version[1] >= 2:
|
||||
# This is Luminous or above
|
||||
overall_status = status_data['health'].get('status')
|
||||
luminous = True
|
||||
else:
|
||||
overall_status = status_data['health'].get('overall_status')
|
||||
luminous = False
|
||||
|
||||
if status_data['health']['overall_status'] != 'HEALTH_OK':
|
||||
if overall_status != 'HEALTH_OK':
|
||||
# Health is not OK, check if any lines are not in our list of OK
|
||||
# any lines that don't match, check is critical
|
||||
status_msg = []
|
||||
for status in status_data['health']['summary']:
|
||||
if not re.match(ignorable, status['summary']):
|
||||
if luminous:
|
||||
status_messages = [x['summary']['message'] for x in status_data['health'].get('checks').values()]
|
||||
else:
|
||||
status_messages = [x['summary'] for x in status_data['health']['summary']]
|
||||
for status in status_messages:
|
||||
if not re.match(ignorable, status):
|
||||
status_critical = True
|
||||
status_msg.append(status['summary'])
|
||||
status_msg.append(status)
|
||||
# If we got this far, then the status is not OK but the status lines
|
||||
# are all in our list of things we consider to be operational tasks.
|
||||
# Check the thresholds and return CRITICAL if exceeded,
|
||||
|
@ -138,10 +167,10 @@ def check_ceph_status(args):
|
|||
status_msg.append("Recovering objects/sec {}".format(recovering))
|
||||
if status_critical:
|
||||
msg = 'CRITICAL: ceph health: "{} {}"'.format(
|
||||
status_data['health']['overall_status'],
|
||||
overall_status,
|
||||
", ".join(status_msg))
|
||||
raise CriticalError(msg)
|
||||
if status_data['health']['overall_status'] == 'HEALTH_WARN':
|
||||
if overall_status == 'HEALTH_WARN':
|
||||
msg = "WARNING: {}".format(", ".join(status_msg))
|
||||
raise WarnError(msg)
|
||||
message = "All OK"
|
||||
|
|
|
@ -0,0 +1,196 @@
|
|||
{
|
||||
"fsid": "a7285ad8-3961-11e8-b715-00163e030140",
|
||||
"health": {
|
||||
"checks": {
|
||||
"OSD_DOWN": {
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": {
|
||||
"message": "1 osds down"
|
||||
}
|
||||
},
|
||||
"PG_DEGRADED": {
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": {
|
||||
"message": "Degraded data redundancy: 31/906 objects degraded (3.422%), 74 pgs unclean, 74 pgs degraded"
|
||||
}
|
||||
}
|
||||
},
|
||||
"status": "HEALTH_WARN"
|
||||
},
|
||||
"election_epoch": 28,
|
||||
"quorum": [
|
||||
0,
|
||||
1,
|
||||
2
|
||||
],
|
||||
"quorum_names": [
|
||||
"juju-7cfc1d-1-lxd-0",
|
||||
"juju-7cfc1d-0-lxd-0",
|
||||
"juju-7cfc1d-12-lxd-0"
|
||||
],
|
||||
"monmap": {
|
||||
"epoch": 2,
|
||||
"fsid": "a7285ad8-3961-11e8-b715-00163e030140",
|
||||
"modified": "2018-04-06 06:37:04.978765",
|
||||
"created": "2018-04-06 06:35:06.513449",
|
||||
"features": {
|
||||
"persistent": [
|
||||
"kraken",
|
||||
"luminous"
|
||||
],
|
||||
"optional": []
|
||||
},
|
||||
"mons": [
|
||||
{
|
||||
"rank": 0,
|
||||
"name": "juju-7cfc1d-1-lxd-0",
|
||||
"addr": "172.18.250.75:6789/0",
|
||||
"public_addr": "172.18.250.75:6789/0"
|
||||
},
|
||||
{
|
||||
"rank": 1,
|
||||
"name": "juju-7cfc1d-0-lxd-0",
|
||||
"addr": "172.18.250.76:6789/0",
|
||||
"public_addr": "172.18.250.76:6789/0"
|
||||
},
|
||||
{
|
||||
"rank": 2,
|
||||
"name": "juju-7cfc1d-12-lxd-0",
|
||||
"addr": "172.18.250.84:6789/0",
|
||||
"public_addr": "172.18.250.84:6789/0"
|
||||
}
|
||||
]
|
||||
},
|
||||
"osdmap": {
|
||||
"osdmap": {
|
||||
"epoch": 257,
|
||||
"num_osds": 33,
|
||||
"num_up_osds": 32,
|
||||
"num_in_osds": 33,
|
||||
"full": false,
|
||||
"nearfull": false,
|
||||
"num_remapped_pgs": 0
|
||||
}
|
||||
},
|
||||
"pgmap": {
|
||||
"pgs_by_state": [
|
||||
{
|
||||
"state_name": "active+clean",
|
||||
"count": 958
|
||||
},
|
||||
{
|
||||
"state_name": "active+undersized+degraded",
|
||||
"count": 74
|
||||
}
|
||||
],
|
||||
"num_pgs": 1032,
|
||||
"num_pools": 20,
|
||||
"num_objects": 302,
|
||||
"data_bytes": 580388173,
|
||||
"bytes_used": 2971890057216,
|
||||
"bytes_avail": 128989599563776,
|
||||
"bytes_total": 131961489620992,
|
||||
"degraded_objects": 31,
|
||||
"degraded_total": 906,
|
||||
"degraded_ratio": 0.034216
|
||||
},
|
||||
"fsmap": {
|
||||
"epoch": 1,
|
||||
"by_rank": []
|
||||
},
|
||||
"mgrmap": {
|
||||
"epoch": 4,
|
||||
"active_gid": 4131,
|
||||
"active_name": "juju-7cfc1d-1-lxd-0",
|
||||
"active_addr": "172.18.250.75:6800/88914",
|
||||
"available": true,
|
||||
"standbys": [
|
||||
{
|
||||
"gid": 4134,
|
||||
"name": "juju-7cfc1d-0-lxd-0",
|
||||
"available_modules": [
|
||||
"balancer",
|
||||
"dashboard",
|
||||
"influx",
|
||||
"localpool",
|
||||
"prometheus",
|
||||
"restful",
|
||||
"selftest",
|
||||
"status",
|
||||
"zabbix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"gid": 4299,
|
||||
"name": "juju-7cfc1d-12-lxd-0",
|
||||
"available_modules": [
|
||||
"balancer",
|
||||
"dashboard",
|
||||
"influx",
|
||||
"localpool",
|
||||
"prometheus",
|
||||
"restful",
|
||||
"selftest",
|
||||
"status",
|
||||
"zabbix"
|
||||
]
|
||||
}
|
||||
],
|
||||
"modules": [
|
||||
"balancer",
|
||||
"restful",
|
||||
"status"
|
||||
],
|
||||
"available_modules": [
|
||||
"balancer",
|
||||
"dashboard",
|
||||
"influx",
|
||||
"localpool",
|
||||
"prometheus",
|
||||
"restful",
|
||||
"selftest",
|
||||
"status",
|
||||
"zabbix"
|
||||
],
|
||||
"services": {}
|
||||
},
|
||||
"servicemap": {
|
||||
"epoch": 22,
|
||||
"modified": "2018-04-14 06:25:03.499825",
|
||||
"services": {
|
||||
"rgw": {
|
||||
"daemons": {
|
||||
"summary": "",
|
||||
"radosgw.gateway": {
|
||||
"start_epoch": 22,
|
||||
"start_stamp": "2018-04-14 06:25:02.277715",
|
||||
"gid": 156351,
|
||||
"addr": "172.18.250.74:0/2962286796",
|
||||
"metadata": {
|
||||
"arch": "x86_64",
|
||||
"ceph_version": "ceph version 12.2.2 (cf0baeeeeba3b47f9427c6c97e2144b094b7e5ba) luminous (stable)",
|
||||
"cpu": "Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz",
|
||||
"distro": "ubuntu",
|
||||
"distro_description": "Ubuntu 16.04.4 LTS",
|
||||
"distro_version": "16.04",
|
||||
"frontend_config#0": "civetweb port=60",
|
||||
"frontend_type#0": "civetweb",
|
||||
"hostname": "juju-7cfc1d-1-lxd-1",
|
||||
"kernel_description": "#43~16.04.1-Ubuntu SMP Wed Mar 14 17:48:43 UTC 2018",
|
||||
"kernel_version": "4.13.0-38-generic",
|
||||
"mem_swap_kb": "8388604",
|
||||
"mem_total_kb": "528154640",
|
||||
"num_handles": "1",
|
||||
"os": "Linux",
|
||||
"pid": "225019",
|
||||
"zone_id": "34009c14-e608-47e6-84c5-bf2cefbe94f8",
|
||||
"zone_name": "default",
|
||||
"zonegroup_id": "7771c284-f980-41f0-861b-66c95357cb3d",
|
||||
"zonegroup_name": "default"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,180 @@
|
|||
{
|
||||
"fsid": "1111111-11111-1111-1111-111111111111",
|
||||
"health": {
|
||||
"checks": {},
|
||||
"status": "HEALTH_OK"
|
||||
},
|
||||
"election_epoch": 28,
|
||||
"quorum": [
|
||||
0,
|
||||
1,
|
||||
2
|
||||
],
|
||||
"quorum_names": [
|
||||
"juju-badbad-1-lxd-0",
|
||||
"juju-badbad-0-lxd-0",
|
||||
"juju-badbad-12-lxd-0"
|
||||
],
|
||||
"monmap": {
|
||||
"epoch": 2,
|
||||
"fsid": "1111111-11111-1111-1111-111111111111",
|
||||
"modified": "2018-04-06 06:37:04.978765",
|
||||
"created": "2018-04-06 06:35:06.513449",
|
||||
"features": {
|
||||
"persistent": [
|
||||
"kraken",
|
||||
"luminous"
|
||||
],
|
||||
"optional": []
|
||||
},
|
||||
"mons": [
|
||||
{
|
||||
"rank": 0,
|
||||
"name": "juju-badbad-1-lxd-0",
|
||||
"addr": "10.11.12.75:6789/0",
|
||||
"public_addr": "10.11.12.75:6789/0"
|
||||
},
|
||||
{
|
||||
"rank": 1,
|
||||
"name": "juju-badbad-0-lxd-0",
|
||||
"addr": "10.11.12.76:6789/0",
|
||||
"public_addr": "10.11.12.76:6789/0"
|
||||
},
|
||||
{
|
||||
"rank": 2,
|
||||
"name": "juju-badbad-12-lxd-0",
|
||||
"addr": "10.11.12.84:6789/0",
|
||||
"public_addr": "10.11.12.84:6789/0"
|
||||
}
|
||||
]
|
||||
},
|
||||
"osdmap": {
|
||||
"osdmap": {
|
||||
"epoch": 262,
|
||||
"num_osds": 33,
|
||||
"num_up_osds": 32,
|
||||
"num_in_osds": 32,
|
||||
"full": false,
|
||||
"nearfull": false,
|
||||
"num_remapped_pgs": 0
|
||||
}
|
||||
},
|
||||
"pgmap": {
|
||||
"pgs_by_state": [
|
||||
{
|
||||
"state_name": "active+clean",
|
||||
"count": 1032
|
||||
}
|
||||
],
|
||||
"num_pgs": 1032,
|
||||
"num_pools": 20,
|
||||
"num_objects": 561,
|
||||
"data_bytes": 1584814720,
|
||||
"bytes_used": 2884842602496,
|
||||
"bytes_avail": 125077821714432,
|
||||
"bytes_total": 127962664316928,
|
||||
"read_bytes_sec": 1513,
|
||||
"read_op_per_sec": 1,
|
||||
"write_op_per_sec": 0
|
||||
},
|
||||
"fsmap": {
|
||||
"epoch": 1,
|
||||
"by_rank": []
|
||||
},
|
||||
"mgrmap": {
|
||||
"epoch": 4,
|
||||
"active_gid": 4131,
|
||||
"active_name": "juju-badbad-1-lxd-0",
|
||||
"active_addr": "10.11.12.75:6800/88914",
|
||||
"available": true,
|
||||
"standbys": [
|
||||
{
|
||||
"gid": 4134,
|
||||
"name": "juju-badbad-0-lxd-0",
|
||||
"available_modules": [
|
||||
"balancer",
|
||||
"dashboard",
|
||||
"influx",
|
||||
"localpool",
|
||||
"prometheus",
|
||||
"restful",
|
||||
"selftest",
|
||||
"status",
|
||||
"zabbix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"gid": 4299,
|
||||
"name": "juju-badbad-12-lxd-0",
|
||||
"available_modules": [
|
||||
"balancer",
|
||||
"dashboard",
|
||||
"influx",
|
||||
"localpool",
|
||||
"prometheus",
|
||||
"restful",
|
||||
"selftest",
|
||||
"status",
|
||||
"zabbix"
|
||||
]
|
||||
}
|
||||
],
|
||||
"modules": [
|
||||
"balancer",
|
||||
"restful",
|
||||
"status"
|
||||
],
|
||||
"available_modules": [
|
||||
"balancer",
|
||||
"dashboard",
|
||||
"influx",
|
||||
"localpool",
|
||||
"prometheus",
|
||||
"restful",
|
||||
"selftest",
|
||||
"status",
|
||||
"zabbix"
|
||||
],
|
||||
"services": {}
|
||||
},
|
||||
"servicemap": {
|
||||
"epoch": 29,
|
||||
"modified": "2018-04-18 06:25:04.076050",
|
||||
"services": {
|
||||
"rgw": {
|
||||
"daemons": {
|
||||
"summary": "",
|
||||
"radosgw.gateway": {
|
||||
"start_epoch": 29,
|
||||
"start_stamp": "2018-04-18 06:25:02.612368",
|
||||
"gid": 231504,
|
||||
"addr": "10.11.12.78:0/2747422053",
|
||||
"metadata": {
|
||||
"arch": "x86_64",
|
||||
"ceph_version": "ceph version 12.2.2 (cf0baeeeeba3b47f9427c6c97e2144b094b7e5ba) luminous (stable)",
|
||||
"cpu": "Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz",
|
||||
"distro": "ubuntu",
|
||||
"distro_description": "Ubuntu 16.04.4 LTS",
|
||||
"distro_version": "16.04",
|
||||
"frontend_config#0": "civetweb port=60",
|
||||
"frontend_type#0": "civetweb",
|
||||
"hostname": "juju-badbad-0-lxd-1",
|
||||
"kernel_description": "#43~16.04.1-Ubuntu SMP Wed Mar 14 17:48:43 UTC 2018",
|
||||
"kernel_version": "4.13.0-38-generic",
|
||||
"mem_swap_kb": "8388604",
|
||||
"mem_total_kb": "528154640",
|
||||
"num_handles": "1",
|
||||
"os": "Linux",
|
||||
"pid": "225487",
|
||||
"zone_id": "11111111-1111-1111-1111-111111111111",
|
||||
"zone_name": "default",
|
||||
"zonegroup_id": "11111111-1111-1111-1111-111111111111",
|
||||
"zonegroup_name": "default"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -26,7 +26,15 @@ import check_ceph_status
|
|||
@patch('subprocess.check_output')
|
||||
class NagiosTestCase(unittest.TestCase):
|
||||
|
||||
def test_health_ok(self, mock_subprocess):
|
||||
def test_get_ceph_version(self, mock_subprocess):
    """get_ceph_version() turns the mocked CLI output into a list of ints."""
    cli_output = ('ceph version 10.2.9 '
                  '(2ee413f77150c0f375ff6f10edd6c8f9c7d060d0)')
    mock_subprocess.return_value = cli_output.encode('UTF-8')
    self.assertEqual(check_ceph_status.get_ceph_version(), [10, 2, 9])
|
||||
|
||||
@patch('check_ceph_status.get_ceph_version')
|
||||
def test_health_ok(self, mock_ceph_version, mock_subprocess):
|
||||
mock_ceph_version.return_value = [10, 2, 9]
|
||||
with open('unit_tests/ceph_ok.json') as f:
|
||||
tree = f.read()
|
||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||
|
@ -34,7 +42,19 @@ class NagiosTestCase(unittest.TestCase):
|
|||
check_output = check_ceph_status.check_ceph_status(args)
|
||||
self.assertRegex(check_output, r"^All OK$")
|
||||
|
||||
def test_health_warn(self, mock_subprocess):
|
||||
@patch('check_ceph_status.get_ceph_version')
def test_health_ok_luminous(self, mock_ceph_version, mock_subprocess):
    """With version 12.2.0 and the OK Luminous fixture, expect "All OK"."""
    mock_ceph_version.return_value = [12, 2, 0]
    # The mocked subprocess output is the fixture file, encoded as bytes.
    with open('unit_tests/ceph_ok_luminous.json') as fixture:
        status_json = fixture.read()
    mock_subprocess.return_value = status_json.encode('UTF-8')
    cli_args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
    result = check_ceph_status.check_ceph_status(cli_args)
    self.assertRegex(result, r"^All OK$")
|
||||
|
||||
@patch('check_ceph_status.get_ceph_version')
|
||||
def test_health_warn(self, mock_ceph_version, mock_subprocess):
|
||||
mock_ceph_version.return_value = [10, 2, 9]
|
||||
with open('unit_tests/ceph_warn.json') as f:
|
||||
tree = f.read()
|
||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||
|
@ -42,7 +62,9 @@ class NagiosTestCase(unittest.TestCase):
|
|||
self.assertRaises(check_ceph_status.WarnError,
|
||||
lambda: check_ceph_status.check_ceph_status(args))
|
||||
|
||||
def test_health_crit(self, mock_subprocess):
|
||||
@patch('check_ceph_status.get_ceph_version')
|
||||
def test_health_crit(self, mock_ceph_version, mock_subprocess):
|
||||
mock_ceph_version.return_value = [10, 2, 9]
|
||||
with open('unit_tests/ceph_crit.json') as f:
|
||||
tree = f.read()
|
||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||
|
@ -50,7 +72,19 @@ class NagiosTestCase(unittest.TestCase):
|
|||
self.assertRaises(check_ceph_status.CriticalError,
|
||||
lambda: check_ceph_status.check_ceph_status(args))
|
||||
|
||||
def test_health_lotsdegraded(self, mock_subprocess):
|
||||
@patch('check_ceph_status.get_ceph_version')
def test_health_crit_luminous(self, mock_ceph_version, mock_subprocess):
    """With version 12.2.0 and the crit Luminous fixture, expect CriticalError."""
    mock_ceph_version.return_value = [12, 2, 0]
    # The mocked subprocess output is the fixture file, encoded as bytes.
    with open('unit_tests/ceph_crit_luminous.json') as fixture:
        status_json = fixture.read()
    mock_subprocess.return_value = status_json.encode('UTF-8')
    cli_args = check_ceph_status.parse_args(['--degraded_thresh', '1'])
    with self.assertRaises(check_ceph_status.CriticalError):
        check_ceph_status.check_ceph_status(cli_args)
|
||||
|
||||
@patch('check_ceph_status.get_ceph_version')
|
||||
def test_health_lotsdegraded(self, mock_ceph_version, mock_subprocess):
|
||||
mock_ceph_version.return_value = [10, 2, 9]
|
||||
with open('unit_tests/ceph_params.json') as f:
|
||||
tree = f.read()
|
||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||
|
@ -58,7 +92,9 @@ class NagiosTestCase(unittest.TestCase):
|
|||
self.assertRaises(check_ceph_status.CriticalError,
|
||||
lambda: check_ceph_status.check_ceph_status(args))
|
||||
|
||||
def test_health_nodeepscrub(self, mock_subprocess):
|
||||
@patch('check_ceph_status.get_ceph_version')
|
||||
def test_health_nodeepscrub(self, mock_ceph_version, mock_subprocess):
|
||||
mock_ceph_version.return_value = [10, 2, 9]
|
||||
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
||||
tree = f.read()
|
||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||
|
@ -66,7 +102,9 @@ class NagiosTestCase(unittest.TestCase):
|
|||
self.assertRaises(check_ceph_status.CriticalError,
|
||||
lambda: check_ceph_status.check_ceph_status(args))
|
||||
|
||||
def test_health_nodeepscrubok(self, mock_subprocess):
|
||||
@patch('check_ceph_status.get_ceph_version')
|
||||
def test_health_nodeepscrubok(self, mock_ceph_version, mock_subprocess):
|
||||
mock_ceph_version.return_value = [10, 2, 9]
|
||||
with open('unit_tests/ceph_nodeepscrub.json') as f:
|
||||
tree = f.read()
|
||||
mock_subprocess.return_value = tree.encode('UTF-8')
|
||||
|
|
Loading…
Reference in New Issue