From d59759259e8537af596976f7f8d8adc02f9e75be Mon Sep 17 00:00:00 2001 From: Alvaro Uria Date: Thu, 27 Apr 2017 18:50:14 +0200 Subject: [PATCH] Support null values on recon/replication module Update on nrpe check check_swift_storage: * Refactor check_replication function * Raise STATUS_CRIT on null values * Clean code per flake8 and previous review * Add unit_test for nrpe check check_swift_storage.py Change-Id: Ie076b8ea56f66c1e15a9bfe2e400b49f968fa4ed Closes-Bug: #1673441 Signed-off-by: Alvaro Uria --- .../check_swift_storage.py | 111 +++-- unit_tests/test_check_swift_storage.py | 387 ++++++++++++++++++ 2 files changed, 465 insertions(+), 33 deletions(-) create mode 100644 unit_tests/test_check_swift_storage.py diff --git a/files/nrpe-external-master/check_swift_storage.py b/files/nrpe-external-master/check_swift_storage.py index a43115b..be9da8e 100755 --- a/files/nrpe-external-master/check_swift_storage.py +++ b/files/nrpe-external-master/check_swift_storage.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (C) 2014 Canonical +# Copyright (C) 2014, 2017 Canonical # All Rights Reserved # Author: Jacek Nykis @@ -35,7 +35,7 @@ def check_md5(base_url): results = [] try: data = urllib2.urlopen(url).read() - j = json.loads(data) + ringmd5_info = json.loads(data) except urllib2.URLError: return [(STATUS_UNKNOWN, "Can't open url: {}".format(url))] except ValueError: @@ -43,9 +43,9 @@ def check_md5(base_url): for ringfile in ringfiles: try: - if generate_md5(ringfile) != j[ringfile]: - results.append((STATUS_CRIT, - "Ringfile {} MD5 sum mismatch".format(ringfile))) + if generate_md5(ringfile) != ringmd5_info[ringfile]: + results.append((STATUS_CRIT, "Ringfile {} MD5 sum " + "mismatch".format(ringfile))) except IOError: results.append( (STATUS_UNKNOWN, "Can't open ringfile {}".format(ringfile))) @@ -55,6 +55,41 @@ def check_md5(base_url): return [(STATUS_OK, "OK")] +def repl_last_timestamp(repl_info): + """Verifies that values are not null (float expected) + @returns (timedelta, counter) + + ie. 
if /var/cache/swift/* has no swift uid/gid,
+    syslog will show errors and all replication values will
+    return None
+
+    {
+        "replication_last": None,
+        "replication_stats": None,
+        "object_replication_last": None,
+        "replication_time": None,
+        "object_replication_time": None,
+    }
+    """
+    delta = None
+    repl_last = (repl_info.get('replication_last') or
+                 repl_info.get('object_replication_last'))
+
+    if repl_last:
+        delta = datetime.datetime.now() - \
+            datetime.datetime.fromtimestamp(repl_last)
+
+    if 'replication_stats' not in repl_info:
+        # no info shared by swift; considered OK
+        repl_failures = None
+    else:
+        # raise error (-1) if NULL value or 'failure' attr does not exist
+        repl_failures = (repl_info.get('replication_stats') or
+                         {}).get('failure', -1)
+
+    return (delta, repl_failures)
+
+
 def check_replication(base_url, limits):
     types = ["account", "object", "container"]
     results = []
@@ -62,7 +97,7 @@ def check_replication(base_url, limits):
         url = base_url + "replication/" + repl
         try:
             data = urllib2.urlopen(url).read()
-            j = json.loads(data)
+            repl_info = json.loads(data)
         except urllib2.URLError:
             results.append((STATUS_UNKNOWN, "Can't open url: {}".format(url)))
             continue
@@ -70,25 +105,34 @@
             results.append((STATUS_UNKNOWN, "Can't parse status data"))
             continue
 
-        if "object_replication_last" in j:
-            repl_last = datetime.datetime.fromtimestamp(j["object_replication_last"])
-        else:
-            repl_last = datetime.datetime.fromtimestamp(j["replication_last"])
-        delta = datetime.datetime.now() - repl_last
-        if delta.seconds >= limits[1]:
+        delta, repl_failures = repl_last_timestamp(repl_info)
+
+        if not delta:
             results.append((STATUS_CRIT,
-                            "'{}' replication lag is {} seconds".format(repl, delta.seconds)))
+                            "'{}' replication lag not working "
+                            '(perms issue? check syslog)'.format(repl)))
+        elif delta.seconds >= limits[1]:
+            results.append((STATUS_CRIT,
+                            "'{}' replication lag is {} "
+                            "seconds".format(repl, delta.seconds)))
         elif delta.seconds >= limits[0]:
             results.append((STATUS_WARN,
-                            "'{}' replication lag is {} seconds".format(repl, delta.seconds)))
-        if "replication_stats" in j:
-            errors = j["replication_stats"]["failure"]
-            if errors >= limits[3]:
-                results.append(
-                    (STATUS_CRIT, "{} replication failures".format(errors)))
-            elif errors >= limits[2]:
-                results.append(
-                    (STATUS_WARN, "{} replication failures".format(errors)))
+                            "'{}' replication lag is {} "
+                            "seconds".format(repl, delta.seconds)))
+
+        if not repl_failures:
+            pass
+        elif repl_failures >= limits[3]:
+            results.append((STATUS_CRIT,
+                            "{} replication failures".format(repl_failures)))
+        elif repl_failures >= limits[2]:
+            results.append((STATUS_WARN,
+                            "{} replication failures".format(repl_failures)))
+        elif repl_failures == -1 and delta:
+            # XXX(aluria): only raised if repl lag err was not triggered
+            results.append((STATUS_CRIT,
+                            "replication failures counter is NULL "
+                            "(check syslog)"))
     if results:
         return results
     else:
@@ -98,18 +142,19 @@
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Check swift-storage health')
     parser.add_argument('-H', '--host', dest='host', default='localhost',
-                       help='Hostname to query')
-    parser.add_argument('-p', '--port', dest='port', default='6000',
-                       type=int, help='Port number')
+                        help='Hostname to query')
+    parser.add_argument('-p', '--port', dest='port', default='6000', type=int,
+                        help='Port number')
     parser.add_argument('-r', '--replication', dest='check_replication',
-                       type=int, nargs=4, help='Check replication status',
-                       metavar=('lag_warn', 'lag_crit', 'failures_warn', 'failures_crit'))
+                        type=int, nargs=4, help='Check replication status',
+                        metavar=('lag_warn', 'lag_crit', 'failures_warn',
+                                 'failures_crit'))
     parser.add_argument('-m', '--md5', dest='check_md5', action='store_true',
-                       help='Compare server rings md5sum with local copy')
+                        help='Compare server rings md5sum with local copy')
     args = parser.parse_args()
 
     if not args.check_replication and not args.check_md5:
-        print "You must use -r or -m switch"
+        print('You must use -r or -m switch')
         sys.exit(STATUS_UNKNOWN)
 
     base_url = "http://{}:{}/recon/".format(args.host, args.port)
@@ -123,14 +168,14 @@ if __name__ == '__main__':
     warns = ';'.join([i[1] for i in results if i[0] == STATUS_WARN])
     unknowns = ';'.join([i[1] for i in results if i[0] == STATUS_UNKNOWN])
     if crits:
-        print "CRITICAL: " + crits
+        print("CRITICAL: {}".format(crits))
         sys.exit(STATUS_CRIT)
     elif warns:
-        print "WARNING: " + warns
+        print("WARNING: {}".format(warns))
         sys.exit(STATUS_WARN)
     elif unknowns:
-        print "UNKNOWN: " + unknowns
+        print("UNKNOWN: {}".format(unknowns))
        sys.exit(STATUS_UNKNOWN)
     else:
-        print "OK"
+        print("OK")
         sys.exit(0)
diff --git a/unit_tests/test_check_swift_storage.py b/unit_tests/test_check_swift_storage.py
new file mode 100644
index 0000000..c59bdb1
--- /dev/null
+++ b/unit_tests/test_check_swift_storage.py
@@ -0,0 +1,387 @@
+# Copyright 2017 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import sys +import unittest +import urllib2 + +from mock import ( + patch, + mock_open, + MagicMock, + Mock, + PropertyMock, +) + +sys.path.append('files/nrpe-external-master') +from check_swift_storage import ( + check_md5, + check_replication, + generate_md5, + repl_last_timestamp, +) + + +STATUS_OK = 0 +STATUS_WARN = 1 +STATUS_CRIT = 2 +STATUS_UNKNOWN = 3 + + +class NewDate(datetime.datetime): + @classmethod + def now(cls): + """ + Mock for builtin datetime.datetime.now(), to test repl_last_timestamp() + Non-defined methods are inherited from real class + """ + return cls(2017, 4, 27, + 13, 25, 46, 629282) + + @classmethod + def fromtimestamp(cls, timestamp): + return cls.utcfromtimestamp(timestamp) + + +datetime.datetime = NewDate + + +class CheckSwiftStorageTestCase(unittest.TestCase): + def test_generate_md5(self): + """ + Ensure md5 checksum is generated from a file content + """ + with patch("__builtin__.open", mock_open(read_data='data')) as \ + mock_file: + result = generate_md5('path/to/file') + mock_file.assert_called_with('path/to/file', 'rb') + # md5 hash for 'data' string + self.assertEqual(result, + '8d777f385d3dfec8815d20f7496026dc') + + @patch('urllib2.urlopen') + def test_check_md5_unknown_urlerror(self, mock_urlopen): + """ + Force urllib2.URLError to test try-except + """ + base_url = 'http://localhost:6000/recon/' + url = '{}ringmd5'.format(base_url) + error = 'connection refused' + mock_urlopen.side_effect = urllib2.URLError(Mock(return_value=error)) + result = check_md5(base_url) + self.assertEqual(result, + [(STATUS_UNKNOWN, + "Can't open url: {}".format(url))]) + + @patch('urllib2.urlopen') + def test_check_md5_unknown_valueerror1(self, mock_urlopen): + """ + Force ValueError on urllib2 to test try-except + """ + base_url = 'asdfasdf' + url = '{}ringmd5'.format(base_url) + mock_urlopen.side_effect = ValueError(Mock(return_value='')) + result = check_md5(base_url) + mock_urlopen.assert_called_with(url) + self.assertEqual(result, + [(STATUS_UNKNOWN, + "Can't parse status data")]) + + @patch('urllib2.urlopen') + def test_check_md5_unknown_valueerror2(self, mock_urlopen): + """ + Force ValueError on json to test try-catch + """ + jdata = PropertyMock(return_value='X') + mock_urlopen.return_value = MagicMock(read=jdata) + result = check_md5('.') + mock_urlopen.assert_called_with('.ringmd5') + self.assertEqual(result, + [(STATUS_UNKNOWN, + "Can't parse status data")]) + + @patch('check_swift_storage.generate_md5') + def test_check_md5_unknown_ioerror(self, mock_generate_md5): + """ + Force IOError (reading file) to test try-catch + """ + jdata = '{"/etc/swift/object.ring.gz": ' \ + '"6b4f3a0ef3731f18291ecd053ce0d9b6", ' \ + '"/etc/swift/account.ring.gz": ' \ + '"93fc4ae496a7343362ebf13988a137e7", ' \ + '"/etc/swift/container.ring.gz": ' \ + '"0ea1ec9585ef644ce2b5c5b1dced4128"}' + pmock_jdata = PropertyMock(return_value=jdata) + mock_generate_md5.side_effect = IOError() + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_md5('.') + 
mock_urlopen.assert_called_with('.ringmd5') + expected_result = [(STATUS_UNKNOWN, + "Can't open ringfile " + "/etc/swift/{}.ring.gz".format(name)) + for name in ('object', 'account', 'container')] + self.assertEqual(result, expected_result) + + @patch('check_swift_storage.generate_md5') + def test_check_md5_crit_md5sum_mismatch(self, mock_generate_md5): + """ + Ensure md5 checksums match, STATUS_CRIT + """ + jdata = '{"/etc/swift/object.ring.gz": ' \ + '"6b4f3a0ef3731f18291ecd053ce0d9b6", ' \ + '"/etc/swift/account.ring.gz": ' \ + '"93fc4ae496a7343362ebf13988a137e7", ' \ + '"/etc/swift/container.ring.gz": ' \ + '"0ea1ec9585ef644ce2b5c5b1dced4128"}' + pmock_jdata = PropertyMock(return_value=jdata) + mock_generate_md5.return_value = 'xxxx' + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_md5('.') + mock_urlopen.assert_called_with('.ringmd5') + expected_result = [(STATUS_CRIT, + 'Ringfile /etc/swift/{}.ring.gz ' + 'MD5 sum mismatch'.format(name)) + for name in ('object', 'account', 'container')] + self.assertEqual(result, expected_result) + + @patch('check_swift_storage.generate_md5') + def test_check_md5_ok(self, mock_generate_md5): + """ + Ensure md5 checksums match, STATUS_OK + """ + jdata = '{"/etc/swift/object.ring.gz": ' \ + '"6b4f3a0ef3731f18291ecd053ce0d9b6", ' \ + '"/etc/swift/account.ring.gz": ' \ + '"6b4f3a0ef3731f18291ecd053ce0d9b6", ' \ + '"/etc/swift/container.ring.gz": ' \ + '"6b4f3a0ef3731f18291ecd053ce0d9b6"}' + pmock_jdata = PropertyMock(return_value=jdata) + mock_generate_md5.return_value = '6b4f3a0ef3731f18291ecd053ce0d9b6' + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_md5('.') + mock_urlopen.assert_called_with('.ringmd5') + self.assertEqual(result, + [(STATUS_OK, 'OK')]) + + @patch('urllib2.urlopen') + def test_check_replication_unknown_urlerror(self, mock_urlopen): + """ + Force urllib2.URLError to test try-catch + """ + base_url = 'http://localhost:6000/recon/' + url = '{}replication/{}' + error = 'connection refused' + mock_urlopen.side_effect = urllib2.URLError(Mock(return_value=error)) + result = check_replication(base_url, 60) + expected_result = [(STATUS_UNKNOWN, + "Can't open url: " + "{}".format(url.format(base_url, name))) + for name in ('account', 'object', 'container')] + self.assertEqual(result, expected_result) + + @patch('urllib2.urlopen') + def test_check_replication_unknown_valueerror1(self, mock_urlopen): + """ + Force ValueError on urllib2 to test try-catch + """ + base_url = '.' 
+ mock_urlopen.side_effect = ValueError(Mock(return_value='')) + result = check_replication(base_url, [4, 10, 4, 10]) + self.assertEqual(result, + 3*[(STATUS_UNKNOWN, + "Can't parse status data")]) + + @patch('urllib2.urlopen') + def test_check_replication_unknown_valueerror2(self, mock_urlopen): + """ + Force ValueError on json to test try-catch + """ + base_url = 'http://localhost:6000/recon/' + jdata = PropertyMock(return_value='X') + mock_urlopen.return_value = MagicMock(read=jdata) + result = check_replication(base_url, [4, 10, 4, 10]) + self.assertEqual(result, + 3*[(STATUS_UNKNOWN, + "Can't parse status data")]) + + @patch('check_swift_storage.repl_last_timestamp') + def test_check_replication_crit_lag_notworking(self, mock_timestamp): + """ + Catch NULL replication value, STATUS_CRIT + """ + base_url = 'http://localhost:6000/recon/' + jdata = '{"replication_last": 1493299546.629282, ' \ + '"replication_stats": {"no_change": 0, "rsync": 0, ' \ + '"success": 0, "failure": 0, "attempted": 0, "ts_repl": 0, ' \ + '"remove": 0, "remote_merge": 0, "diff_capped": 0, ' \ + '"start": 1493299546.621624, "hashmatch": 0, "diff": 0, ' \ + '"empty": 0}, "replication_time": 0.0076580047607421875}' + pmock_jdata = PropertyMock(return_value=jdata) + mock_timestamp.return_value = (None, 0) + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_replication(base_url, [4, 10, 4, 10]) + self.assertEqual(result, + [(STATUS_CRIT, + "'{}' replication lag not working " + "(perms issue? check syslog)".format(repl)) + for repl in ('account', 'object', 'container')]) + + @patch('check_swift_storage.repl_last_timestamp') + def test_check_replication_crit_lag(self, mock_timestamp): + """ + Replication lag over CRIT threshold, STATUS_CRIT + """ + base_url = 'http://localhost:6000/recon/' + jdata = '{"replication_last": 1493299546.629282, ' \ + '"replication_stats": {"no_change": 0, "rsync": 0, ' \ + '"success": 0, "failure": 0, "attempted": 0, "ts_repl": 0, ' \ + '"remove": 0, "remote_merge": 0, "diff_capped": 0, ' \ + '"start": 1493299546.621624, "hashmatch": 0, "diff": 0, ' \ + '"empty": 0}, "replication_time": 0.0076580047607421875}' + pmock_jdata = PropertyMock(return_value=jdata) + mock_timestamp.return_value = (MagicMock(seconds=12), 0) + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_replication(base_url, [4, 10, 4, 10]) + self.assertEqual(result, + [(STATUS_CRIT, + "'{}' replication lag is " + "12 seconds".format(repl)) + for repl in ('account', 'object', 'container')]) + + @patch('check_swift_storage.repl_last_timestamp') + def test_check_replication_crit_failures(self, mock_timestamp): + """ + Replication failures over CRIT threshold, STATUS_CRIT + """ + base_url = 'http://localhost:6000/recon/' + jdata = '{"replication_last": 1493299546.629282, ' \ + '"replication_stats": {"no_change": 0, "rsync": 0, ' \ + '"success": 0, "failure": 0, "attempted": 0, "ts_repl": 0, ' \ + '"remove": 0, "remote_merge": 0, "diff_capped": 0, ' \ + '"start": 1493299546.621624, "hashmatch": 0, "diff": 0, ' \ + '"empty": 0}, "replication_time": 0.0076580047607421875}' + pmock_jdata = PropertyMock(return_value=jdata) + mock_timestamp.return_value = (MagicMock(seconds=0), 12) + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_replication(base_url, [4, 10, 4, 10]) + self.assertEqual(result, + 3*[(STATUS_CRIT, "12 
replication failures")]) + + @patch('check_swift_storage.repl_last_timestamp') + def test_check_replication_crit_null_failures(self, mock_timestamp): + """ + Catch NULL value on failures stats, STATUS_CRIT + """ + base_url = 'http://localhost:6000/recon/' + jdata = '{"replication_last": 1493299546.629282, ' \ + '"replication_stats": {"no_change": 0, "rsync": 0, ' \ + '"success": 0, "failure": 0, "attempted": 0, "ts_repl": 0, ' \ + '"remove": 0, "remote_merge": 0, "diff_capped": 0, ' \ + '"start": 1493299546.621624, "hashmatch": 0, "diff": 0, ' \ + '"empty": 0}, "replication_time": 0.0076580047607421875}' + pmock_jdata = PropertyMock(return_value=jdata) + mock_timestamp.return_value = (MagicMock(seconds=0), -1) + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_replication(base_url, [4, 10, 4, 10]) + self.assertEqual(result, + 3*[(STATUS_CRIT, + "replication failures counter is NULL " + "(check syslog)")]) + + @patch('check_swift_storage.repl_last_timestamp') + def test_check_replication_warn_lag(self, mock_timestamp): + """ + Replication lag over WARN threshold (below CRIT), STATUS_WARN + """ + base_url = 'http://localhost:6000/recon/' + jdata = '{"replication_last": 1493299546.629282, ' \ + '"replication_stats": {"no_change": 0, "rsync": 0, ' \ + '"success": 0, "failure": 0, "attempted": 0, "ts_repl": 0, ' \ + '"remove": 0, "remote_merge": 0, "diff_capped": 0, ' \ + '"start": 1493299546.621624, "hashmatch": 0, "diff": 0, ' \ + '"empty": 0}, "replication_time": 0.0076580047607421875}' + pmock_jdata = PropertyMock(return_value=jdata) + mock_timestamp.return_value = (MagicMock(seconds=5), 0) + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_replication(base_url, [4, 10, 4, 10]) + self.assertEqual(result, + [(STATUS_WARN, + "'{}' replication lag is " + "5 seconds".format(repl)) + for repl in ('account', 'object', 'container')]) + + @patch('check_swift_storage.repl_last_timestamp') + def test_check_replication_warn_failures(self, mock_timestamp): + """ + Replication failures over WARN threshold (below CRIT), STATUS_WARN + """ + base_url = 'http://localhost:6000/recon/' + jdata = '{"replication_last": 1493299546.629282, ' \ + '"replication_stats": {"no_change": 0, "rsync": 0, ' \ + '"success": 0, "failure": 0, "attempted": 0, "ts_repl": 0, ' \ + '"remove": 0, "remote_merge": 0, "diff_capped": 0, ' \ + '"start": 1493299546.621624, "hashmatch": 0, "diff": 0, ' \ + '"empty": 0}, "replication_time": 0.0076580047607421875}' + pmock_jdata = PropertyMock(return_value=jdata) + mock_timestamp.return_value = (MagicMock(seconds=0), 5) + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_replication(base_url, [4, 10, 4, 10]) + self.assertEqual(result, + 3*[(STATUS_WARN, "5 replication failures")]) + + @patch('check_swift_storage.repl_last_timestamp') + def test_check_replication_ok(self, mock_timestamp): + """ + Replication lag and number of failures are below WARN threshold, + STATUS_OK + """ + base_url = 'http://localhost:6000/recon/' + jdata = '{"replication_last": 1493299546.629282, ' \ + '"replication_stats": {"no_change": 0, "rsync": 0, ' \ + '"success": 0, "failure": 0, "attempted": 0, "ts_repl": 0, ' \ + '"remove": 0, "remote_merge": 0, "diff_capped": 0, ' \ + '"start": 1493299546.621624, "hashmatch": 0, "diff": 0, ' \ + '"empty": 0}, "replication_time": 0.0076580047607421875}' + 
pmock_jdata = PropertyMock(return_value=jdata) + mock_timestamp.return_value = (MagicMock(seconds=0), 0) + with patch('urllib2.urlopen') as mock_urlopen: + mock_urlopen.return_value = MagicMock(read=pmock_jdata) + result = check_replication(base_url, [4, 10, 4, 10]) + self.assertEqual(result, [(STATUS_OK, 'OK')]) + + def test_repl_last_timestamp(self): + """ + Calculates delta between NOW and last replication date + Also gathers the number of failures + """ + # 1493299546.629282 + jdata = {u'replication_last': 1493299546.629282, u'replication_stats': + {u'no_change': 0, u'rsync': 0, u'success': 0, u'start': + 1493299546.621624, u'attempted': 0, u'ts_repl': 0, u'remove': + 0, u'remote_merge': 0, u'diff_capped': 0, u'failure': 0, + u'hashmatch': 0, u'diff': 0, u'empty': 0}, + u'replication_time': 0.0076580047607421875} + result = repl_last_timestamp(jdata) + self.assertEqual(result, + (datetime.timedelta(0), 0))
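
For reference, a minimal sketch of how the new repl_last_timestamp() helper is
exercised (imports follow the unit tests above; the payload values are
illustrative, not taken from a real recon cache):

    import sys
    sys.path.append('files/nrpe-external-master')

    from check_swift_storage import repl_last_timestamp

    # Healthy recon payload, trimmed to the keys the helper reads: the result
    # is (timedelta since the last replication run, failure counter).
    healthy = {'replication_last': 1493299546.629282,
               'replication_stats': {'failure': 0}}
    delta, failures = repl_last_timestamp(healthy)
    print(delta, failures)

    # Null payload from bug #1673441 (recon cache not readable by swift):
    # the timestamp stays None and the failure counter falls back to -1;
    # check_replication() then raises STATUS_CRIT for the missing lag
    # timestamp instead of crashing with a traceback.
    broken = {'replication_last': None,
              'replication_stats': None,
              'object_replication_last': None,
              'replication_time': None,
              'object_replication_time': None}
    print(repl_last_timestamp(broken))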