NRPE: Allow excluding queues from queue-size checks

Option '-e <vhost> <queue>' was added to the 'check_rabbitmq_queues.py'
NRPE script to allow excluding selected queues when checking queue
sizes. A corresponding option, 'exclude_queues', was added to the
charm config.
By default, the following queues are excluded:
 * event.sample
 * notifications_designate.info
 * notifications_designate.error
 * versioned_notifications.info
 * versioned_notifications.error

Closes-Bug: #1811433
Change-Id: I57e297bb4323a3ab98da020bfcb1630889aac6d7
This commit is contained in:
Martin Kalcok 2020-12-08 15:51:32 +01:00
parent 07ec03b5d7
commit 7acad5fdaa
5 changed files with 57 additions and 8 deletions

View File

@ -1,5 +1,4 @@
- project:
templates:
- python35-charm-jobs
- openstack-python3-ussuri-jobs
- openstack-python3-charm-jobs
- openstack-cover-jobs

View File

@ -106,6 +106,20 @@ options:
Wildcards '*' are accepted to monitor all vhosts and/or queues.
In case of multiple matches, only the first will apply: wildcards should
therefore be used last in order to avoid unexpected behavior.
exclude_queues:
type: string
default: "[]"
description: |
List of RabbitMQ queues that should be skipped when checking thresholds.
Interpreted as YAML in format [<vhost>, <queue>].
Excluded queues can be expressed as a multi-line YAML array:
- ['/', 'queue1']
- ['/', 'queue2']
Or as a list of lists:
[['/', 'queue1'], ['/', 'queue2']]
Wildcards '*' are accepted to exclude, for example, a single queue on all
vhosts. Note that the wildcard asterisk must be double-escaped. Example:
[['\\*', 'queue1']]
connection-backlog:
type: int
default:

View File

@ -38,12 +38,22 @@ def gen_stats(data_lines):
yield vhost, queue, int(m_all)
def collate_stats(stats, limits):
def collate_stats(stats, limits, exclude):
# Create a dict with stats collated according to the definitions in the
# limits file. If none of the definitions in the limits file is matched,
# store the stat without collating.
collated = defaultdict(lambda: 0)
for vhost, queue, m_all in stats:
skip = False
for e_vhost, e_queue in exclude:
if fnmatchcase(vhost, e_vhost) and fnmatchcase(queue, e_queue):
skip = True
break
if skip:
continue
for l_vhost, l_queue, _, _ in limits:
if fnmatchcase(vhost, l_vhost) and fnmatchcase(queue, l_queue):
collated[l_vhost, l_queue] += m_all
@ -120,7 +130,18 @@ if __name__ == "__main__":
action='append',
required=True,
metavar=('vhost', 'queue', 'warn', 'crit'),
help=('Vhost and queue to check. Can be used multiple times'))
help='Vhost and queue to check. Can be used multiple times'
)
parser.add_argument(
'-e',
nargs=2,
action='append',
required=False,
default=[],
metavar=('vhost', 'queue'),
help='Vhost and queue to exclude from checks. Can be used multiple \
times'
)
parser.add_argument(
'stats_file',
nargs='*',
@ -133,7 +154,7 @@ if __name__ == "__main__":
chain.from_iterable(
gen_data_lines(filename) for filename in args.stats_file))
# Collate stats according to limit definitions and check.
stats_collated = collate_stats(stats, args.c)
stats_collated = collate_stats(stats, args.c, args.e)
stats_checked = check_stats(stats_collated, args.c)
criticals, warnings = [], []
for queue, vhost, message_no, status in stats_checked:

View File

@ -1403,6 +1403,9 @@ def nrpe_update_queues_check(nrpe_compat, rabbit_dir):
# If value of queue_thresholds is incorrect we want the hook to fail
for item in yaml.safe_load(config('queue_thresholds')):
cmd += ' -c "{}" "{}" {} {}'.format(*item)
for item in yaml.safe_load(config('exclude_queues')):
cmd += ' -e "{}" "{}"'.format(*item)
nrpe_compat.add_check(
shortname=RABBIT_USER + '_queue',
description='Check RabbitMQ Queues',

View File

@ -1157,13 +1157,25 @@ class UtilsTests(CharmTestCase):
# call with stats_cron_schedule set to '*/5 * * * *'
self.test_config.set('stats_cron_schedule', '*/5 * * * *')
# set some queues to exclude to test proper command generation
# with '-e' parameter
self.test_config.set('exclude_queues',
"[['\\*', 'event.sample'], "
"['\\*', 'notifications_designate.info']]")
rabbit_utils.nrpe_update_queues_check(self.nrpe_compat, self.tmp_dir)
default_excludes = [
('\\*', 'event.sample'),
('\\*', 'notifications_designate.info'),
]
exclude_queues = ''
for vhost, queue in default_excludes:
exclude_queues += '-e "{}" "{}" '.format(vhost, queue)
self.nrpe_compat.add_check.assert_called_with(
shortname='rabbitmq_queue',
description='Check RabbitMQ Queues',
check_cmd='{}/check_rabbitmq_queues.py -c "\\*" "\\*" 100 200 '
'{}/data/test_queue_stats.dat'.format(self.tmp_dir,
self.tmp_dir))
check_cmd='{0}/check_rabbitmq_queues.py -c "\\*" "\\*" 100 200 {1}'
'{0}/data/test_queue_stats.dat'.format(self.tmp_dir,
exclude_queues))
self.nrpe_compat.remove_check.assert_not_called()
self.nrpe_compat.reset_mock()