RabbitMQ metrics and split-brain detection

Enabled the rabbitmq_prometheus plugin so that Prometheus can scrape
RabbitMQ metrics and raise an alert when a RabbitMQ split-brain is
detected.

Integrated the RabbitMQ dashboards into Grafana via the dashboards
relation.

Added new unit test cases

Closes-Bug: 1899183
Change-Id: I88942dd0b246c498d0ab40b00d586d4349b0f100
(cherry picked from commit 0653c186ce)
This commit is contained in:
Linda Guo 2021-07-08 14:19:42 +10:00 committed by Arif Ali
parent 091cd02ee2
commit e965ff7e85
No known key found for this signature in database
GPG Key ID: 369608FBA1353A70
13 changed files with 5961 additions and 3 deletions

View File

@ -49,6 +49,24 @@ an application that supports the rabbitmq interface. For instance:
juju add-relation rabbitmq-server:amqp nova-cloud-controller:amqp
## Monitoring
To collect RabbitMQ metrics, add a relation between rabbitmq-server and
an application that supports the `scrape` interface. For instance:
juju add-relation rabbitmq-server:scrape prometheus:scrape
> **Note:** The scrape relation is only supported when the RabbitMQ version is >= 3.8.
The charm can be related to a dashboard charm such as grafana to visualize the collected metrics:
juju add-relation rabbitmq-server:dashboards grafana:dashboards
To receive alerts on RabbitMQ split-brain events, add a relation between rabbitmq-server and
an application that supports the `prometheus-rules` interface. For instance:
juju add-relation rabbitmq-server:prometheus-rules prometheus:prometheus-rules
## High availability
When more than one unit is deployed the charm will bring up a native RabbitMQ

5841
files/grafana-dashboard.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
- alert: RabbitMQ_split_brain
  # Detect rabbitmq nodes that disagree on the value of rabbitmq_queues.
  # count_values emits one series per distinct rabbitmq_queues value seen
  # within a job, so more than one series for a job means its nodes report
  # different queue counts. The previous expression only counted distinct
  # job labels and never compared the values themselves.
  # NOTE(review): assumes all units of one cluster are scraped under the
  # same job label - confirm against the Prometheus scrape configuration.
  expr: count by (job) (count_values by (job) ("queues", rabbitmq_queues)) > 1
  for: 5m
  labels:
    severity: page
    application: rabbitmq-server
  annotations:
    description: RabbitMQ split brain detected
    summary: RabbitMQ split brain detected

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -768,9 +768,12 @@ def get_plugin_manager():
:returns: Path to rabbitmq-plugins executable
:rtype: str
"""
manager = glob.glob(
'/usr/lib/rabbitmq/lib/rabbitmq_server-*/sbin/rabbitmq-plugins')[0]
return manager
# At version 3.8.2, only /sbin/rabbitmq-plugins can enable plugin correctly
if os.path.exists("/sbin/rabbitmq-plugins"):
return '/sbin/rabbitmq-plugins'
else:
return glob.glob(
'/usr/lib/rabbitmq/lib/rabbitmq_server-*/sbin/rabbitmq-plugins')[0]
def _manage_plugin(plugin, action):

View File

@ -80,6 +80,7 @@ from charmhelpers.core.hookenv import (
DEBUG,
ERROR,
INFO,
WARNING,
leader_set,
leader_get,
relation_get,
@ -327,6 +328,58 @@ def update_clients(check_deferred_restarts=True):
check_deferred_restarts=check_deferred_restarts)
@hooks.hook('dashboards-relation-joined')
def dashboards_relation_joined(relation_id=None, remote_unit=None):
    """Publish the bundled Grafana dashboard on the dashboards relation.

    The dashboard JSON is read from ``files/grafana-dashboard.json``
    (resolved relative to the current working directory) and set on the
    relation together with the dashboard name.

    :param relation_id: relation to set the data on, passed through to
                        relation_set
    :param remote_unit: accepted for hook-signature compatibility; unused
    """
    dashboard_path = os.path.join("files", "grafana-dashboard.json")
    with open(dashboard_path) as dashboard_file:
        settings = {
            "dashboard": dashboard_file.read(),
            "name": "RabbitMQ-Overview",
        }
    relation_set(relation_id, relation_settings=settings)
@hooks.hook('prometheus-rules-relation-joined',
            'prometheus-rules-relation-created')
def prometheus_rules_joined(relation_id=None, remote_unit=None):
    """Publish the split-brain alert rule on the prometheus-rules relation.

    The rule text is read verbatim from
    ``files/prom_rule_rmq_splitbrain.yaml`` (resolved relative to the
    current working directory) and set under the ``groups`` key.

    :param relation_id: relation to set the data on, passed through to
                        relation_set
    :param remote_unit: accepted for hook-signature compatibility; unused
    """
    rule_path = os.path.join("files", "prom_rule_rmq_splitbrain.yaml")
    with open(rule_path) as rule_file:
        settings = {"groups": rule_file.read()}
    relation_set(relation_id, relation_settings=settings)
@hooks.hook('scrape-relation-joined', 'scrape-relation-created')
def prometheus_scrape_joined(relation_id=None, remote_unit=None):
    """Enable the Prometheus plugin and advertise its port for scraping.

    When the installed rabbitmq-server compares lower than 3.8.0 the
    problem is logged, the unit status is set to blocked and the hook
    fails. Otherwise the plugin is enabled, the monitoring port is opened
    and the port number is set on the relation.

    :param relation_id: relation to set the port on, passed through to
                        relation_set
    :param remote_unit: accepted for hook-signature compatibility; unused
    :raises Exception: if rabbitmq-server is older than 3.8.0
    """
    plugin_supported = cmp_pkgrevno('rabbitmq-server', '3.8.0') >= 0
    if not plugin_supported:
        err_msg = ("rabbitmq-server needs to be >= 3.8 "
                   "to support Prometheus plugin")
        log(err_msg, level=WARNING)
        status_set("blocked", err_msg)
        raise Exception(err_msg)

    rabbit.enable_plugin(PROM_PLUGIN)
    open_port(RMQ_MON_PORT)
    scrape_settings = {"port": RMQ_MON_PORT}
    relation_set(relation_id, relation_settings=scrape_settings)
@hooks.hook('scrape-relation-broken')
def prometheus_scape_broken():
    """Undo the scrape setup when the scrape relation is broken.

    Disables the Prometheus plugin, closes the monitoring port and logs
    the teardown at INFO level.
    """
    rabbit.disable_plugin(PROM_PLUGIN)
    close_port(RMQ_MON_PORT)
    teardown_msg = "scrape relation broken, disabled plugin and close port"
    log(teardown_msg, level=INFO)
@validate_amqp_config_tracker
@hooks.hook('amqp-relation-changed')
def amqp_changed(relation_id=None, remote_unit=None,
@ -737,6 +790,8 @@ def upgrade_charm():
# Name of the RabbitMQ management plugin.
MAN_PLUGIN = 'rabbitmq_management'
# Name of the plugin enabled/disabled by the scrape relation hooks.
PROM_PLUGIN = 'rabbitmq_prometheus'
# Port opened for, and published on, the scrape relation.
RMQ_MON_PORT = 15692
@hooks.hook('config-changed')

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -22,6 +22,12 @@ provides:
nrpe-external-master:
interface: nrpe-external-master
scope: container
dashboards:
interface: grafana-dashboard
scrape:
interface: http
prometheus-rules:
interface: prometheus-rules
requires:
ha:
interface: hacluster

View File

@ -893,6 +893,25 @@ class UtilsTests(CharmTestCase):
mock_new_rabbitmq.return_value = True
self.assertEqual(rabbit_utils.get_managment_port(), 15672)
@mock.patch('glob.glob')
@mock.patch('rabbit_utils.subprocess.check_call')
@mock.patch('os.path.exists')
def test_enable_management_plugin(self, mock_os_path,
                                  mock_subprocess,
                                  mock_glob):
    """Check which rabbitmq-plugins binary _manage_plugin invokes.

    Two cases are exercised by toggling the mocked os.path.exists:
    when it returns True the call is expected to use
    /sbin/rabbitmq-plugins; when it returns False the call is expected
    to use the path returned by the mocked glob.glob.
    """
    plugin = "rabbitmq_prometheus"

    # Path check succeeds: the /sbin binary is expected.
    mock_os_path.return_value = True
    rabbit_utils._manage_plugin(plugin, "enable")
    mock_subprocess.assert_called_with(
        ['/sbin/rabbitmq-plugins', "enable", plugin])

    # Path check fails: the globbed, versioned binary is expected.
    mock_os_path.return_value = False
    versioned_binary = ('/usr/lib/rabbitmq/lib/'
                        'rabbitmq_server-3.8.2/sbin/rabbitmq-plugins')
    mock_glob.return_value = [versioned_binary]
    rabbit_utils._manage_plugin(plugin, "enable")
    mock_subprocess.assert_called_with(
        [versioned_binary, "enable", plugin])
@mock.patch('rabbit_utils.caching_cmp_pkgrevno')
@mock.patch('rabbit_utils.relations_for_id')
@mock.patch('rabbit_utils.subprocess')