RabbitMQ metrics and split-brain detection
Enabled the rabbitmq_prometheus plugin so that Prometheus can scrape
RabbitMQ metrics and raise an alert when a RabbitMQ split-brain is
detected.
Integrated the RabbitMQ dashboards into Grafana via the dashboards
relation.
Added new unit test cases.
Closes-Bug: 1899183
Change-Id: I88942dd0b246c498d0ab40b00d586d4349b0f100
(cherry picked from commit 0653c186ce)
This commit is contained in:
parent
091cd02ee2
commit
e965ff7e85
18
README.md
18
README.md
|
@ -49,6 +49,24 @@ an application that supports the rabbitmq interface. For instance:
|
|||
|
||||
juju add-relation rabbitmq-server:amqp nova-cloud-controller:amqp
|
||||
|
||||
## Monitoring
|
||||
|
||||
To collect RabbitMQ metrics, add a relation between rabbitmq-server and
|
||||
an application that supports the `scrape` interface. For instance:
|
||||
|
||||
juju add-relation rabbitmq-server:scrape prometheus:scrape
|
||||
|
||||
> **Note:** The scrape relation is only supported when the RabbitMQ version is >= 3.8.
|
||||
|
||||
The charm can also be related to a dashboard charm such as Grafana to visualize the collected metrics:
|
||||
|
||||
juju add-relation rabbitmq-server:dashboards grafana:dashboards
|
||||
|
||||
To receive alerts for RabbitMQ split-brain events, add a relation between rabbitmq-server and
|
||||
an application that supports the `prometheus-rules` interface. For instance:
|
||||
|
||||
juju add-relation rabbitmq-server:prometheus-rules prometheus:prometheus-rules
|
||||
|
||||
## High availability
|
||||
|
||||
When more than one unit is deployed the charm will bring up a native RabbitMQ
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,10 @@
|
|||
- alert: RabbitMQ_split_brain
|
||||
# detect if rabbitmq_queues is different between rabbitmq nodes
|
||||
expr: count(count(rabbitmq_queues) by (job)) > 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
application: rabbitmq-server
|
||||
annotations:
|
||||
description: RabbitMQ split brain detected
|
||||
summary: RabbitMQ split brain detected
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -768,9 +768,12 @@ def get_plugin_manager():
|
|||
:returns: Path to rabbitmq-plugins executable
|
||||
:rtype: str
|
||||
"""
|
||||
manager = glob.glob(
|
||||
'/usr/lib/rabbitmq/lib/rabbitmq_server-*/sbin/rabbitmq-plugins')[0]
|
||||
return manager
|
||||
# At version 3.8.2, only /sbin/rabbitmq-plugins can enable plugin correctly
|
||||
if os.path.exists("/sbin/rabbitmq-plugins"):
|
||||
return '/sbin/rabbitmq-plugins'
|
||||
else:
|
||||
return glob.glob(
|
||||
'/usr/lib/rabbitmq/lib/rabbitmq_server-*/sbin/rabbitmq-plugins')[0]
|
||||
|
||||
|
||||
def _manage_plugin(plugin, action):
|
||||
|
|
|
@ -80,6 +80,7 @@ from charmhelpers.core.hookenv import (
|
|||
DEBUG,
|
||||
ERROR,
|
||||
INFO,
|
||||
WARNING,
|
||||
leader_set,
|
||||
leader_get,
|
||||
relation_get,
|
||||
|
@ -327,6 +328,58 @@ def update_clients(check_deferred_restarts=True):
|
|||
check_deferred_restarts=check_deferred_restarts)
|
||||
|
||||
|
||||
@hooks.hook('dashboards-relation-joined')
def dashboards_relation_joined(relation_id=None, remote_unit=None):
    """
    Handle the dashboards relation being joined.

    Reads the bundled dashboard definition from
    files/grafana-dashboard.json and publishes its text on the relation
    under the "dashboard" key, together with the dashboard name.

    :param relation_id: relation id handed through to relation_set
    :param remote_unit: accepted for hook compatibility; not used here
    """
    dashboard_path = os.path.join("files", "grafana-dashboard.json")
    with open(dashboard_path) as dashboard_file:
        settings = {
            "dashboard": dashboard_file.read(),
            "name": "RabbitMQ-Overview",
        }
    relation_set(relation_id, relation_settings=settings)
|
||||
|
||||
|
||||
@hooks.hook('prometheus-rules-relation-joined',
            'prometheus-rules-relation-created')
def prometheus_rules_joined(relation_id=None, remote_unit=None):
    """
    Handle the prometheus-rules relation being created or joined.

    Reads the bundled rule file files/prom_rule_rmq_splitbrain.yaml and
    publishes its text on the relation under the "groups" key.

    :param relation_id: relation id handed through to relation_set
    :param remote_unit: accepted for hook compatibility; not used here
    """
    rule_path = os.path.join("files", "prom_rule_rmq_splitbrain.yaml")
    with open(rule_path) as rule_file:
        settings = {"groups": rule_file.read()}
    relation_set(relation_id, relation_settings=settings)
|
||||
|
||||
|
||||
@hooks.hook('scrape-relation-joined', 'scrape-relation-created')
def prometheus_scrape_joined(relation_id=None, remote_unit=None):
    """
    Handle the scrape relation being created or joined.

    Enables the Prometheus plugin, opens the monitoring port and
    publishes that port on the relation under the "port" key.

    :param relation_id: relation id handed through to relation_set
    :param remote_unit: accepted for hook compatibility; not used here
    :raises Exception: when cmp_pkgrevno reports rabbitmq-server as older
        than 3.8.0; the unit status is set to "blocked" before raising
    """
    err_msg = "rabbitmq-server needs to be >= 3.8 to support Prometheus plugin"
    version_too_old = cmp_pkgrevno('rabbitmq-server', '3.8.0') < 0
    if version_too_old:
        # Record the reason in the log and the unit status before failing
        # the hook.
        log(err_msg, level=WARNING)
        status_set("blocked", err_msg)
        raise Exception(err_msg)

    rabbit.enable_plugin(PROM_PLUGIN)
    open_port(RMQ_MON_PORT)
    port_settings = {"port": RMQ_MON_PORT}
    relation_set(relation_id, relation_settings=port_settings)
|
||||
|
||||
|
||||
@hooks.hook('scrape-relation-broken')
def prometheus_scape_broken():
    """
    Handle the scrape relation being broken.

    Reverses what the scrape joined handler set up: disables the
    Prometheus plugin, closes the monitoring port, then logs the outcome
    at INFO level.
    """
    rabbit.disable_plugin(PROM_PLUGIN)
    close_port(RMQ_MON_PORT)
    done_msg = "scrape relation broken, disabled plugin and close port"
    log(done_msg, level=INFO)
|
||||
|
||||
|
||||
@validate_amqp_config_tracker
|
||||
@hooks.hook('amqp-relation-changed')
|
||||
def amqp_changed(relation_id=None, remote_unit=None,
|
||||
|
@ -737,6 +790,8 @@ def upgrade_charm():
|
|||
|
||||
|
||||
MAN_PLUGIN = 'rabbitmq_management'
|
||||
PROM_PLUGIN = 'rabbitmq_prometheus'
|
||||
RMQ_MON_PORT = 15692
|
||||
|
||||
|
||||
@hooks.hook('config-changed')
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -22,6 +22,12 @@ provides:
|
|||
nrpe-external-master:
|
||||
interface: nrpe-external-master
|
||||
scope: container
|
||||
dashboards:
|
||||
interface: grafana-dashboard
|
||||
scrape:
|
||||
interface: http
|
||||
prometheus-rules:
|
||||
interface: prometheus-rules
|
||||
requires:
|
||||
ha:
|
||||
interface: hacluster
|
||||
|
|
|
@ -893,6 +893,25 @@ class UtilsTests(CharmTestCase):
|
|||
mock_new_rabbitmq.return_value = True
|
||||
self.assertEqual(rabbit_utils.get_managment_port(), 15672)
|
||||
|
||||
@mock.patch('glob.glob')
@mock.patch('rabbit_utils.subprocess.check_call')
@mock.patch('os.path.exists')
def test_enable_management_plugin(self, mock_os_path,
                                  mock_subprocess,
                                  mock_glob):
    """Check which rabbitmq-plugins executable _manage_plugin invokes.

    Two cases are exercised by toggling the mocked os.path.exists:
    when it returns True the /sbin/rabbitmq-plugins path is expected,
    and when it returns False the path supplied by the mocked glob.glob
    is expected.
    """
    plugin = "rabbitmq_prometheus"

    # Case 1: os.path.exists reports True.
    mock_os_path.return_value = True
    rabbit_utils._manage_plugin(plugin, "enable")
    mock_subprocess.assert_called_with(
        ['/sbin/rabbitmq-plugins', "enable", plugin])

    # Case 2: os.path.exists reports False, so the glob result is used.
    versioned_plugins = ('/usr/lib/rabbitmq/lib/'
                         'rabbitmq_server-3.8.2/sbin/rabbitmq-plugins')
    mock_os_path.return_value = False
    mock_glob.return_value = [versioned_plugins]
    rabbit_utils._manage_plugin(plugin, "enable")
    mock_subprocess.assert_called_with(
        [versioned_plugins, "enable", plugin])
|
||||
|
||||
@mock.patch('rabbit_utils.caching_cmp_pkgrevno')
|
||||
@mock.patch('rabbit_utils.relations_for_id')
|
||||
@mock.patch('rabbit_utils.subprocess')
|
||||
|
|
Loading…
Reference in New Issue