From c9efea67c8fe1aec32dc3bebe9853f41e4e23100 Mon Sep 17 00:00:00 2001 From: Olivier Dufour-Cuvillier Date: Wed, 5 Oct 2022 00:03:29 +0900 Subject: [PATCH] Allow NRPE to collect stats in CIS hardened env It removes the necessity to run the cron task as root user and ensure the content created in /var/lib/rabbitmq belongs to rabbitmq user and group solely. Then giving access for nrpe user is done by adding its user to rabbitmq group. Also implemented in the upgrade-charm hook for ongoing deployments Closes-Bug: #1879524 Change-Id: I19e3d675ace7c669451ca40a20d21cef1aec6a95 --- files/collect_rabbitmq_stats.sh | 6 ++- hooks/rabbit_utils.py | 39 ++++++++++++++++++- hooks/rabbitmq_server_relations.py | 10 +++++ unit_tests/test_rabbit_utils.py | 40 ++++++++++++++++++++ unit_tests/test_rabbitmq_server_relations.py | 17 ++++++++- 5 files changed, 108 insertions(+), 4 deletions(-) diff --git a/files/collect_rabbitmq_stats.sh b/files/collect_rabbitmq_stats.sh index 6c31621b..02e12426 100755 --- a/files/collect_rabbitmq_stats.sh +++ b/files/collect_rabbitmq_stats.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2011, 2014 Canonical +# Copyright (C) 2011, 2014, 2022 Canonical # All Rights Reserved # Author: Liam Young, Jacek Nykis @@ -29,6 +29,7 @@ else echo "No PID file found" exit 3 fi +umask 027 DATA_DIR="/var/lib/rabbitmq/data" DATA_FILE="${DATA_DIR}/$(hostname -s)_queue_stats.dat" LOG_DIR="/var/lib/rabbitmq/logs" @@ -51,6 +52,7 @@ while read VHOST; do awk "{print \"$VHOST \" \$0 \" $(date +'%s') \"}" >> ${TMP_DATA_FILE} 2>${LOG_DIR}/list_queues.log done mv ${TMP_DATA_FILE} ${DATA_FILE} -chmod 644 ${DATA_FILE} +chmod 640 ${DATA_FILE} +chmod 640 ${RABBIT_STATS_DATA_FILE} echo "mnesia_size: ${MNESIA_DB_SIZE}@$NOW" > $RABBIT_STATS_DATA_FILE echo "rss_size: ${RABBIT_RSS}@$NOW" >> $RABBIT_STATS_DATA_FILE diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py index ad42930e..f229b225 100644 --- a/hooks/rabbit_utils.py +++ b/hooks/rabbit_utils.py @@ -120,13 +120,14 @@ ENV_CONF = '/etc/rabbitmq/rabbitmq-env.conf' RABBITMQ_CONFIG = '/etc/rabbitmq/rabbitmq.config' RABBITMQ_CONF = '/etc/rabbitmq/rabbitmq.conf' ENABLED_PLUGINS = '/etc/rabbitmq/enabled_plugins' +NRPE_USER = 'nagios' RABBIT_USER = 'rabbitmq' LIB_PATH = '/var/lib/rabbitmq/' HOSTS_FILE = '/etc/hosts' NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins' SCRIPTS_DIR = '/usr/local/bin' STATS_CRONFILE = '/etc/cron.d/rabbitmq-stats' -CRONJOB_CMD = ("{schedule} root timeout -k 10s -s SIGINT {timeout} " +CRONJOB_CMD = ("{schedule} rabbitmq timeout -k 10s -s SIGINT {timeout} " "{command} 2>&1 | logger -p local0.notice\n") COORD_KEY_RESTART = "restart" @@ -1823,6 +1824,42 @@ def management_plugin_enabled(): return config('management_plugin') is True +def add_nrpe_file_access(): + """ Add Nagios user the access to rabbitmq directories + + It allows Nagios NRPE agent to collect the stats from rabbitmq + """ + run_cmd(['usermod', '-a', '-G', 'rabbitmq', NRPE_USER]) + + +def fix_nrpe_file_owner(): + """ Set rabbitmq as owner of logs data generated from cron job + + Necessary on older deployments where the cron task would create files + with root as owner previously + It ensures the compatibility with existing deployments + """ + # Although this should run only when related to nrpe subordinate + # there is still a possibility for a race condition if upgrading the units + # before the cron task had the time to create the files + if os.path.exists(f'{LIB_PATH}data') and os.path.exists(f'{LIB_PATH}logs'): + run_cmd(['chown', + '-R', + f'{RABBIT_USER}:{RABBIT_USER}', + f'{LIB_PATH}data/', + ]) + run_cmd(['chown', + '-R', + f'{RABBIT_USER}:{RABBIT_USER}', + f'{LIB_PATH}logs/', + ]) + run_cmd(['chmod', + '750', + f'{LIB_PATH}data/', + f'{LIB_PATH}logs/', + ]) + + def sync_nrpe_files(): """Sync all NRPE-related files. diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py index 90d90e97..d22bd3c8 100755 --- a/hooks/rabbitmq_server_relations.py +++ b/hooks/rabbitmq_server_relations.py @@ -857,6 +857,7 @@ def update_nrpe_checks(): nrpe_compat.write() rabbit.remove_nrpe_files() + rabbit.add_nrpe_file_access() @hooks.hook('upgrade-charm.real') @@ -915,6 +916,15 @@ def upgrade_charm(): config(rabbit.CLUSTER_MODE_KEY)), level='INFO') leader_set({rabbit.CLUSTER_MODE_KEY: config(rabbit.CLUSTER_MODE_KEY)}) + # BUG:#1879524 + # rabbitmq stat files checked by nrpe might be owned by root preventing + # monitoring to read correctly the values. + # Ensure that they belong to rabbitmq user to not break compatibility with + # older existing deployment + if is_relation_made('nrpe-external-master'): + rabbit.add_nrpe_file_access() + rabbit.fix_nrpe_file_owner() + MAN_PLUGIN = 'rabbitmq_management' PROM_PLUGIN = 'rabbitmq_prometheus' diff --git a/unit_tests/test_rabbit_utils.py b/unit_tests/test_rabbit_utils.py index 5f61f5ba..2940d179 100644 --- a/unit_tests/test_rabbit_utils.py +++ b/unit_tests/test_rabbit_utils.py @@ -1185,6 +1185,46 @@ class UtilsTests(CharmTestCase): level='DEBUG') ]) + @mock.patch('rabbit_utils.run_cmd') + def test_add_nrpe_file_access(self, mock_run_cmd): + rabbit_utils.NRPE_USER = 'nrpe_user' + rabbit_utils.add_nrpe_file_access() + mock_run_cmd.assert_called_once_with(['usermod', + '-a', + '-G', + 'rabbitmq', + 'nrpe_user', + ]) + + @mock.patch('rabbit_utils.run_cmd') + @mock.patch('os.path.exists') + @mock.patch('rabbit_utils.RABBIT_USER', 'rabbit') + @mock.patch('rabbit_utils.LIB_PATH', '/var/tmp/rabbit/') + def test_fix_nrpe_file_owner(self, mock_os_exists, mock_run_cmd): + call_args1 = ['chown', + '-R', + 'rabbit:rabbit', + '/var/tmp/rabbit/data/' + ] + call_args2 = ['chown', + '-R', + 'rabbit:rabbit', + '/var/tmp/rabbit/logs/' + ] + call_args3 = ['chmod', + '750', + '/var/tmp/rabbit/data/', + '/var/tmp/rabbit/logs/', + ] + mock_os_exists.return_value = True + rabbit_utils.fix_nrpe_file_owner() + + self.assertEqual(mock_run_cmd.call_count, 3) + mock_run_cmd.assert_has_calls([mock.call(call_args1), + mock.call(call_args2), + mock.call(call_args3) + ]) + @mock.patch('os.path.isdir') @mock.patch('rabbit_utils.charm_dir') @mock.patch('rabbit_utils.config') diff --git a/unit_tests/test_rabbitmq_server_relations.py b/unit_tests/test_rabbitmq_server_relations.py index d86ffbe0..1444245c 100644 --- a/unit_tests/test_rabbitmq_server_relations.py +++ b/unit_tests/test_rabbitmq_server_relations.py @@ -346,7 +346,9 @@ class RelationUtil(CharmTestCase): @patch('rabbitmq_server_relations.config') @patch('rabbit_utils.config') @patch('rabbit_utils.remove_file') + @patch('rabbit_utils.add_nrpe_file_access') def test_update_nrpe_checks(self, + mock_add_nrpe_access, mock_remove_file, mock_config3, mock_config, @@ -419,6 +421,8 @@ class RelationUtil(CharmTestCase): 'nagios-unit-0'), check_cmd=cmd_5671) + mock_add_nrpe_access.assert_called_once() + # test stats_cron_schedule has been removed mock_remove_file.reset_mock() mock_add_check.reset_mock() @@ -725,7 +729,14 @@ class RelationUtil(CharmTestCase): @patch('os.listdir') @patch('charmhelpers.contrib.hardening.harden.config') @patch.object(rabbitmq_server_relations, 'config') - def test_upgrade_charm(self, config, harden_config, listdir, + @patch.object(rabbitmq_server_relations, 'is_relation_made') + @patch('rabbit_utils.add_nrpe_file_access') + @patch('rabbit_utils.fix_nrpe_file_owner') + def test_upgrade_charm(self, + mock_fix_nrpe_owner, + mock_add_nrpe_access, + is_relation_made, + config, harden_config, listdir, is_elected_releader, migrate_passwords_to_peer_relation, update_peer_cluster_status, @@ -741,6 +752,7 @@ class RelationUtil(CharmTestCase): is_elected_releader.return_value = True clustered_with_leader.return_value = True filter_installed_packages.side_effect = lambda x: x + is_relation_made.return_value = True leader_get.return_value = None @@ -756,3 +768,6 @@ class RelationUtil(CharmTestCase): {rabbit_utils.CLUSTER_MODE_KEY: self.test_config.get(rabbit_utils.CLUSTER_MODE_KEY)} ) + + mock_add_nrpe_access.assert_called_once() + mock_fix_nrpe_owner.assert_called_once()