diff --git a/hooks/rabbitmq_server_relations.py b/hooks/rabbitmq_server_relations.py index 67bd4ccb..1348b5c7 100755 --- a/hooks/rabbitmq_server_relations.py +++ b/hooks/rabbitmq_server_relations.py @@ -16,6 +16,7 @@ import glob import os +import re import shutil import sys import subprocess @@ -56,7 +57,6 @@ from charmhelpers.contrib.openstack.utils import ( is_hook_allowed, is_unit_paused_set, set_unit_upgrading, - clear_unit_paused, clear_unit_upgrading, ) @@ -560,7 +560,7 @@ def cluster_joined(relation_id=None): if is_leader(): log('Leader peer_storing cookie', level=INFO) - cookie = open(rabbit.COOKIE_PATH, 'r').read().strip() + cookie = read_erlang_cookie() peer_store('cookie', cookie) peer_store('leader_node_ip', unit_private_ip()) peer_store('leader_node_hostname', rabbit.get_unit_hostname()) @@ -598,6 +598,11 @@ def cluster_changed(relation_id=None, remote_unit=None): 'rabbitmq cluster config.', level=INFO) return + if is_unit_paused_set(): + log('Unit is paused-set, so avoiding any other cluster activities', + level=INFO) + return + if rabbit.is_sufficient_peers(): rabbit.update_peer_cluster_status() if not rabbit.clustered_with_leader(): @@ -658,9 +663,7 @@ def update_cookie(leaders_cookie=None): cookie = leaders_cookie else: cookie = peer_retrieve('cookie') - cookie_local = None - with open(rabbit.COOKIE_PATH, 'r') as f: - cookie_local = f.read().strip() + cookie_local = read_erlang_cookie() if cookie_local == cookie: log('Cookie already synchronized with peer.') @@ -669,13 +672,92 @@ def update_cookie(leaders_cookie=None): raise Exception("rabbitmq-server must be restarted but not permitted") service_stop('rabbitmq-server') - with open(rabbit.COOKIE_PATH, 'wb') as out: - out.write(cookie.encode('ascii')) + write_erlang_cookie(cookie) if not is_unit_paused_set(): service_restart('rabbitmq-server') rabbit.wait_app() +def read_erlang_cookie(): + """Read the erlang cookie and return it stripped of newline. + + :returns: the erlang cookie + :rtype: str + """ + with open(rabbit.COOKIE_PATH, 'r') as f: + return f.read().strip() + + +def write_erlang_cookie(cookie): + """Write the erlang cookie to the cookie file. + + :param cookie: the cookie to write. + :type cookie: str + """ + try: + with open(rabbit.COOKIE_PATH, 'wb') as out: + out.write(cookie.encode('ascii')) + except Exception as e: + log("Could't write new cookie value to {}: reason: {}" + .format(rabbit.COOKIE_PATH, str(e)), + level=ERROR) + + +def check_erlang_cookie_on_series_upgrade(): + """Check the erlang cookie with peer storage. + + During series upgrade from focal to jammy (rabbitmq upgrade from 3.8 to + 3.9,) there is a package bug [1] that overwrites + the rabbit.COOKIEPATH with a more secure version of the cookie if it isn't + secure enough, which is exactly what the 3.8 and prior cookie looks like. + If the unit is the leader (and thus the first) it gets a new cookie and + 'survives' the upgrade, but doesn't copy the cookie to the peer storage. A + non leader then fails during the post-series-upgrade hook. This function + does two things: + + 1. It ensures that the cookie is secure (if it's the leader), and places + it in peer storage. + 2. If it's the non-leader then, it copies the cookie from peer storage and + puts it in the rabbit.COOKIE_PATH file. + """ + COOKIE_KEY = 'cookie' + RABBITMQ_SERVER = 'rabbitmq-server' + + restart_with_cookie = None + if is_leader(): + # grab the leader cookie and check if it's not secure. + cookie_local = read_erlang_cookie() + if re.match(r'^[A-Z]{20}$', cookie_local): + # write a new, more secure, cookie + cmd = "openssl rand -base64 42" + try: + new_cookie = subprocess.check_output(cmd.split()).decode() + cookie_local = new_cookie + restart_with_cookie = new_cookie + except subprocess.CalledProcessError as e: + log("Couldn't generate a new {}: reason: {}" + .format(rabbit.COOKIE_PATH, str(e)), + level=ERROR) + # ensure the local cookie matches the one stored. + cookie = peer_retrieve(COOKIE_KEY) + if cookie != cookie_local: + peer_store(COOKIE_KEY, cookie_local) + else: + # non leader so see if we need to change the cookie + cookie = peer_retrieve(COOKIE_KEY) + cookie_local = read_erlang_cookie() + if cookie != cookie_local: + restart_with_cookie = cookie + + # now if a restart is required, restart rabbitmq + if restart_with_cookie is not None: + service_stop(RABBITMQ_SERVER) + write_erlang_cookie(restart_with_cookie) + if not is_unit_paused_set(): + service_restart(RABBITMQ_SERVER) + rabbit.wait_app() + + @hooks.hook('ha-relation-joined') @rabbit.coordinated_restart_on_change({rabbit.ENV_CONF: rabbit.restart_map()[rabbit.ENV_CONF]}, @@ -1037,7 +1119,7 @@ def series_upgrade_complete(): os.remove(rabbit.RABBITMQ_CONFIG) rabbit.ConfigRenderer( rabbit.CONFIG_FILES()).write_all() - clear_unit_paused() + check_erlang_cookie_on_series_upgrade() clear_unit_upgrading() rabbit.resume_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES())) diff --git a/unit_tests/test_rabbitmq_server_relations.py b/unit_tests/test_rabbitmq_server_relations.py index 094adcef..312dfbe9 100644 --- a/unit_tests/test_rabbitmq_server_relations.py +++ b/unit_tests/test_rabbitmq_server_relations.py @@ -18,7 +18,7 @@ import subprocess import sys import tempfile -from unit_tests.test_utils import CharmTestCase +from unit_tests.test_utils import CharmTestCase, patch_open from unittest.mock import patch, MagicMock, call from charmhelpers.core.unitdata import Storage @@ -480,6 +480,235 @@ class RelationUtil(CharmTestCase): wait_app.assert_called_once_with() self.assertFalse(update_clients.called) + def test_read_erlang_cookie(self): + with patch_open() as (mock_open, mock_file): + mock_file.read.return_value = "the-cookie\n" + self.assertEqual(rabbitmq_server_relations.read_erlang_cookie(), + "the-cookie") + mock_open.assert_called_once_with(rabbit_utils.COOKIE_PATH, 'r') + + def test_write_erlang_cookie(self): + with patch_open() as (mock_open, mock_file): + rabbitmq_server_relations.write_erlang_cookie('a-cookie') + mock_open.assert_called_once_with(rabbit_utils.COOKIE_PATH, 'wb') + mock_file.write.assert_called_once_with(b'a-cookie') + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_leader( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # check that reading the peer cookie writes to the peer storage, but no + # new cookie is generated, no restart needed. + self.is_leader.return_value = True + read_erlang_cookie.return_value = 'the-cookie' + peer_retrieve.return_value = 'worse-cookie' + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + read_erlang_cookie.assert_called_once_with() + peer_store.assert_called_once_with('cookie', 'the-cookie') + service_stop.assert_not_called() + write_erlang_cookie.assert_not_called() + is_unit_paused_set.assert_not_called() + wait_app.assert_not_called() + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_leader__insecure_cookie( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # check that a new cookie does have to be upgraded (e.g. it matches the + # 20 charmacters of A-Z. Also, the unit is not paused. + self.is_leader.return_value = True + peer_retrieve.return_value = 'worse-cookie' + read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE" + is_unit_paused_set.return_value = False + subprocess_check_output.return_value = b"really-good-cookie" + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + log.assert_not_called() + subprocess_check_output.assert_called_once_with( + ['openssl', 'rand', '-base64', '42']) + read_erlang_cookie.assert_called_once_with() + peer_store.assert_called_once_with('cookie', 'really-good-cookie') + service_stop.assert_called_once_with('rabbitmq-server') + write_erlang_cookie.assert_called_once_with('really-good-cookie') + is_unit_paused_set.assert_called_once_with() + service_restart.assert_called_once_with('rabbitmq-server') + wait_app.assert_called_once_with() + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_leader__unit_paused( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # check that a new cookie does have to be upgraded (e.g. it matches the + # 20 charmacters of A-Z. Also, the unit is paused. + self.is_leader.return_value = True + peer_retrieve.return_value = 'worse-cookie' + read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE" + is_unit_paused_set.return_value = True + subprocess_check_output.return_value = b"really-good-cookie" + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + log.assert_not_called() + subprocess_check_output.assert_called_once_with( + ['openssl', 'rand', '-base64', '42']) + read_erlang_cookie.assert_called_once_with() + peer_store.assert_called_once_with('cookie', 'really-good-cookie') + service_stop.assert_called_once_with('rabbitmq-server') + write_erlang_cookie.assert_called_once_with('really-good-cookie') + is_unit_paused_set.assert_called_once_with() + service_restart.assert_not_called() + wait_app.assert_not_called() + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_leader__subp_error( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # check that a new cookie does have to be upgraded (e.g. it matches the + # 20 charmacters of A-Z. However, the subprocess fails and so no new + # cookie is generated. + self.is_leader.return_value = True + peer_retrieve.return_value = 'worse-cookie' + read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE" + is_unit_paused_set.return_value = True + + def _raise_error(*args, **kwargs): + raise subprocess.CalledProcessError( + cmd="some-command", + returncode=1, + stderr="went bang", + output="nothing good") + + subprocess_check_output.side_effect = _raise_error + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + log.assert_called_once_with( + "Couldn't generate a new /var/lib/rabbitmq/.erlang.cookie: reason:" + " Command 'some-command' returned non-zero exit status 1.", + level='ERROR') + subprocess_check_output.assert_called_once_with( + ['openssl', 'rand', '-base64', '42']) + read_erlang_cookie.assert_called_once_with() + peer_store.assert_called_once_with('cookie', 'ABCDEABCDEABCDEABCDE') + service_stop.assert_not_called() + write_erlang_cookie.assert_not_called() + is_unit_paused_set.assert_not_called() + service_restart.assert_not_called() + wait_app.assert_not_called() + + @patch.object(rabbitmq_server_relations.rabbit, 'wait_app') + @patch.object(rabbitmq_server_relations, 'log') + @patch.object(rabbitmq_server_relations, 'is_unit_paused_set') + @patch.object(rabbitmq_server_relations, 'service_stop') + @patch.object(rabbitmq_server_relations, 'service_restart') + @patch.object(rabbitmq_server_relations, 'peer_store') + @patch.object(rabbitmq_server_relations, 'peer_retrieve') + @patch('subprocess.check_output') + @patch.object(rabbitmq_server_relations, 'read_erlang_cookie') + @patch.object(rabbitmq_server_relations, 'write_erlang_cookie') + def test_check_erlang_cookie_on_series_upgrade__is_not_leader( + self, + write_erlang_cookie, + read_erlang_cookie, + subprocess_check_output, + peer_retrieve, + peer_store, + service_restart, + service_stop, + is_unit_paused_set, + log, + wait_app, + ): + # Not the leader, so just verify if the cookie in the peer relation is + # different and if so, store it and restart the app (start by being + # paused). + self.is_leader.return_value = False + peer_retrieve.return_value = 'worse-cookie' + read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE" + is_unit_paused_set.return_value = True + subprocess_check_output.return_value = b"really-good-cookie" + rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade() + log.assert_not_called() + subprocess_check_output.assert_not_called() + read_erlang_cookie.assert_called_once_with() + peer_store.assert_not_called() + service_stop.assert_called_once_with('rabbitmq-server') + write_erlang_cookie.assert_called_once_with('worse-cookie') + is_unit_paused_set.assert_called_once_with() + service_restart.assert_not_called() + wait_app.assert_not_called() + @patch.object(rabbitmq_server_relations, 'leader_set') @patch.object(rabbitmq_server_relations, 'leader_get') @patch.object(rabbitmq_server_relations, 'apt_install')