Fix focal to jammy series upgrade

This is a fix/workaround to the package upgrade bug that affects the
charm.  The post-inst package script updates the .erlang.cookie if it is
insecure during the upgrade of rabbit from 3.8 to 3.9.  This breaks the
series-upgrade resulting in a charm erroring on the post-series-upgrade
hook.

This fix works by checking if the .erlang.cookie has changed during the
post-series-upgrade hook and either updating the cookie in peer storage
(if it is insecure) or ensuring that the cookie from peer storage is
written to the .erlang.cookie if it isn't the leader. This ensures that
the cluster continues to work and that the series-upgrade can be
completed across the cluster.

Change-Id: I540ea8da85b3b4326ccb8194f1d8b1050b04eae9
Closes-Bug: #2006485
This commit is contained in:
Alex Kavanagh 2023-02-22 14:56:27 +00:00
parent f5aa13ee02
commit 55b985f55c
2 changed files with 320 additions and 9 deletions

View File

@ -16,6 +16,7 @@
import glob
import os
import re
import shutil
import sys
import subprocess
@ -56,7 +57,6 @@ from charmhelpers.contrib.openstack.utils import (
is_hook_allowed,
is_unit_paused_set,
set_unit_upgrading,
clear_unit_paused,
clear_unit_upgrading,
)
@ -560,7 +560,7 @@ def cluster_joined(relation_id=None):
if is_leader():
log('Leader peer_storing cookie', level=INFO)
cookie = open(rabbit.COOKIE_PATH, 'r').read().strip()
cookie = read_erlang_cookie()
peer_store('cookie', cookie)
peer_store('leader_node_ip', unit_private_ip())
peer_store('leader_node_hostname', rabbit.get_unit_hostname())
@ -598,6 +598,11 @@ def cluster_changed(relation_id=None, remote_unit=None):
'rabbitmq cluster config.', level=INFO)
return
if is_unit_paused_set():
log('Unit is paused-set, so avoiding any other cluster activities',
level=INFO)
return
if rabbit.is_sufficient_peers():
rabbit.update_peer_cluster_status()
if not rabbit.clustered_with_leader():
@ -658,9 +663,7 @@ def update_cookie(leaders_cookie=None):
cookie = leaders_cookie
else:
cookie = peer_retrieve('cookie')
cookie_local = None
with open(rabbit.COOKIE_PATH, 'r') as f:
cookie_local = f.read().strip()
cookie_local = read_erlang_cookie()
if cookie_local == cookie:
log('Cookie already synchronized with peer.')
@ -669,13 +672,92 @@ def update_cookie(leaders_cookie=None):
raise Exception("rabbitmq-server must be restarted but not permitted")
service_stop('rabbitmq-server')
with open(rabbit.COOKIE_PATH, 'wb') as out:
out.write(cookie.encode('ascii'))
write_erlang_cookie(cookie)
if not is_unit_paused_set():
service_restart('rabbitmq-server')
rabbit.wait_app()
def read_erlang_cookie():
"""Read the erlang cookie and return it stripped of newline.
:returns: the erlang cookie
:rtype: str
"""
with open(rabbit.COOKIE_PATH, 'r') as f:
return f.read().strip()
def write_erlang_cookie(cookie):
"""Write the erlang cookie to the cookie file.
:param cookie: the cookie to write.
:type cookie: str
"""
try:
with open(rabbit.COOKIE_PATH, 'wb') as out:
out.write(cookie.encode('ascii'))
except Exception as e:
log("Could't write new cookie value to {}: reason: {}"
.format(rabbit.COOKIE_PATH, str(e)),
level=ERROR)
def check_erlang_cookie_on_series_upgrade():
"""Check the erlang cookie with peer storage.
During series upgrade from focal to jammy (rabbitmq upgrade from 3.8 to
3.9,) there is a package bug [1] that overwrites
the rabbit.COOKIEPATH with a more secure version of the cookie if it isn't
secure enough, which is exactly what the 3.8 and prior cookie looks like.
If the unit is the leader (and thus the first) it gets a new cookie and
'survives' the upgrade, but doesn't copy the cookie to the peer storage. A
non leader then fails during the post-series-upgrade hook. This function
does two things:
1. It ensures that the cookie is secure (if it's the leader), and places
it in peer storage.
2. If it's the non-leader then, it copies the cookie from peer storage and
puts it in the rabbit.COOKIE_PATH file.
"""
COOKIE_KEY = 'cookie'
RABBITMQ_SERVER = 'rabbitmq-server'
restart_with_cookie = None
if is_leader():
# grab the leader cookie and check if it's not secure.
cookie_local = read_erlang_cookie()
if re.match(r'^[A-Z]{20}$', cookie_local):
# write a new, more secure, cookie
cmd = "openssl rand -base64 42"
try:
new_cookie = subprocess.check_output(cmd.split()).decode()
cookie_local = new_cookie
restart_with_cookie = new_cookie
except subprocess.CalledProcessError as e:
log("Couldn't generate a new {}: reason: {}"
.format(rabbit.COOKIE_PATH, str(e)),
level=ERROR)
# ensure the local cookie matches the one stored.
cookie = peer_retrieve(COOKIE_KEY)
if cookie != cookie_local:
peer_store(COOKIE_KEY, cookie_local)
else:
# non leader so see if we need to change the cookie
cookie = peer_retrieve(COOKIE_KEY)
cookie_local = read_erlang_cookie()
if cookie != cookie_local:
restart_with_cookie = cookie
# now if a restart is required, restart rabbitmq
if restart_with_cookie is not None:
service_stop(RABBITMQ_SERVER)
write_erlang_cookie(restart_with_cookie)
if not is_unit_paused_set():
service_restart(RABBITMQ_SERVER)
rabbit.wait_app()
@hooks.hook('ha-relation-joined')
@rabbit.coordinated_restart_on_change({rabbit.ENV_CONF:
rabbit.restart_map()[rabbit.ENV_CONF]},
@ -1037,7 +1119,7 @@ def series_upgrade_complete():
os.remove(rabbit.RABBITMQ_CONFIG)
rabbit.ConfigRenderer(
rabbit.CONFIG_FILES()).write_all()
clear_unit_paused()
check_erlang_cookie_on_series_upgrade()
clear_unit_upgrading()
rabbit.resume_unit_helper(rabbit.ConfigRenderer(rabbit.CONFIG_FILES()))

View File

@ -18,7 +18,7 @@ import subprocess
import sys
import tempfile
from unit_tests.test_utils import CharmTestCase
from unit_tests.test_utils import CharmTestCase, patch_open
from unittest.mock import patch, MagicMock, call
from charmhelpers.core.unitdata import Storage
@ -480,6 +480,235 @@ class RelationUtil(CharmTestCase):
wait_app.assert_called_once_with()
self.assertFalse(update_clients.called)
def test_read_erlang_cookie(self):
with patch_open() as (mock_open, mock_file):
mock_file.read.return_value = "the-cookie\n"
self.assertEqual(rabbitmq_server_relations.read_erlang_cookie(),
"the-cookie")
mock_open.assert_called_once_with(rabbit_utils.COOKIE_PATH, 'r')
def test_write_erlang_cookie(self):
with patch_open() as (mock_open, mock_file):
rabbitmq_server_relations.write_erlang_cookie('a-cookie')
mock_open.assert_called_once_with(rabbit_utils.COOKIE_PATH, 'wb')
mock_file.write.assert_called_once_with(b'a-cookie')
@patch.object(rabbitmq_server_relations.rabbit, 'wait_app')
@patch.object(rabbitmq_server_relations, 'log')
@patch.object(rabbitmq_server_relations, 'is_unit_paused_set')
@patch.object(rabbitmq_server_relations, 'service_stop')
@patch.object(rabbitmq_server_relations, 'service_restart')
@patch.object(rabbitmq_server_relations, 'peer_store')
@patch.object(rabbitmq_server_relations, 'peer_retrieve')
@patch('subprocess.check_output')
@patch.object(rabbitmq_server_relations, 'read_erlang_cookie')
@patch.object(rabbitmq_server_relations, 'write_erlang_cookie')
def test_check_erlang_cookie_on_series_upgrade__is_leader(
self,
write_erlang_cookie,
read_erlang_cookie,
subprocess_check_output,
peer_retrieve,
peer_store,
service_restart,
service_stop,
is_unit_paused_set,
log,
wait_app,
):
# check that reading the peer cookie writes to the peer storage, but no
# new cookie is generated, no restart needed.
self.is_leader.return_value = True
read_erlang_cookie.return_value = 'the-cookie'
peer_retrieve.return_value = 'worse-cookie'
rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade()
read_erlang_cookie.assert_called_once_with()
peer_store.assert_called_once_with('cookie', 'the-cookie')
service_stop.assert_not_called()
write_erlang_cookie.assert_not_called()
is_unit_paused_set.assert_not_called()
wait_app.assert_not_called()
@patch.object(rabbitmq_server_relations.rabbit, 'wait_app')
@patch.object(rabbitmq_server_relations, 'log')
@patch.object(rabbitmq_server_relations, 'is_unit_paused_set')
@patch.object(rabbitmq_server_relations, 'service_stop')
@patch.object(rabbitmq_server_relations, 'service_restart')
@patch.object(rabbitmq_server_relations, 'peer_store')
@patch.object(rabbitmq_server_relations, 'peer_retrieve')
@patch('subprocess.check_output')
@patch.object(rabbitmq_server_relations, 'read_erlang_cookie')
@patch.object(rabbitmq_server_relations, 'write_erlang_cookie')
def test_check_erlang_cookie_on_series_upgrade__is_leader__insecure_cookie(
self,
write_erlang_cookie,
read_erlang_cookie,
subprocess_check_output,
peer_retrieve,
peer_store,
service_restart,
service_stop,
is_unit_paused_set,
log,
wait_app,
):
# check that a new cookie does have to be upgraded (e.g. it matches the
# 20 charmacters of A-Z. Also, the unit is not paused.
self.is_leader.return_value = True
peer_retrieve.return_value = 'worse-cookie'
read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE"
is_unit_paused_set.return_value = False
subprocess_check_output.return_value = b"really-good-cookie"
rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade()
log.assert_not_called()
subprocess_check_output.assert_called_once_with(
['openssl', 'rand', '-base64', '42'])
read_erlang_cookie.assert_called_once_with()
peer_store.assert_called_once_with('cookie', 'really-good-cookie')
service_stop.assert_called_once_with('rabbitmq-server')
write_erlang_cookie.assert_called_once_with('really-good-cookie')
is_unit_paused_set.assert_called_once_with()
service_restart.assert_called_once_with('rabbitmq-server')
wait_app.assert_called_once_with()
@patch.object(rabbitmq_server_relations.rabbit, 'wait_app')
@patch.object(rabbitmq_server_relations, 'log')
@patch.object(rabbitmq_server_relations, 'is_unit_paused_set')
@patch.object(rabbitmq_server_relations, 'service_stop')
@patch.object(rabbitmq_server_relations, 'service_restart')
@patch.object(rabbitmq_server_relations, 'peer_store')
@patch.object(rabbitmq_server_relations, 'peer_retrieve')
@patch('subprocess.check_output')
@patch.object(rabbitmq_server_relations, 'read_erlang_cookie')
@patch.object(rabbitmq_server_relations, 'write_erlang_cookie')
def test_check_erlang_cookie_on_series_upgrade__is_leader__unit_paused(
self,
write_erlang_cookie,
read_erlang_cookie,
subprocess_check_output,
peer_retrieve,
peer_store,
service_restart,
service_stop,
is_unit_paused_set,
log,
wait_app,
):
# check that a new cookie does have to be upgraded (e.g. it matches the
# 20 charmacters of A-Z. Also, the unit is paused.
self.is_leader.return_value = True
peer_retrieve.return_value = 'worse-cookie'
read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE"
is_unit_paused_set.return_value = True
subprocess_check_output.return_value = b"really-good-cookie"
rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade()
log.assert_not_called()
subprocess_check_output.assert_called_once_with(
['openssl', 'rand', '-base64', '42'])
read_erlang_cookie.assert_called_once_with()
peer_store.assert_called_once_with('cookie', 'really-good-cookie')
service_stop.assert_called_once_with('rabbitmq-server')
write_erlang_cookie.assert_called_once_with('really-good-cookie')
is_unit_paused_set.assert_called_once_with()
service_restart.assert_not_called()
wait_app.assert_not_called()
@patch.object(rabbitmq_server_relations.rabbit, 'wait_app')
@patch.object(rabbitmq_server_relations, 'log')
@patch.object(rabbitmq_server_relations, 'is_unit_paused_set')
@patch.object(rabbitmq_server_relations, 'service_stop')
@patch.object(rabbitmq_server_relations, 'service_restart')
@patch.object(rabbitmq_server_relations, 'peer_store')
@patch.object(rabbitmq_server_relations, 'peer_retrieve')
@patch('subprocess.check_output')
@patch.object(rabbitmq_server_relations, 'read_erlang_cookie')
@patch.object(rabbitmq_server_relations, 'write_erlang_cookie')
def test_check_erlang_cookie_on_series_upgrade__is_leader__subp_error(
self,
write_erlang_cookie,
read_erlang_cookie,
subprocess_check_output,
peer_retrieve,
peer_store,
service_restart,
service_stop,
is_unit_paused_set,
log,
wait_app,
):
# check that a new cookie does have to be upgraded (e.g. it matches the
# 20 charmacters of A-Z. However, the subprocess fails and so no new
# cookie is generated.
self.is_leader.return_value = True
peer_retrieve.return_value = 'worse-cookie'
read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE"
is_unit_paused_set.return_value = True
def _raise_error(*args, **kwargs):
raise subprocess.CalledProcessError(
cmd="some-command",
returncode=1,
stderr="went bang",
output="nothing good")
subprocess_check_output.side_effect = _raise_error
rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade()
log.assert_called_once_with(
"Couldn't generate a new /var/lib/rabbitmq/.erlang.cookie: reason:"
" Command 'some-command' returned non-zero exit status 1.",
level='ERROR')
subprocess_check_output.assert_called_once_with(
['openssl', 'rand', '-base64', '42'])
read_erlang_cookie.assert_called_once_with()
peer_store.assert_called_once_with('cookie', 'ABCDEABCDEABCDEABCDE')
service_stop.assert_not_called()
write_erlang_cookie.assert_not_called()
is_unit_paused_set.assert_not_called()
service_restart.assert_not_called()
wait_app.assert_not_called()
@patch.object(rabbitmq_server_relations.rabbit, 'wait_app')
@patch.object(rabbitmq_server_relations, 'log')
@patch.object(rabbitmq_server_relations, 'is_unit_paused_set')
@patch.object(rabbitmq_server_relations, 'service_stop')
@patch.object(rabbitmq_server_relations, 'service_restart')
@patch.object(rabbitmq_server_relations, 'peer_store')
@patch.object(rabbitmq_server_relations, 'peer_retrieve')
@patch('subprocess.check_output')
@patch.object(rabbitmq_server_relations, 'read_erlang_cookie')
@patch.object(rabbitmq_server_relations, 'write_erlang_cookie')
def test_check_erlang_cookie_on_series_upgrade__is_not_leader(
self,
write_erlang_cookie,
read_erlang_cookie,
subprocess_check_output,
peer_retrieve,
peer_store,
service_restart,
service_stop,
is_unit_paused_set,
log,
wait_app,
):
# Not the leader, so just verify if the cookie in the peer relation is
# different and if so, store it and restart the app (start by being
# paused).
self.is_leader.return_value = False
peer_retrieve.return_value = 'worse-cookie'
read_erlang_cookie.return_value = "ABCDEABCDEABCDEABCDE"
is_unit_paused_set.return_value = True
subprocess_check_output.return_value = b"really-good-cookie"
rabbitmq_server_relations.check_erlang_cookie_on_series_upgrade()
log.assert_not_called()
subprocess_check_output.assert_not_called()
read_erlang_cookie.assert_called_once_with()
peer_store.assert_not_called()
service_stop.assert_called_once_with('rabbitmq-server')
write_erlang_cookie.assert_called_once_with('worse-cookie')
is_unit_paused_set.assert_called_once_with()
service_restart.assert_not_called()
wait_app.assert_not_called()
@patch.object(rabbitmq_server_relations, 'leader_set')
@patch.object(rabbitmq_server_relations, 'leader_get')
@patch.object(rabbitmq_server_relations, 'apt_install')