From a50281db835fc139912f09fedb6e00dba9f333a5 Mon Sep 17 00:00:00 2001 From: Alex Kavanagh Date: Wed, 11 May 2022 12:48:41 +0100 Subject: [PATCH] Stop endless cycles on openstack upgrade If an HA cinder deployment is upgraded twice and the leadership has changed, there is a potential race that can be triggered because the CINDER_DB_INIT_RKEY is different on the units that have been the leader. This race is that when the unit tries to decide whether it has reflected the 'new' RKEY it sees a different one from each unit. By clearing the rkey, it ensures that only the relation changed event from the leader actually causes a restart. Change-Id: I712502a666f23421cc58925f10b7643e3482d861 Closes-Bug: #1928383 --- hooks/cinder_utils.py | 6 +++++- unit_tests/test_cinder_utils.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/hooks/cinder_utils.py b/hooks/cinder_utils.py index a6f60ae9..7a989f0e 100644 --- a/hooks/cinder_utils.py +++ b/hooks/cinder_utils.py @@ -762,7 +762,11 @@ def check_local_db_actions_complete(): service_restart(svc) # Echo notification - relation_set(**{CINDER_DB_INIT_ECHO_RKEY: init_id}) + data = {CINDER_DB_INIT_ECHO_RKEY: init_id} + # BUG: #1928383 - clear CINDER_DB_INIT_RKEY if not the leader + if not(is_elected_leader(CLUSTER_RES)): + data[CINDER_DB_INIT_RKEY] = None + relation_set(**data) # NOTE(jamespage): Retry deals with sync issues during one-shot HA deploys. 
diff --git a/unit_tests/test_cinder_utils.py b/unit_tests/test_cinder_utils.py index e0127d71..6c83d1ad 100644 --- a/unit_tests/test_cinder_utils.py +++ b/unit_tests/test_cinder_utils.py @@ -907,6 +907,8 @@ class TestCinderUtils(CharmTestCase): else: return r_settings + # no clear of cinder_utils.CINDER_DB_INIT_RKEY + self.is_elected_leader.return_value = True self.relation_get.side_effect = mock_relation_get cinder_utils.check_local_db_actions_complete() self.assertFalse(self.relation_set.called) @@ -916,6 +918,34 @@ class TestCinderUtils(CharmTestCase): self.relation_set.assert_has_calls(calls) self.service_restart.assert_called_with('svc1') + @patch.object(cinder_utils, 'is_db_initialised') + @patch.object(cinder_utils, 'enabled_services') + @patch.object(cinder_utils, 'local_unit', lambda *args: 'unit/0') + def test_check_local_db_actions_complete_not_leader( + self, enabled_services, mock_is_db_initialised): + mock_is_db_initialised.return_value = True + enabled_services.return_value = ['svc1'] + r_settings = {} + + def mock_relation_get(unit=None, rid=None, attribute=None): + if attribute: + return r_settings.get(attribute) + else: + return r_settings + + # ensure clear of cinder_utils.CINDER_DB_INIT_RKEY + self.is_elected_leader.return_value = False + self.relation_get.side_effect = mock_relation_get + cinder_utils.check_local_db_actions_complete() + self.assertFalse(self.relation_set.called) + r_settings = {'cinder-db-initialised': 'unit/1-1234'} + cinder_utils.check_local_db_actions_complete() + calls = [call( + **{'cinder-db-initialised-echo': 'unit/1-1234', + cinder_utils.CINDER_DB_INIT_RKEY: None})] + self.relation_set.assert_has_calls(calls) + self.service_restart.assert_called_with('svc1') + @patch('subprocess.check_output') def test_log_lvm_info(self, _check): output = b"some output"