From 164aed0e676b0fcdbc74a0f5ef26b4c70b70e03e Mon Sep 17 00:00:00 2001 From: Gregory Thiemonge Date: Wed, 11 Dec 2019 17:09:10 +0100 Subject: [PATCH] Fix house keeping graceful shutdown SIGTERM was not caught by house keeping service, threads were not correctly terminated and spare amphora creation may have been killed when restarting the service, leaving amphorae in BOOTING status. Story: 2007008 Task: 37788 Change-Id: Ib3d6aa0f2fd550c3a8756c41e0159c5ae8127c7c (cherry picked from commit df0ba06c1e96dc5ad4855d24435970ee926175ca) --- octavia/cmd/house_keeping.py | 23 ++++++++++++------- octavia/tests/unit/cmd/test_house_keeping.py | 20 +++++++--------- ...use-keeping-shutdown-17b04417a2c4849f.yaml | 6 +++++ 3 files changed, 29 insertions(+), 20 deletions(-) create mode 100644 releasenotes/notes/fix-house-keeping-shutdown-17b04417a2c4849f.yaml diff --git a/octavia/cmd/house_keeping.py b/octavia/cmd/house_keeping.py index fea61eb433..0cb9a63a6d 100644 --- a/octavia/cmd/house_keeping.py +++ b/octavia/cmd/house_keeping.py @@ -17,7 +17,6 @@ import datetime import signal import sys import threading -import time from oslo_config import cfg from oslo_log import log as logging @@ -104,28 +103,27 @@ def main(): timestamp = str(datetime.datetime.utcnow()) LOG.info("Starting house keeping at %s", timestamp) + threads = [] + # Thread to perform spare amphora check spare_amp_thread = threading.Thread(target=spare_amphora_check) spare_amp_thread.daemon = True spare_amp_thread.start() + threads.append(spare_amp_thread) # Thread to perform db cleanup db_cleanup_thread = threading.Thread(target=db_cleanup) db_cleanup_thread.daemon = True db_cleanup_thread.start() + threads.append(db_cleanup_thread) # Thread to perform certificate rotation cert_rotate_thread = threading.Thread(target=cert_rotation) cert_rotate_thread.daemon = True cert_rotate_thread.start() + threads.append(cert_rotate_thread) - signal.signal(signal.SIGHUP, _mutate_config) - - # Try-Exception block should be at the end to gracefully exit threads - try: - while True: - time.sleep(1) - except KeyboardInterrupt: + def process_cleanup(*args, **kwargs): LOG.info("Attempting to gracefully terminate House-Keeping") spare_amp_thread_event.set() db_cleanup_thread_event.set() @@ -134,3 +132,12 @@ def main(): db_cleanup_thread.join() cert_rotate_thread.join() LOG.info("House-Keeping process terminated") + + signal.signal(signal.SIGTERM, process_cleanup) + signal.signal(signal.SIGHUP, _mutate_config) + + try: + for thread in threads: + thread.join() + except KeyboardInterrupt: + process_cleanup() diff --git a/octavia/tests/unit/cmd/test_house_keeping.py b/octavia/tests/unit/cmd/test_house_keeping.py index 4d5656c4d7..7d06de7598 100644 --- a/octavia/tests/unit/cmd/test_house_keeping.py +++ b/octavia/tests/unit/cmd/test_house_keeping.py @@ -109,7 +109,6 @@ class TestHouseKeepingCMD(base.TestCase): mock_CertRotation.assert_called_once_with() self.assertEqual(1, cert_rotate_mock.rotate.call_count) - @mock.patch('time.sleep') @mock.patch('octavia.cmd.house_keeping.cert_rotate_thread_event') @mock.patch('octavia.cmd.house_keeping.db_cleanup_thread_event') @mock.patch('octavia.cmd.house_keeping.spare_amp_thread_event') @@ -118,7 +117,7 @@ class TestHouseKeepingCMD(base.TestCase): def test_main(self, mock_service, mock_thread, spare_amp_thread_event_mock, db_cleanup_thread_event_mock, - cert_rotate_thread_event_mock, sleep_time): + cert_rotate_thread_event_mock): spare_amp_thread_mock = mock.MagicMock() db_cleanup_thread_mock = mock.MagicMock() @@ -132,9 +131,7 @@ class TestHouseKeepingCMD(base.TestCase): db_cleanup_thread_mock.daemon.return_value = True cert_rotate_thread_mock.daemon.return_value = True - # mock the time.sleep() in the while loop - sleep_time.side_effect = [True, Exception('break')] - self.assertRaisesRegex(Exception, 'break', house_keeping.main) + house_keeping.main() spare_amp_thread_mock.start.assert_called_once_with() db_cleanup_thread_mock.start.assert_called_once_with() @@ -144,7 +141,6 @@ class TestHouseKeepingCMD(base.TestCase): self.assertTrue(db_cleanup_thread_mock.daemon) self.assertTrue(cert_rotate_thread_mock.daemon) - @mock.patch('time.sleep') @mock.patch('octavia.cmd.house_keeping.cert_rotate_thread_event') @mock.patch('octavia.cmd.house_keeping.db_cleanup_thread_event') @mock.patch('octavia.cmd.house_keeping.spare_amp_thread_event') @@ -153,8 +149,7 @@ class TestHouseKeepingCMD(base.TestCase): def test_main_keyboard_interrupt(self, mock_service, mock_thread, spare_amp_thread_event_mock, db_cleanup_thread_event_mock, - cert_rotate_thread_event_mock, - sleep_time): + cert_rotate_thread_event_mock): spare_amp_thread_mock = mock.MagicMock() db_cleanup_thread_mock = mock.MagicMock() cert_rotate_thread_mock = mock.MagicMock() @@ -167,8 +162,10 @@ class TestHouseKeepingCMD(base.TestCase): db_cleanup_thread_mock.daemon.return_value = True cert_rotate_thread_mock.daemon.return_value = True - # mock the time.sleep() in the while loop - sleep_time.side_effect = [True, KeyboardInterrupt] + mock_join = mock.MagicMock() + mock_join.side_effect = [KeyboardInterrupt, None] + spare_amp_thread_mock.join = mock_join + house_keeping.main() spare_amp_thread_event_mock.set.assert_called_once_with() @@ -184,8 +181,7 @@ class TestHouseKeepingCMD(base.TestCase): self.assertTrue(spare_amp_thread_mock.daemon) self.assertTrue(db_cleanup_thread_mock.daemon) self.assertTrue(cert_rotate_thread_mock.daemon) - - spare_amp_thread_mock.join.assert_called_once_with() + self.assertEqual(2, spare_amp_thread_mock.join.call_count) db_cleanup_thread_mock.join.assert_called_once_with() cert_rotate_thread_mock.join.assert_called_once_with() diff --git a/releasenotes/notes/fix-house-keeping-shutdown-17b04417a2c4849f.yaml b/releasenotes/notes/fix-house-keeping-shutdown-17b04417a2c4849f.yaml new file mode 100644 index 0000000000..0fd7aef9e6 --- /dev/null +++ b/releasenotes/notes/fix-house-keeping-shutdown-17b04417a2c4849f.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fix a bug that could interrupt resource creation when performing a graceful + shutdown of the house keeping service and leave resources such as amphorae + in a BOOTING status.