From bf14c725bbb6ce710a45c55d1dd00eee2fb53f62 Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Tue, 2 Feb 2021 17:25:03 +0000 Subject: [PATCH] Stop metadata proxy gracefully HAProxy supports hard stop [1] via SIGTERM signal. From the documentation: """ ... when the SIGTERM signal is sent to the haproxy process, it immediately quits and all established connections are closed. """ In case the process does not finish, the SIGKILL signal is sent. The PID file created by the process is deleted. [1]https://cbonte.github.io/haproxy-dconv/2.0/management.html#4 Closes-Bug: #1910691 Change-Id: Ifa3734e8eb4e52b1a132c3351ecc2e15463298bb --- neutron/agent/metadata/driver.py | 17 +++++++- neutron/tests/unit/agent/dhcp/test_agent.py | 10 +++-- .../tests/unit/agent/metadata/test_driver.py | 43 +++++++++++++++++++ 3 files changed, 64 insertions(+), 6 deletions(-) diff --git a/neutron/agent/metadata/driver.py b/neutron/agent/metadata/driver.py index cac59b499ec..77e1d8eb033 100644 --- a/neutron/agent/metadata/driver.py +++ b/neutron/agent/metadata/driver.py @@ -16,6 +16,7 @@ import grp import os import pwd +import signal from neutron_lib.callbacks import events from neutron_lib.callbacks import registry @@ -32,10 +33,13 @@ from neutron.agent.l3 import namespaces from neutron.agent.linux import external_process from neutron.agent.linux import ip_lib from neutron.agent.linux import utils as linux_utils +from neutron.common import utils as common_utils LOG = logging.getLogger(__name__) +SIGTERM_TIMEOUT = 5 + METADATA_SERVICE_NAME = 'metadata-proxy' HAPROXY_SERVICE = 'haproxy' @@ -273,10 +277,19 @@ class MetadataDriver(object): monitor.unregister(uuid, METADATA_SERVICE_NAME) pm = cls._get_metadata_proxy_process_manager(uuid, conf, ns_name=ns_name) - pm.disable() + pm.disable(sig=str(int(signal.SIGTERM))) + try: + common_utils.wait_until_true(lambda: not pm.active, + timeout=SIGTERM_TIMEOUT) + except common_utils.WaitTimeout: + LOG.warning('Metadata process %s did not finish after SIGTERM ' + 'signal in %s seconds, sending SIGKILL signal', + pm.pid, SIGTERM_TIMEOUT) + pm.disable(sig=str(int(signal.SIGKILL))) - # Delete metadata proxy config file + # Delete metadata proxy config and PID files. HaproxyConfigurator.cleanup_config_file(uuid, cfg.CONF.state_path) + linux_utils.delete_if_exists(pm.get_pid_file_name(), run_as_root=True) cls.monitors.pop(uuid, None) diff --git a/neutron/tests/unit/agent/dhcp/test_agent.py b/neutron/tests/unit/agent/dhcp/test_agent.py index 9cdcf83d6a4..70079920abe 100644 --- a/neutron/tests/unit/agent/dhcp/test_agent.py +++ b/neutron/tests/unit/agent/dhcp/test_agent.py @@ -16,6 +16,7 @@ import collections import copy import datetime +import signal import sys from unittest import mock import uuid @@ -561,7 +562,8 @@ class TestDhcpAgent(base.BaseTestCase): set(range(sync_max, sync_max + port_count))) self.assertEqual(all_ports, ports_ready) - def test_dhcp_ready_ports_updates_after_enable_dhcp(self): + @mock.patch.object(linux_utils, 'delete_if_exists') + def test_dhcp_ready_ports_updates_after_enable_dhcp(self, *args): with mock.patch('neutron.agent.linux.ip_lib.' 'IpAddrCommand.wait_until_address_ready') as mock_wait: mock_wait.return_value = True @@ -819,7 +821,7 @@ class TestDhcpAgentEventHandler(base.BaseTestCase): else: self.external_process.assert_has_calls([ self._process_manager_constructor_call(), - mock.call().disable()]) + mock.call().disable(sig=str(int(signal.SIGTERM)))]) def test_enable_dhcp_helper_enable_metadata_isolated_network(self): self._enable_dhcp_helper(isolated_network, @@ -938,7 +940,7 @@ class TestDhcpAgentEventHandler(base.BaseTestCase): self.call_driver.assert_called_once_with('disable', fake_network) self.external_process.assert_has_calls([ self._process_manager_constructor_call(), - mock.call().disable()]) + mock.call().disable(sig=str(int(signal.SIGTERM)))]) def test_disable_dhcp_helper_known_network_isolated_metadata(self): self._disable_dhcp_helper_known_network(isolated_metadata=True) @@ -967,7 +969,7 @@ class TestDhcpAgentEventHandler(base.BaseTestCase): [mock.call.get_network_by_id(fake_network.id)]) self.external_process.assert_has_calls([ self._process_manager_constructor_call(), - mock.call().disable() + mock.call().disable(sig=str(int(signal.SIGTERM))) ]) def test_disable_dhcp_helper_driver_failure_isolated_metadata(self): diff --git a/neutron/tests/unit/agent/metadata/test_driver.py b/neutron/tests/unit/agent/metadata/test_driver.py index 40f3c73ca4b..20815002591 100644 --- a/neutron/tests/unit/agent/metadata/test_driver.py +++ b/neutron/tests/unit/agent/metadata/test_driver.py @@ -14,6 +14,7 @@ # under the License. import os +import signal from unittest import mock from neutron_lib import constants @@ -24,6 +25,7 @@ from oslo_utils import uuidutils from neutron.agent.l3 import agent as l3_agent from neutron.agent.l3 import router_info from neutron.agent.linux import iptables_manager +from neutron.agent.linux import utils as linux_utils from neutron.agent.metadata import driver as metadata_driver from neutron.conf.agent import common as agent_config from neutron.conf.agent.l3 import config as l3_config @@ -35,6 +37,12 @@ from neutron.tests.unit.agent.linux import test_utils _uuid = uuidutils.generate_uuid +class FakeL3NATAgent(object): + + def __init__(self): + self.conf = cfg.CONF + + class TestMetadataDriverRules(base.BaseTestCase): def test_metadata_nat_rules(self): @@ -80,6 +88,11 @@ class TestMetadataDriverProcess(base.BaseTestCase): mock.patch('neutron.agent.l3.agent.L3PluginApi').start() mock.patch('neutron.agent.l3.ha.AgentMixin' '._init_ha_conf_path').start() + self.delete_if_exists = mock.patch.object(linux_utils, + 'delete_if_exists') + self.mock_get_process = mock.patch.object( + metadata_driver.MetadataDriver, + '_get_metadata_proxy_process_manager') l3_config.register_l3_agent_config_opts(l3_config.OPTS, cfg.CONF) ha_conf.register_l3_agent_ha_opts() @@ -209,3 +222,33 @@ class TestMetadataDriverProcess(base.BaseTestCase): mock.ANY, mock.ANY) self.assertRaises(metadata_driver.InvalidUserOrGroupException, config.create_config_file) + + def test_destroy_monitored_metadata_proxy(self): + delete_if_exists = self.delete_if_exists.start() + mproxy_process = mock.Mock( + active=False, get_pid_file_name=mock.Mock(return_value='pid_file')) + mock_get_process = self.mock_get_process.start() + mock_get_process.return_value = mproxy_process + driver = metadata_driver.MetadataDriver(FakeL3NATAgent()) + driver.destroy_monitored_metadata_proxy(mock.Mock(), 'uuid', 'conf', + 'ns_name') + mproxy_process.disable.assert_called_once_with( + sig=str(int(signal.SIGTERM))) + delete_if_exists.assert_has_calls([ + mock.call('pid_file', run_as_root=True)]) + + def test_destroy_monitored_metadata_proxy_force(self): + delete_if_exists = self.delete_if_exists.start() + mproxy_process = mock.Mock( + active=True, get_pid_file_name=mock.Mock(return_value='pid_file')) + mock_get_process = self.mock_get_process.start() + mock_get_process.return_value = mproxy_process + driver = metadata_driver.MetadataDriver(FakeL3NATAgent()) + with mock.patch.object(metadata_driver, 'SIGTERM_TIMEOUT', 0): + driver.destroy_monitored_metadata_proxy(mock.Mock(), 'uuid', + 'conf', 'ns_name') + mproxy_process.disable.assert_has_calls([ + mock.call(sig=str(int(signal.SIGTERM))), + mock.call(sig=str(int(signal.SIGKILL)))]) + delete_if_exists.assert_has_calls([ + mock.call('pid_file', run_as_root=True)])