From 56c591996bdf3a2bda8855df4af5e4011779e5a2 Mon Sep 17 00:00:00 2001 From: Slawek Kaplonski Date: Sun, 10 Mar 2019 22:45:15 +0100 Subject: [PATCH] Set initial ha router state in neutron-keepalived-state-change Sometimes in case of HA routers it may happend that keepalived will set status of router to MASTER before neutron-keepalived-state-change daemon will spawn "ip monitor" to monitor changes of IPs in router's namespace. In such case neutron-keepalived-state-change process will never notice that keepalived set router to be MASTER and L3 agent will not be notified about that so router will not be configured properly. To avoid such race condition neutron-keepalived-state-change will now check if VIP address is already configured on ha interface before it will spawn "ip monitor". If it is already configured by keepalived, it will notify L3 agent that router is set to MASTER. Change-Id: Ie3fe825d65408fc969c478767b411fe0156e9fbc Closes-Bug: #1818614 (cherry picked from commit 8fec1ffc833eba9b3fc5f812bf881f44b4beba0c) --- neutron/agent/l3/ha_router.py | 5 +++- neutron/agent/l3/keepalived_state_change.py | 22 ++++++++++++++ .../tests/functional/agent/l3/framework.py | 6 ++++ .../agent/l3/test_keepalived_state_change.py | 30 +++++++++++++++++-- 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py index 9da1549402a..b1839913e09 100644 --- a/neutron/agent/l3/ha_router.py +++ b/neutron/agent/l3/ha_router.py @@ -372,7 +372,10 @@ class HaRouter(router.RouterInfo): '--pid_file=%s' % pid_file, '--state_path=%s' % self.agent_conf.state_path, '--user=%s' % os.geteuid(), - '--group=%s' % os.getegid()] + '--group=%s' % os.getegid(), + '--AGENT-root_helper=%s' % self.agent_conf.AGENT.root_helper, + '--AGENT-root_helper_daemon=%s' % + self.agent_conf.AGENT.root_helper_daemon] return cmd return callback diff --git a/neutron/agent/l3/keepalived_state_change.py b/neutron/agent/l3/keepalived_state_change.py index 43e6821ec75..3ae2a8630a9 100644 --- a/neutron/agent/l3/keepalived_state_change.py +++ b/neutron/agent/l3/keepalived_state_change.py @@ -28,6 +28,7 @@ from neutron.agent.linux import ip_lib from neutron.agent.linux import ip_monitor from neutron.agent.linux import utils as agent_utils from neutron.common import config +from neutron.conf.agent import common as agent_config from neutron.conf.agent.l3 import keepalived @@ -66,6 +67,7 @@ class MonitorDaemon(daemon.Daemon): # as root if not run_as_root: super(MonitorDaemon, self).run() + self.handle_initial_state() for iterable in self.monitor: self.parse_and_handle_event(iterable) @@ -89,6 +91,23 @@ class MonitorDaemon(daemon.Daemon): LOG.exception('Failed to process or handle event for line %s', iterable) + def handle_initial_state(self): + try: + state = 'backup' + ip = ip_lib.IPDevice(self.interface, self.namespace) + for address in ip.addr.list(): + if address.get('cidr') == self.cidr: + state = 'master' + self.write_state_change(state) + self.notify_agent(state) + break + + LOG.debug('Initial status of router %s is %s', + self.router_id, state) + except Exception: + LOG.exception('Failed to get initial status of router %s', + self.router_id) + def write_state_change(self, state): with open(os.path.join( self.conf_dir, 'state'), 'w') as state_file: @@ -140,9 +159,12 @@ def configure(conf): conf.set_override('debug', True) conf.set_override('use_syslog', True) config.setup_logging() + agent_config.setup_privsep() def main(): + agent_config.register_root_helper(cfg.CONF) + cfg.CONF.register_cli_opts(agent_config.ROOT_HELPER_OPTS, 'AGENT') keepalived.register_cli_l3_agent_keepalived_opts() keepalived.register_l3_agent_keepalived_opts() configure(cfg.CONF) diff --git a/neutron/tests/functional/agent/l3/framework.py b/neutron/tests/functional/agent/l3/framework.py index 51422b70bdd..8c7c96cfc30 100644 --- a/neutron/tests/functional/agent/l3/framework.py +++ b/neutron/tests/functional/agent/l3/framework.py @@ -71,6 +71,7 @@ class L3AgentTestFramework(base.BaseSudoTestCase): config.register_opts(common_config.core_cli_opts) logging.register_options(config) agent_config.register_process_monitor_opts(config) + agent_config.register_root_helper(config) return config def _configure_agent(self, host, agent_mode='dvr_snat'): @@ -97,6 +98,11 @@ class L3AgentTestFramework(base.BaseSudoTestCase): get_temp_file_path('external/pids')) conf.set_override('host', host) conf.set_override('agent_mode', agent_mode) + conf.set_override( + 'root_helper', cfg.CONF.AGENT.root_helper, group='AGENT') + conf.set_override( + 'root_helper_daemon', cfg.CONF.AGENT.root_helper_daemon, + group='AGENT') return conf diff --git a/neutron/tests/functional/agent/l3/test_keepalived_state_change.py b/neutron/tests/functional/agent/l3/test_keepalived_state_change.py index 5b481f92e10..33976cdd7b9 100644 --- a/neutron/tests/functional/agent/l3/test_keepalived_state_change.py +++ b/neutron/tests/functional/agent/l3/test_keepalived_state_change.py @@ -45,13 +45,13 @@ class TestKeepalivedStateChange(base.BaseSudoTestCase): self.router_id = uuidutils.generate_uuid() self.conf_dir = self.get_default_temp_dir().path self.cidr = '169.254.128.1/24' - self.interface_name = 'interface' + self.interface_name = utils.get_rand_name() self.monitor = keepalived_state_change.MonitorDaemon( self.get_temp_file_path('monitor.pid'), self.router_id, 1, 2, - 'namespace', + utils.get_rand_name(), self.conf_dir, self.interface_name, self.cidr) @@ -83,6 +83,32 @@ class TestKeepalivedStateChange(base.BaseSudoTestCase): self.monitor, 'notify_agent', side_effect=Exception): self.monitor.parse_and_handle_event(self.line) + def test_handle_initial_state_backup(self): + ip = ip_lib.IPWrapper(namespace=self.monitor.namespace) + ip.netns.add(self.monitor.namespace) + self.addCleanup(ip.netns.delete, self.monitor.namespace) + ip.add_dummy(self.interface_name) + + with mock.patch.object( + self.monitor, 'write_state_change') as write_state_change,\ + mock.patch.object( + self.monitor, 'notify_agent') as notify_agent: + + self.monitor.handle_initial_state() + write_state_change.assert_not_called() + notify_agent.assert_not_called() + + def test_handle_initial_state_master(self): + ip = ip_lib.IPWrapper(namespace=self.monitor.namespace) + ip.netns.add(self.monitor.namespace) + self.addCleanup(ip.netns.delete, self.monitor.namespace) + ha_interface = ip.add_dummy(self.interface_name) + + ha_interface.addr.add(self.cidr) + + self.monitor.handle_initial_state() + self.assertEqual('master', self._get_state()) + class TestMonitorDaemon(base.BaseSudoTestCase): def setUp(self):