Kill the vrrp orphan process when (re)spawn keepalived

When keepalived crashed unexpectedly, the vrrp process that
it associates with will be orphan process. This will make
the VIP unable to migrate to the router in the same host.
Also, neutron code is not able to respawn the keepalived
process, because keepalived thinks itself is still running,
according to [1-3]. As a result, neutron will report respawning
keepalived all the time. Restart l3-agent will not help.

This patch will check and delete the orphan vrrp process
if there is any, in the processmonitor of l3 agent.

More details can be found in the bug description and comments.

[1] https://goo.gl/W3GL9I
[2] https://goo.gl/F0Ixfb
[3] https://goo.gl/dUqhTo

Change-Id: Ia1759ed1365b845d404686a8cd25f882cce35caf
Closes-Bug: #1511311
This commit is contained in:
Hong Hui Xiao 2015-11-04 01:44:43 -05:00 committed by Ihar Hrachyshka
parent 709d7d76b1
commit 49ac0c477c
2 changed files with 44 additions and 11 deletions

View File

@ -336,19 +336,18 @@ class KeepalivedManager(object):
def spawn(self):
config_path = self._output_config_file()
def callback(pid_file):
cmd = ['keepalived', '-P',
'-f', config_path,
'-p', pid_file,
'-r', '%s-vrrp' % pid_file]
return cmd
keepalived_pm = self.get_process()
vrrp_pm = self._get_vrrp_process(
'%s-vrrp' % keepalived_pm.get_pid_file_name())
pm = self.get_process(callback=callback)
pm.enable(reload_cfg=True)
keepalived_pm.default_cmd_callback = (
self._get_keepalived_process_callback(vrrp_pm, config_path))
keepalived_pm.enable(reload_cfg=True)
self.process_monitor.register(uuid=self.resource_id,
service_name=KEEPALIVED_SERVICE_NAME,
monitored_process=pm)
monitored_process=keepalived_pm)
LOG.debug('Keepalived spawned with config %s', config_path)
@ -366,3 +365,27 @@ class KeepalivedManager(object):
self.namespace,
pids_path=self.conf_path,
default_cmd_callback=callback)
def _get_vrrp_process(self, pid_file):
return external_process.ProcessManager(
cfg.CONF,
self.resource_id,
self.namespace,
pid_file=pid_file)
def _get_keepalived_process_callback(self, vrrp_pm, config_path):
def callback(pid_file):
# If keepalived process crashed unexpectedly, the vrrp process
# will be orphan and prevent keepalived process to be spawned.
# A check here will let the l3-agent to kill the orphan process
# and spawn keepalived successfully.
if vrrp_pm.active:
vrrp_pm.disable()
cmd = ['keepalived', '-P',
'-f', config_path,
'-p', pid_file,
'-r', '%s-vrrp' % pid_file]
return cmd
return callback

View File

@ -49,15 +49,25 @@ class KeepalivedManagerTestCase(base.BaseTestCase,
self.assertEqual(self.expected_config.get_config_str(),
self.manager.get_conf_on_disk())
def test_keepalived_respawns(self):
def _test_keepalived_respawns(self, normal_exit=True):
self.manager.spawn()
process = self.manager.get_process()
pid = process.pid
self.assertTrue(process.active)
process.disable(sig='15')
exit_code = '-15' if normal_exit else '-9'
# Exit the process, and see that when it comes back
# It's indeed a different process
utils.execute(['kill', exit_code, pid], run_as_root=True)
utils.wait_until_true(
lambda: process.active,
timeout=5,
sleep=0.01,
exception=RuntimeError(_("Keepalived didn't respawn")))
def test_keepalived_respawns(self):
self._test_keepalived_respawns()
def test_keepalived_respawn_with_unexpected_exit(self):
self._test_keepalived_respawns(False)