Kill the orphan vrrp process when (re)spawning keepalived
When keepalived crashes unexpectedly, the vrrp process associated with it becomes an orphan process. This prevents the VIP from migrating to the router on the same host. Also, the neutron code is not able to respawn the keepalived process, because keepalived thinks it is still running, according to [1-3]. As a result, neutron keeps reporting that it is respawning keepalived. Restarting the l3-agent does not help. This patch checks for and kills the orphan vrrp process, if there is any, in the process monitor of the l3 agent. More details can be found in the bug description and comments. [1] https://goo.gl/W3GL9I [2] https://goo.gl/F0Ixfb [3] https://goo.gl/dUqhTo Change-Id: Ia1759ed1365b845d404686a8cd25f882cce35caf Closes-Bug: #1511311
This commit is contained in:
parent
709d7d76b1
commit
49ac0c477c
|
@ -336,19 +336,18 @@ class KeepalivedManager(object):
|
|||
def spawn(self):
|
||||
config_path = self._output_config_file()
|
||||
|
||||
def callback(pid_file):
|
||||
cmd = ['keepalived', '-P',
|
||||
'-f', config_path,
|
||||
'-p', pid_file,
|
||||
'-r', '%s-vrrp' % pid_file]
|
||||
return cmd
|
||||
keepalived_pm = self.get_process()
|
||||
vrrp_pm = self._get_vrrp_process(
|
||||
'%s-vrrp' % keepalived_pm.get_pid_file_name())
|
||||
|
||||
pm = self.get_process(callback=callback)
|
||||
pm.enable(reload_cfg=True)
|
||||
keepalived_pm.default_cmd_callback = (
|
||||
self._get_keepalived_process_callback(vrrp_pm, config_path))
|
||||
|
||||
keepalived_pm.enable(reload_cfg=True)
|
||||
|
||||
self.process_monitor.register(uuid=self.resource_id,
|
||||
service_name=KEEPALIVED_SERVICE_NAME,
|
||||
monitored_process=pm)
|
||||
monitored_process=keepalived_pm)
|
||||
|
||||
LOG.debug('Keepalived spawned with config %s', config_path)
|
||||
|
||||
|
@ -366,3 +365,27 @@ class KeepalivedManager(object):
|
|||
self.namespace,
|
||||
pids_path=self.conf_path,
|
||||
default_cmd_callback=callback)
|
||||
|
||||
def _get_vrrp_process(self, pid_file):
    """Build a ProcessManager bound to the given vrrp pid file.

    :param pid_file: path of the pid file that identifies the vrrp
        process to manage.
    :returns: an ``external_process.ProcessManager`` created with the
        global config plus this manager's resource id and namespace.
    """
    manager_kwargs = {'pid_file': pid_file}
    return external_process.ProcessManager(
        cfg.CONF, self.resource_id, self.namespace, **manager_kwargs)
|
||||
|
||||
def _get_keepalived_process_callback(self, vrrp_pm, config_path):
|
||||
|
||||
def callback(pid_file):
|
||||
# If keepalived process crashed unexpectedly, the vrrp process
|
||||
# will be orphan and prevent keepalived process to be spawned.
|
||||
# A check here will let the l3-agent to kill the orphan process
|
||||
# and spawn keepalived successfully.
|
||||
if vrrp_pm.active:
|
||||
vrrp_pm.disable()
|
||||
cmd = ['keepalived', '-P',
|
||||
'-f', config_path,
|
||||
'-p', pid_file,
|
||||
'-r', '%s-vrrp' % pid_file]
|
||||
return cmd
|
||||
|
||||
return callback
|
||||
|
|
|
@ -49,15 +49,25 @@ class KeepalivedManagerTestCase(base.BaseTestCase,
|
|||
self.assertEqual(self.expected_config.get_config_str(),
|
||||
self.manager.get_conf_on_disk())
|
||||
|
||||
def test_keepalived_respawns(self):
|
||||
def _test_keepalived_respawns(self, normal_exit=True):
|
||||
self.manager.spawn()
|
||||
process = self.manager.get_process()
|
||||
pid = process.pid
|
||||
self.assertTrue(process.active)
|
||||
|
||||
process.disable(sig='15')
|
||||
exit_code = '-15' if normal_exit else '-9'
|
||||
|
||||
# Exit the process, and see that when it comes back
|
||||
# It's indeed a different process
|
||||
utils.execute(['kill', exit_code, pid], run_as_root=True)
|
||||
utils.wait_until_true(
|
||||
lambda: process.active,
|
||||
timeout=5,
|
||||
sleep=0.01,
|
||||
exception=RuntimeError(_("Keepalived didn't respawn")))
|
||||
|
||||
def test_keepalived_respawns(self):
    """Keepalived comes back after being killed with signal -15."""
    self._test_keepalived_respawns(normal_exit=True)
|
||||
|
||||
def test_keepalived_respawn_with_unexpected_exit(self):
    """Keepalived comes back after being killed with signal -9."""
    self._test_keepalived_respawns(normal_exit=False)
|
||||
|
|
Loading…
Reference in New Issue