Kill neutron-keepalived-state-change gracefully
When HA routers are deleted, neutron-keepalived-state-change was killed by l3/ha_router using SIGKILL and orphaning its child process 'ip -o monitor'. This patch implements a way to kill it gracefully through SIGTERM, and also sending a SIGKILL if keepalived-state-change didn't die after a 10 seconds timeout. The SIGTERM handler in keepalived-state-change kills the ip monitor process using SIGKILL. It kills ip_monitor PID using kill instead of through IPMonitor.stop() because the monitor runs as root and keepalived-state-change doesn't (it drops its privileges after launching the monitor). Doing so, would lead to a "Permission denied". We can safely kill the monitor this way since it was started with respawn_interval set to None which means that it won't be automatically respawned. Closes-Bug: #1632099 Change-Id: I7385172a007fdd252ea3a1c03c58064160d07e9e
This commit is contained in:
parent
4f8a106cb5
commit
17c2b45d91
|
@ -14,7 +14,9 @@
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
import signal
|
||||||
|
|
||||||
|
import eventlet
|
||||||
import netaddr
|
import netaddr
|
||||||
from neutron_lib import constants as n_consts
|
from neutron_lib import constants as n_consts
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
|
@ -31,6 +33,7 @@ from neutron.extensions import portbindings
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
HA_DEV_PREFIX = 'ha-'
|
HA_DEV_PREFIX = 'ha-'
|
||||||
IP_MONITOR_PROCESS_SERVICE = 'ip_monitor'
|
IP_MONITOR_PROCESS_SERVICE = 'ip_monitor'
|
||||||
|
SIGTERM_TIMEOUT = 10
|
||||||
|
|
||||||
|
|
||||||
class HaRouterNamespace(namespaces.RouterNamespace):
|
class HaRouterNamespace(namespaces.RouterNamespace):
|
||||||
|
@ -342,7 +345,12 @@ class HaRouter(router.RouterInfo):
|
||||||
pm = self._get_state_change_monitor_process_manager()
|
pm = self._get_state_change_monitor_process_manager()
|
||||||
process_monitor.unregister(
|
process_monitor.unregister(
|
||||||
self.router_id, IP_MONITOR_PROCESS_SERVICE)
|
self.router_id, IP_MONITOR_PROCESS_SERVICE)
|
||||||
pm.disable()
|
pm.disable(sig=str(int(signal.SIGTERM)))
|
||||||
|
try:
|
||||||
|
common_utils.wait_until_true(lambda: not pm.active,
|
||||||
|
timeout=SIGTERM_TIMEOUT)
|
||||||
|
except eventlet.timeout.Timeout:
|
||||||
|
pm.disable(sig=str(int(signal.SIGKILL)))
|
||||||
|
|
||||||
def update_initial_state(self, callback):
|
def update_initial_state(self, callback):
|
||||||
ha_device = ip_lib.IPDevice(
|
ha_device = ip_lib.IPDevice(
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import signal
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import httplib2
|
import httplib2
|
||||||
|
@ -118,6 +119,21 @@ class MonitorDaemon(daemon.Daemon):
|
||||||
log_exception=False
|
log_exception=False
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _kill_monitor(self):
|
||||||
|
if self.monitor:
|
||||||
|
# Kill PID instead of calling self.monitor.stop() because the ip
|
||||||
|
# monitor is running as root while keepalived-state-change is not
|
||||||
|
# (dropped privileges after launching the ip monitor) and will fail
|
||||||
|
# with "Permission denied". Also, we can safely do this because the
|
||||||
|
# monitor was launched with respawn_interval=None so it won't be
|
||||||
|
# automatically respawned
|
||||||
|
agent_utils.kill_process(self.monitor.pid, signal.SIGKILL,
|
||||||
|
run_as_root=True)
|
||||||
|
|
||||||
|
def handle_sigterm(self, signum, frame):
|
||||||
|
self._kill_monitor()
|
||||||
|
super(MonitorDaemon, self).handle_sigterm(signum, frame)
|
||||||
|
|
||||||
|
|
||||||
def configure(conf):
|
def configure(conf):
|
||||||
config.init(sys.argv[1:])
|
config.init(sys.argv[1:])
|
||||||
|
|
|
@ -12,6 +12,8 @@
|
||||||
# License for the specific language governing permissions and limitations
|
# License for the specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
|
import signal
|
||||||
|
|
||||||
import mock
|
import mock
|
||||||
from oslo_utils import uuidutils
|
from oslo_utils import uuidutils
|
||||||
|
|
||||||
|
@ -84,3 +86,29 @@ class TestBasicRouterOperations(base.BaseTestCase):
|
||||||
|
|
||||||
ri.remove_floating_ip(device, fip_cidr)
|
ri.remove_floating_ip(device, fip_cidr)
|
||||||
self.assertTrue(super_remove_floating_ip.called)
|
self.assertTrue(super_remove_floating_ip.called)
|
||||||
|
|
||||||
|
def test_destroy_state_change_monitor_ok(self):
|
||||||
|
ri = self._create_router(mock.MagicMock())
|
||||||
|
with mock.patch.object(ri,
|
||||||
|
'_get_state_change_monitor_process_manager')\
|
||||||
|
as m_get_state:
|
||||||
|
mock_pm = m_get_state.return_value
|
||||||
|
mock_pm.active = False
|
||||||
|
ri.destroy_state_change_monitor(mock_pm)
|
||||||
|
|
||||||
|
mock_pm.disable.assert_called_once_with(
|
||||||
|
sig=str(int(signal.SIGTERM)))
|
||||||
|
|
||||||
|
def test_destroy_state_change_monitor_force(self):
|
||||||
|
ri = self._create_router(mock.MagicMock())
|
||||||
|
with mock.patch.object(ri,
|
||||||
|
'_get_state_change_monitor_process_manager')\
|
||||||
|
as m_get_state:
|
||||||
|
mock_pm = m_get_state.return_value
|
||||||
|
mock_pm.active = False
|
||||||
|
with mock.patch.object(ha_router, 'SIGTERM_TIMEOUT', 0):
|
||||||
|
ri.destroy_state_change_monitor(mock_pm)
|
||||||
|
|
||||||
|
calls = ["sig='str(%d)'" % signal.SIGTERM,
|
||||||
|
"sig='str(%d)'" % signal.SIGKILL]
|
||||||
|
mock_pm.disable.has_calls(calls)
|
||||||
|
|
Loading…
Reference in New Issue