summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Alvarez <dalvarez@redhat.com>2016-11-24 16:32:50 +0000
committerDaniel Alvarez <dalvarez@redhat.com>2016-12-20 10:52:41 +0000
commit1d38f30555384257445f0d119a307e74e88d7fbf (patch)
tree5519bc1dd339e63f819e1829ecc523faff2ccc8b
parent640b127a5afcf0c94c18e5cd750284d8bccef290 (diff)
Kill processes when cleaning up namespaces
This patch will kill processes that are listening on any port/UNIX socket within the namespace to be cleaned up. To kill them it will issue a SIGTERM to them (or to their parents if they were forked) and, if they don't die after a few seconds, a SIGKILL to them and all their children. This is intended for those cases when there's no specific cleanup and serves as a fallback method. Change-Id: I4195f633ef4a1788496d1293846f19eef89416aa Partial-Bug: #1403455
Notes
Notes (review): Code-Review+2: Kevin Benton <kevin@benton.pub> Verified+1: Arista CI <arista-openstack-test@aristanetworks.com> Code-Review+2: Miguel Angel Ajo <mangelajo@redhat.com> Workflow+1: Miguel Angel Ajo <mangelajo@redhat.com> Verified+2: Jenkins Submitted-by: Jenkins Submitted-at: Tue, 20 Dec 2016 15:47:24 +0000 Reviewed-on: https://review.openstack.org/402140 Project: openstack/neutron Branch: refs/heads/master
-rw-r--r--etc/neutron/rootwrap.d/dhcp.filters4
-rw-r--r--etc/neutron/rootwrap.d/l3.filters8
-rw-r--r--etc/neutron/rootwrap.d/netns-cleanup.filters12
-rw-r--r--neutron/agent/linux/async_process.py14
-rw-r--r--neutron/agent/linux/utils.py59
-rw-r--r--neutron/cmd/netns_cleanup.py100
-rw-r--r--neutron/tests/contrib/functional-testing.filters6
-rw-r--r--neutron/tests/functional/cmd/process_spawn.py180
-rw-r--r--neutron/tests/functional/cmd/test_netns_cleanup.py76
-rw-r--r--neutron/tests/unit/agent/linux/test_async_process.py12
-rw-r--r--neutron/tests/unit/agent/linux/test_utils.py103
-rw-r--r--neutron/tests/unit/cmd/test_netns_cleanup.py177
-rw-r--r--releasenotes/notes/netns_cleanup_kill_procs-af88d8c47c07dd9c.yaml8
-rw-r--r--setup.cfg1
14 files changed, 720 insertions, 40 deletions
diff --git a/etc/neutron/rootwrap.d/dhcp.filters b/etc/neutron/rootwrap.d/dhcp.filters
index ab87abb..3f06b4a 100644
--- a/etc/neutron/rootwrap.d/dhcp.filters
+++ b/etc/neutron/rootwrap.d/dhcp.filters
@@ -13,8 +13,8 @@ dnsmasq: CommandFilter, dnsmasq, root
13# dhcp-agent uses kill as well, that's handled by the generic KillFilter 13# dhcp-agent uses kill as well, that's handled by the generic KillFilter
14# it looks like these are the only signals needed, per 14# it looks like these are the only signals needed, per
15# neutron/agent/linux/dhcp.py 15# neutron/agent/linux/dhcp.py
16kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP 16kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP, -15
17kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP 17kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP, -15
18 18
19ovs-vsctl: CommandFilter, ovs-vsctl, root 19ovs-vsctl: CommandFilter, ovs-vsctl, root
20ivs-ctl: CommandFilter, ivs-ctl, root 20ivs-ctl: CommandFilter, ivs-ctl, root
diff --git a/etc/neutron/rootwrap.d/l3.filters b/etc/neutron/rootwrap.d/l3.filters
index 0fdf60c..789a16f 100644
--- a/etc/neutron/rootwrap.d/l3.filters
+++ b/etc/neutron/rootwrap.d/l3.filters
@@ -19,10 +19,10 @@ radvd: CommandFilter, radvd, root
19# metadata proxy 19# metadata proxy
20metadata_proxy: CommandFilter, neutron-ns-metadata-proxy, root 20metadata_proxy: CommandFilter, neutron-ns-metadata-proxy, root
21# RHEL invocation of the metadata proxy will report /usr/bin/python 21# RHEL invocation of the metadata proxy will report /usr/bin/python
22kill_metadata: KillFilter, root, python, -9 22kill_metadata: KillFilter, root, python, -15, -9
23kill_metadata7: KillFilter, root, python2.7, -9 23kill_metadata7: KillFilter, root, python2.7, -15, -9
24kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -9, -HUP 24kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -15, -9, -HUP
25kill_radvd: KillFilter, root, /sbin/radvd, -9, -HUP 25kill_radvd: KillFilter, root, /sbin/radvd, -15, -9, -HUP
26 26
27# ip_lib 27# ip_lib
28ip: IpFilter, ip, root 28ip: IpFilter, ip, root
diff --git a/etc/neutron/rootwrap.d/netns-cleanup.filters b/etc/neutron/rootwrap.d/netns-cleanup.filters
new file mode 100644
index 0000000..1ee142e
--- /dev/null
+++ b/etc/neutron/rootwrap.d/netns-cleanup.filters
@@ -0,0 +1,12 @@
1# neutron-rootwrap command filters for nodes on which neutron is
2# expected to control network
3#
4# This file should be owned by (and only-writeable by) the root user
5
6# format seems to be
7# cmd-name: filter-name, raw-command, user, args
8
9[Filters]
10
11# netns-cleanup
12netstat: CommandFilter, netstat, root
diff --git a/neutron/agent/linux/async_process.py b/neutron/agent/linux/async_process.py
index 5d08a61..bcc8b7f 100644
--- a/neutron/agent/linux/async_process.py
+++ b/neutron/agent/linux/async_process.py
@@ -175,15 +175,11 @@ class AsyncProcess(object):
175 try: 175 try:
176 # A process started by a root helper will be running as 176 # A process started by a root helper will be running as
177 # root and need to be killed via the same helper. 177 # root and need to be killed via the same helper.
178 utils.execute(['kill', '-%d' % kill_signal, pid], 178 utils.kill_process(pid, kill_signal, self.run_as_root)
179 run_as_root=self.run_as_root) 179 except Exception:
180 except Exception as ex: 180 LOG.exception(_LE('An error occurred while killing [%s].'),
181 stale_pid = (isinstance(ex, RuntimeError) and 181 self.cmd)
182 'No such process' in str(ex)) 182 return False
183 if not stale_pid:
184 LOG.exception(_LE('An error occurred while killing [%s].'),
185 self.cmd)
186 return False
187 183
188 if self._process: 184 if self._process:
189 self._process.wait() 185 self._process.wait()
diff --git a/neutron/agent/linux/utils.py b/neutron/agent/linux/utils.py
index 936a22a..5d26118 100644
--- a/neutron/agent/linux/utils.py
+++ b/neutron/agent/linux/utils.py
@@ -171,9 +171,11 @@ def get_interface_mac(interface):
171 return ':'.join(["%02x" % b for b in iterbytes(info[MAC_START:MAC_END])]) 171 return ':'.join(["%02x" % b for b in iterbytes(info[MAC_START:MAC_END])])
172 172
173 173
174def find_child_pids(pid): 174def find_child_pids(pid, recursive=False):
175 """Retrieve a list of the pids of child processes of the given pid.""" 175 """Retrieve a list of the pids of child processes of the given pid.
176 176
177 It can also find all children through the hierarchy if recursive=True
178 """
177 try: 179 try:
178 raw_pids = execute(['ps', '--ppid', pid, '-o', 'pid='], 180 raw_pids = execute(['ps', '--ppid', pid, '-o', 'pid='],
179 log_fail_as_error=False) 181 log_fail_as_error=False)
@@ -185,7 +187,58 @@ def find_child_pids(pid):
185 if no_children_found: 187 if no_children_found:
186 ctxt.reraise = False 188 ctxt.reraise = False
187 return [] 189 return []
188 return [x.strip() for x in raw_pids.split('\n') if x.strip()] 190 child_pids = [x.strip() for x in raw_pids.split('\n') if x.strip()]
191 if recursive:
192 for child in child_pids:
193 child_pids = child_pids + find_child_pids(child, True)
194 return child_pids
195
196
197def find_parent_pid(pid):
198 """Retrieve the pid of the parent process of the given pid.
199
200 If the pid doesn't exist in the system, this function will return
201 None
202 """
203 try:
204 ppid = execute(['ps', '-o', 'ppid=', pid],
205 log_fail_as_error=False)
206 except ProcessExecutionError as e:
207 # Unexpected errors are the responsibility of the caller
208 with excutils.save_and_reraise_exception() as ctxt:
209 # Exception has already been logged by execute
210 no_such_pid = e.returncode == 1
211 if no_such_pid:
212 ctxt.reraise = False
213 return
214 return ppid.strip()
215
216
217def find_fork_top_parent(pid):
218 """Retrieve the pid of the top parent of the given pid through a fork.
219
220 This function will search the top parent with its same cmdline. If the
221 given pid has no parent, its own pid will be returned
222 """
223 while True:
224 ppid = find_parent_pid(pid)
225 if (ppid and ppid != pid and
226 pid_invoked_with_cmdline(ppid, get_cmdline_from_pid(pid))):
227 pid = ppid
228 else:
229 return pid
230
231
232def kill_process(pid, signal, run_as_root=False):
233 """Kill the process with the given pid using the given signal."""
234 try:
235 execute(['kill', '-%d' % signal, pid], run_as_root=run_as_root)
236 except ProcessExecutionError as ex:
237 # TODO(dalvarez): this check has i18n issues. Maybe we can use
238 # use gettext module setting a global locale or just pay attention
239 # to returncode instead of checking the ex message.
240 if 'No such process' not in str(ex):
241 raise
189 242
190 243
191def _get_conf_base(cfg_root, uuid, ensure_conf_dir): 244def _get_conf_base(cfg_root, uuid, ensure_conf_dir):
diff --git a/neutron/cmd/netns_cleanup.py b/neutron/cmd/netns_cleanup.py
index 4a142d1..0458fd0 100644
--- a/neutron/cmd/netns_cleanup.py
+++ b/neutron/cmd/netns_cleanup.py
@@ -15,6 +15,7 @@
15 15
16import itertools 16import itertools
17import re 17import re
18import signal
18import time 19import time
19 20
20from neutron_lib import constants 21from neutron_lib import constants
@@ -22,7 +23,7 @@ from oslo_config import cfg
22from oslo_log import log as logging 23from oslo_log import log as logging
23from oslo_utils import importutils 24from oslo_utils import importutils
24 25
25from neutron._i18n import _LE 26from neutron._i18n import _LE, _LW
26from neutron.agent.common import config as agent_config 27from neutron.agent.common import config as agent_config
27from neutron.agent.common import ovs_lib 28from neutron.agent.common import ovs_lib
28from neutron.agent.l3 import dvr_fip_ns 29from neutron.agent.l3 import dvr_fip_ns
@@ -32,7 +33,9 @@ from neutron.agent.linux import dhcp
32from neutron.agent.linux import external_process 33from neutron.agent.linux import external_process
33from neutron.agent.linux import interface 34from neutron.agent.linux import interface
34from neutron.agent.linux import ip_lib 35from neutron.agent.linux import ip_lib
36from neutron.agent.linux import utils
35from neutron.common import config 37from neutron.common import config
38from neutron.common import utils as common_utils
36from neutron.conf.agent import cmd 39from neutron.conf.agent import cmd
37from neutron.conf.agent import dhcp as dhcp_config 40from neutron.conf.agent import dhcp as dhcp_config
38 41
@@ -45,6 +48,12 @@ NS_PREFIXES = {
45 dvr_fip_ns.FIP_NS_PREFIX], 48 dvr_fip_ns.FIP_NS_PREFIX],
46 'lbaas': [LB_NS_PREFIX], 49 'lbaas': [LB_NS_PREFIX],
47} 50}
51SIGTERM_WAITTIME = 10
52NETSTAT_PIDS_REGEX = re.compile(r'.* (?P<pid>\d{2,6})/.*')
53
54
55class PidsInNamespaceException(Exception):
56 pass
48 57
49 58
50class FakeDhcpPlugin(object): 59class FakeDhcpPlugin(object):
@@ -131,6 +140,87 @@ def unplug_device(conf, device):
131 device.set_log_fail_as_error(orig_log_fail_as_error) 140 device.set_log_fail_as_error(orig_log_fail_as_error)
132 141
133 142
143def find_listen_pids_namespace(namespace):
144 """Retrieve a list of pids of listening processes within the given netns.
145
146 It executes netstat -nlp and returns a set of unique pairs
147 """
148 ip = ip_lib.IPWrapper(namespace=namespace)
149 pids = set()
150 cmd = ['netstat', '-nlp']
151 output = ip.netns.execute(cmd, run_as_root=True)
152 for line in output.splitlines():
153 m = NETSTAT_PIDS_REGEX.match(line)
154 if m:
155 pids.add(m.group('pid'))
156 return pids
157
158
159def wait_until_no_listen_pids_namespace(namespace, timeout=SIGTERM_WAITTIME):
160 """Poll listening processes within the given namespace.
161
162 If after timeout seconds, there are remaining processes in the namespace,
163 then a PidsInNamespaceException will be thrown.
164 """
165 # Would be better to handle an eventlet.timeout.Timeout exception
166 # but currently there's a problem importing eventlet since it's
167 # doing a local import from cmd/eventlet which doesn't have a
168 # timeout module
169 common_utils.wait_until_true(
170 lambda: not find_listen_pids_namespace(namespace),
171 timeout=SIGTERM_WAITTIME,
172 exception=PidsInNamespaceException)
173
174
175def _kill_listen_processes(namespace, force=False):
176 """Identify all listening processes within the given namespace.
177
178 Then, for each one, find its top parent with same cmdline (in case this
179 process forked) and issue a SIGTERM to all of them. If force is True,
180 then a SIGKILL will be issued to all parents and all their children. Also,
181 this function returns the number of listening processes.
182 """
183 pids = find_listen_pids_namespace(namespace)
184 pids_to_kill = {utils.find_fork_top_parent(pid) for pid in pids}
185 kill_signal = signal.SIGTERM
186 if force:
187 kill_signal = signal.SIGKILL
188 children = [utils.find_child_pids(pid, True) for pid in pids_to_kill]
189 pids_to_kill.update(itertools.chain.from_iterable(children))
190
191 for pid in pids_to_kill:
192 # Throw a warning since this particular cleanup may need a specific
193 # implementation in the right module. Ideally, netns_cleanup wouldn't
194 # kill any processes as the responsible module should've killed them
195 # before cleaning up the namespace
196 LOG.warning(_LW("Killing (%(signal)d) [%(pid)s] %(cmdline)s"),
197 {'signal': kill_signal,
198 'pid': pid,
199 'cmdline': ' '.join(utils.get_cmdline_from_pid(pid))[:80]
200 })
201 try:
202 utils.kill_process(pid, kill_signal, run_as_root=True)
203 except Exception as ex:
204 LOG.error(_LE('An error occurred while killing '
205 '[%(pid)s]: %(msg)s'), {'pid': pid, 'msg': ex})
206 return len(pids)
207
208
209def kill_listen_processes(namespace):
210 """Kill all processes listening within the given namespace.
211
212 First it tries to kill them using SIGTERM, waits until they die gracefully
213 and then kills remaining processes (if any) with SIGKILL
214 """
215 if _kill_listen_processes(namespace, force=False):
216 try:
217 wait_until_no_listen_pids_namespace(namespace)
218 except PidsInNamespaceException:
219 _kill_listen_processes(namespace, force=True)
220 # Allow some time for remaining processes to die
221 wait_until_no_listen_pids_namespace(namespace)
222
223
134def destroy_namespace(conf, namespace, force=False): 224def destroy_namespace(conf, namespace, force=False):
135 """Destroy a given namespace. 225 """Destroy a given namespace.
136 226
@@ -146,6 +236,14 @@ def destroy_namespace(conf, namespace, force=False):
146 # NOTE: The dhcp driver will remove the namespace if is it empty, 236 # NOTE: The dhcp driver will remove the namespace if is it empty,
147 # so a second check is required here. 237 # so a second check is required here.
148 if ip.netns.exists(namespace): 238 if ip.netns.exists(namespace):
239 try:
240 kill_listen_processes(namespace)
241 except PidsInNamespaceException:
242 # This is unlikely since, at this point, we have SIGKILLed
243 # all remaining processes but if there are still some, log
244 # the error and continue with the cleanup
245 LOG.error(_LE('Not all processes were killed in %s'),
246 namespace)
149 for device in ip.get_devices(exclude_loopback=True): 247 for device in ip.get_devices(exclude_loopback=True):
150 unplug_device(conf, device) 248 unplug_device(conf, device)
151 249
diff --git a/neutron/tests/contrib/functional-testing.filters b/neutron/tests/contrib/functional-testing.filters
index 657ae80..af90e36 100644
--- a/neutron/tests/contrib/functional-testing.filters
+++ b/neutron/tests/contrib/functional-testing.filters
@@ -41,3 +41,9 @@ ovstrace_filter: RegExpFilter, ovs-appctl, root, ovs-appctl, ofproto/trace, .*,
41# needed for TestGetRootHelperChildPid 41# needed for TestGetRootHelperChildPid
42bash_filter: RegExpFilter, /bin/bash, root, bash, -c, \(sleep 100\) 42bash_filter: RegExpFilter, /bin/bash, root, bash, -c, \(sleep 100\)
43sleep_kill: KillFilter, root, sleep, -9 43sleep_kill: KillFilter, root, sleep, -9
44
45#needed by test_netns_cleanup
46process_spawn: EnvFilter, env, root, PATH=, python
47ip_exec: IpNetnsExecFilter, ip, root
48ps: CommandFilter, ps, root
49pid_kill: RegExpFilter, kill, root, kill, -\d+, .*
diff --git a/neutron/tests/functional/cmd/process_spawn.py b/neutron/tests/functional/cmd/process_spawn.py
new file mode 100644
index 0000000..7eddd13
--- /dev/null
+++ b/neutron/tests/functional/cmd/process_spawn.py
@@ -0,0 +1,180 @@
1# Copyright 2016 Red Hat, Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may
4# not use this file except in compliance with the License. You may obtain
5# a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations
13# under the License.
14
15import os
16import signal
17import socket
18import sys
19import time
20
21from neutron_lib import constants as n_const
22from oslo_config import cfg
23
24from neutron._i18n import _
25from neutron.agent.linux import daemon
26from neutron.tests.common import net_helpers
27
28UNIX_FAMILY = 'UNIX'
29
30OPTS = [
31 cfg.IntOpt('num_children',
32 short='n',
33 default=0,
34 help=_('Number of children to spawn'),
35 required=False),
36 cfg.StrOpt('family',
37 short='f',
38 default=n_const.IPv4,
39 choices=[n_const.IPv4, n_const.IPv6, UNIX_FAMILY],
40 help=_('Listen socket family (%(v4)s, %(v6)s or %(unix)s)') %
41 {
42 'v4': n_const.IPv4,
43 'v6': n_const.IPv6,
44 'unix': UNIX_FAMILY
45 },
46 required=False),
47 cfg.StrOpt('proto',
48 short='p',
49 default=n_const.PROTO_NAME_TCP,
50 choices=[n_const.PROTO_NAME_TCP, n_const.PROTO_NAME_UDP],
51 help=_('Protocol (%(tcp)s or %(udp)s)') %
52 {
53 'tcp': n_const.PROTO_NAME_TCP,
54 'udp': n_const.PROTO_NAME_UDP
55 },
56 required=False),
57 cfg.BoolOpt('parent_listen',
58 short='pl',
59 default=True,
60 help=_('Parent process must listen too'),
61 required=False),
62 cfg.BoolOpt('ignore_sigterm',
63 short='i',
64 default=False,
65 help=_('Ignore SIGTERM'),
66 required=False)
67]
68
69
70class ProcessSpawn(daemon.Daemon):
71 """
72 This class is part of the functional test of the netns_cleanup module.
73
74 It allows spawning processes that listen on random ports either on
75 tcp(6), udp(6) or unix sockets. Also it allows handling or ignoring
76 SIGTERM to check whether the cleanup works as expected by getting rid
77 of the spawned processes.
78 """
79
80 MAX_BIND_RETRIES = 5
81
82 DCT_FAMILY = {
83 n_const.IPv4: socket.AF_INET,
84 n_const.IPv6: socket.AF_INET6,
85 UNIX_FAMILY: socket.AF_UNIX
86 }
87 DCT_PROTO = {
88 n_const.PROTO_NAME_TCP: socket.SOCK_STREAM,
89 n_const.PROTO_NAME_UDP: socket.SOCK_DGRAM,
90 }
91
92 def __init__(self, pidfile=None,
93 family=n_const.IPv4,
94 proto=n_const.PROTO_NAME_TCP,
95 ignore_sigterm=False, num_children=0,
96 parent_must_listen=True):
97 self.family = family
98 self.proto = proto
99 self.ignore_sigterm = ignore_sigterm
100 self.num_children = num_children
101 self.listen_socket = None
102 self.parent_must_listen = parent_must_listen
103 self.child_pids = []
104
105 super(ProcessSpawn, self).__init__(pidfile)
106
107 def start_listening(self):
108 socket_family = self.DCT_FAMILY[self.family]
109 socket_type = self.DCT_PROTO[self.proto]
110
111 self.listen_socket = socket.socket(socket_family, socket_type)
112
113 # Try to listen in a random port which is not currently in use
114 retries = 0
115 while retries < ProcessSpawn.MAX_BIND_RETRIES:
116 # NOTE(dalvarez): not finding a free port on a freshly created
117 # namespace is very unlikely but if problems show up, retries can
118 # be increased to avoid tests failing
119 try:
120 if self.family == UNIX_FAMILY:
121 self.listen_socket.bind('')
122 else:
123 port = net_helpers.get_free_namespace_port(self.proto)
124 self.listen_socket.bind(('', port))
125 except socket.error:
126 retries += 1
127 else:
128 if n_const.PROTO_NAME_TCP in self.proto:
129 self.listen_socket.listen(0)
130 break
131
132 def do_sleep(self):
133 while True:
134 time.sleep(10)
135
136 def run(self):
137 # Spawn as many children as requested
138 children = []
139 while len(children) != self.num_children:
140 child_pid = os.fork()
141 if child_pid == 0:
142 # Listen and do nothing else
143 self.start_listening()
144 self.do_sleep()
145 return
146 children.append(child_pid)
147
148 # Install a SIGTERM handler if requested
149 if not self.ignore_sigterm:
150 signal.signal(signal.SIGTERM, self.sigterm_handler)
151
152 self.child_pids = children
153 if self.parent_must_listen:
154 self.start_listening()
155 self.do_sleep()
156
157 def sigterm_handler(self, signum, frame):
158 if self.listen_socket:
159 self.listen_socket.close()
160 for child in self.child_pids:
161 try:
162 os.kill(child, signal.SIGTERM)
163 except OSError:
164 pass
165 sys.exit(0)
166
167
168def main():
169 cfg.CONF.register_cli_opts(OPTS)
170 cfg.CONF(project='neutron', default_config_files=[])
171 proc_spawn = ProcessSpawn(num_children=cfg.CONF.num_children,
172 family=cfg.CONF.family,
173 proto=cfg.CONF.proto,
174 parent_must_listen=cfg.CONF.parent_listen,
175 ignore_sigterm=cfg.CONF.ignore_sigterm)
176 proc_spawn.start()
177
178
179if __name__ == "__main__":
180 main()
diff --git a/neutron/tests/functional/cmd/test_netns_cleanup.py b/neutron/tests/functional/cmd/test_netns_cleanup.py
index e5f88f7..7b40951 100644
--- a/neutron/tests/functional/cmd/test_netns_cleanup.py
+++ b/neutron/tests/functional/cmd/test_netns_cleanup.py
@@ -13,19 +13,26 @@
13# License for the specific language governing permissions and limitations 13# License for the specific language governing permissions and limitations
14# under the License. 14# under the License.
15 15
16import os
17
16import mock 18import mock
19from neutron_lib import constants as n_const
17 20
18from neutron.agent.l3 import namespaces 21from neutron.agent.l3 import namespaces
19from neutron.agent.linux import dhcp 22from neutron.agent.linux import dhcp
20from neutron.agent.linux import ip_lib 23from neutron.agent.linux import ip_lib
24from neutron.agent.linux import utils
21from neutron.cmd import netns_cleanup 25from neutron.cmd import netns_cleanup
26from neutron.common import utils as common_utils
22from neutron.conf.agent import cmd 27from neutron.conf.agent import cmd
23from neutron.tests import base as basetest 28from neutron.tests import base as basetest
24from neutron.tests.common import net_helpers 29from neutron.tests.common import net_helpers
25from neutron.tests.functional import base 30from neutron.tests.functional import base
31from neutron.tests.functional.cmd import process_spawn
26 32
27GET_NAMESPACES = 'neutron.agent.linux.ip_lib.IPWrapper.get_namespaces' 33GET_NAMESPACES = 'neutron.agent.linux.ip_lib.IPWrapper.get_namespaces'
28TEST_INTERFACE_DRIVER = 'neutron.agent.linux.interface.OVSInterfaceDriver' 34TEST_INTERFACE_DRIVER = 'neutron.agent.linux.interface.OVSInterfaceDriver'
35NUM_SUBPROCESSES = 6
29 36
30 37
31class NetnsCleanupTest(base.BaseSudoTestCase): 38class NetnsCleanupTest(base.BaseSudoTestCase):
@@ -60,13 +67,82 @@ class NetnsCleanupTest(base.BaseSudoTestCase):
60 # tests, as otherwise cleanup will kill them all 67 # tests, as otherwise cleanup will kill them all
61 self.get_namespaces.return_value = [l3_namespace, dhcp_namespace] 68 self.get_namespaces.return_value = [l3_namespace, dhcp_namespace]
62 69
70 # launch processes in each namespace to make sure they're
71 # killed during cleanup
72 procs_launched = self._launch_processes([l3_namespace, dhcp_namespace])
73 self.assertIsNot(procs_launched, 0)
74 common_utils.wait_until_true(
75 lambda: self._get_num_spawned_procs() == procs_launched,
76 timeout=15,
77 exception=Exception("Didn't spawn expected number of processes"))
78
63 netns_cleanup.cleanup_network_namespaces(self.conf) 79 netns_cleanup.cleanup_network_namespaces(self.conf)
64 80
65 self.get_namespaces_p.stop() 81 self.get_namespaces_p.stop()
66 namespaces_now = ip_lib.IPWrapper.get_namespaces() 82 namespaces_now = ip_lib.IPWrapper.get_namespaces()
83 procs_after = self._get_num_spawned_procs()
84 self.assertEqual(procs_after, 0)
67 self.assertNotIn(l3_namespace, namespaces_now) 85 self.assertNotIn(l3_namespace, namespaces_now)
68 self.assertNotIn(dhcp_namespace, namespaces_now) 86 self.assertNotIn(dhcp_namespace, namespaces_now)
69 87
88 @staticmethod
89 def _launch_processes(namespaces):
90 """
91 Launch processes in the specified namespaces.
92
93 This function will spawn processes inside the given namespaces:
94 - 6 processes listening on tcp ports (parent + 5 children)
95 - 1 process + 5 subprocesses listening on unix sockets
96 - 1 process + 5 subprocesses listening on udp6 sockets
97
98 First two sets of processes will process SIGTERM so when the parent
99 gets killed, it will kill all spawned children
100 The last set of processes will ignore SIGTERM. This will allow us
101 to test the cleanup functionality which will issue a SIGKILL
102 to all remaining processes after the SIGTERM attempt
103 """
104 commands = [['python', process_spawn.__file__,
105 '-n', NUM_SUBPROCESSES,
106 '-f', n_const.IPv4,
107 '-p', n_const.PROTO_NAME_TCP,
108 '--noignore_sigterm',
109 '--parent_listen'],
110 ['python', process_spawn.__file__,
111 '-n', NUM_SUBPROCESSES,
112 '-f', process_spawn.UNIX_FAMILY,
113 '-p', n_const.PROTO_NAME_TCP,
114 '--noignore_sigterm',
115 '--noparent_listen'],
116 ['python', process_spawn.__file__,
117 '-n', NUM_SUBPROCESSES,
118 '-f', n_const.IPv4,
119 '-p', n_const.PROTO_NAME_UDP,
120 '--ignore_sigterm',
121 '--noparent_listen']]
122
123 proc_count = 0
124 for netns in namespaces:
125 ip = ip_lib.IPWrapper(namespace=netns)
126 for command in commands:
127 # The total amount of processes per command is
128 # the process itself plus the number of subprocesses spawned by
129 # it
130 proc_count += (1 + NUM_SUBPROCESSES)
131 # We need to pass the PATH env variable so that python
132 # interpreter runs under the same virtual environment.
133 # Otherwise, it won't find the necessary packages such as
134 # oslo_config
135 ip.netns.execute(command,
136 addl_env={'PATH':
137 os.environ.get('PATH')})
138 return proc_count
139
140 @staticmethod
141 def _get_num_spawned_procs():
142 cmd = ['ps', '-f', '-u', 'root']
143 out = utils.execute(cmd, run_as_root=True)
144 return sum([1 for line in out.splitlines() if 'process_spawn' in line])
145
70 146
71class TestNETNSCLIConfig(basetest.BaseTestCase): 147class TestNETNSCLIConfig(basetest.BaseTestCase):
72 148
diff --git a/neutron/tests/unit/agent/linux/test_async_process.py b/neutron/tests/unit/agent/linux/test_async_process.py
index db0321d..0512556 100644
--- a/neutron/tests/unit/agent/linux/test_async_process.py
+++ b/neutron/tests/unit/agent/linux/test_async_process.py
@@ -198,20 +198,18 @@ class TestAsyncProcess(base.BaseTestCase):
198 exc = RuntimeError(exception_message) 198 exc = RuntimeError(exception_message)
199 else: 199 else:
200 exc = None 200 exc = None
201 with mock.patch.object(utils, 'execute', 201 with mock.patch.object(utils, 'kill_process',
202 side_effect=exc) as mock_execute: 202 side_effect=exc) as mock_kill_process:
203 actual = self.proc._kill_process(pid, kill_signal) 203 actual = self.proc._kill_process(pid, kill_signal)
204 204
205 self.assertEqual(expected, actual) 205 self.assertEqual(expected, actual)
206 mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid], 206 mock_kill_process.assert_called_with(pid,
207 run_as_root=self.proc.run_as_root) 207 kill_signal,
208 self.proc.run_as_root)
208 209
209 def test__kill_process_returns_true_for_valid_pid(self): 210 def test__kill_process_returns_true_for_valid_pid(self):
210 self._test__kill_process('1', True) 211 self._test__kill_process('1', True)
211 212
212 def test__kill_process_returns_true_for_stale_pid(self):
213 self._test__kill_process('1', True, 'No such process')
214
215 def test__kill_process_returns_false_for_execute_exception(self): 213 def test__kill_process_returns_false_for_execute_exception(self):
216 self._test__kill_process('1', False, 'Invalid') 214 self._test__kill_process('1', False, 'Invalid')
217 215
diff --git a/neutron/tests/unit/agent/linux/test_utils.py b/neutron/tests/unit/agent/linux/test_utils.py
index 9244ab0..a0e7a1f 100644
--- a/neutron/tests/unit/agent/linux/test_utils.py
+++ b/neutron/tests/unit/agent/linux/test_utils.py
@@ -12,6 +12,7 @@
12# License for the specific language governing permissions and limitations 12# License for the specific language governing permissions and limitations
13# under the License. 13# under the License.
14 14
15import signal
15import socket 16import socket
16 17
17import mock 18import mock
@@ -181,6 +182,100 @@ class AgentUtilsGetInterfaceMAC(base.BaseTestCase):
181 self.assertEqual(actual_val, expect_val) 182 self.assertEqual(actual_val, expect_val)
182 183
183 184
185class TestFindParentPid(base.BaseTestCase):
186 def setUp(self):
187 super(TestFindParentPid, self).setUp()
188 self.m_execute = mock.patch.object(utils, 'execute').start()
189
190 def test_returns_none_for_no_valid_pid(self):
191 self.m_execute.side_effect = utils.ProcessExecutionError('',
192 returncode=1)
193 self.assertIsNone(utils.find_parent_pid(-1))
194
195 def test_returns_parent_id_for_good_ouput(self):
196 self.m_execute.return_value = '123 \n'
197 self.assertEqual(utils.find_parent_pid(-1), '123')
198
199 def test_raises_exception_returncode_0(self):
200 with testtools.ExpectedException(utils.ProcessExecutionError):
201 self.m_execute.side_effect = \
202 utils.ProcessExecutionError('', returncode=0)
203 utils.find_parent_pid(-1)
204
205 def test_raises_unknown_exception(self):
206 with testtools.ExpectedException(RuntimeError):
207 self.m_execute.side_effect = RuntimeError()
208 utils.find_parent_pid(-1)
209
210
211class TestFindForkTopParent(base.BaseTestCase):
212 def _test_find_fork_top_parent(self, expected=_marker,
213 find_parent_pid_retvals=None,
214 pid_invoked_with_cmdline_retvals=None):
215 def _find_parent_pid(x):
216 if find_parent_pid_retvals:
217 return find_parent_pid_retvals.pop(0)
218
219 pid_invoked_with_cmdline = {}
220 if pid_invoked_with_cmdline_retvals:
221 pid_invoked_with_cmdline['side_effect'] = (
222 pid_invoked_with_cmdline_retvals)
223 else:
224 pid_invoked_with_cmdline['return_value'] = False
225 with mock.patch.object(utils, 'find_parent_pid',
226 side_effect=_find_parent_pid), \
227 mock.patch.object(utils, 'pid_invoked_with_cmdline',
228 **pid_invoked_with_cmdline):
229 actual = utils.find_fork_top_parent(_marker)
230 self.assertEqual(expected, actual)
231
232 def test_returns_own_pid_no_parent(self):
233 self._test_find_fork_top_parent()
234
235 def test_returns_own_pid_nofork(self):
236 self._test_find_fork_top_parent(find_parent_pid_retvals=['2', '3'])
237
238 def test_returns_first_parent_pid_fork(self):
239 self._test_find_fork_top_parent(
240 expected='2',
241 find_parent_pid_retvals=['2', '3', '4'],
242 pid_invoked_with_cmdline_retvals=[True, False, False])
243
244 def test_returns_top_parent_pid_fork(self):
245 self._test_find_fork_top_parent(
246 expected='4',
247 find_parent_pid_retvals=['2', '3', '4'],
248 pid_invoked_with_cmdline_retvals=[True, True, True])
249
250
251class TestKillProcess(base.BaseTestCase):
252 def _test_kill_process(self, pid, exception_message=None,
253 kill_signal=signal.SIGKILL):
254 if exception_message:
255 exc = utils.ProcessExecutionError(exception_message, returncode=0)
256 else:
257 exc = None
258 with mock.patch.object(utils, 'execute',
259 side_effect=exc) as mock_execute:
260 utils.kill_process(pid, kill_signal, run_as_root=True)
261
262 mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid],
263 run_as_root=True)
264
265 def test_kill_process_returns_none_for_valid_pid(self):
266 self._test_kill_process('1')
267
268 def test_kill_process_returns_none_for_stale_pid(self):
269 self._test_kill_process('1', 'No such process')
270
271 def test_kill_process_raises_exception_for_execute_exception(self):
272 with testtools.ExpectedException(utils.ProcessExecutionError):
273 self._test_kill_process('1', 'Invalid')
274
275 def test_kill_process_with_different_signal(self):
276 self._test_kill_process('1', kill_signal=signal.SIGTERM)
277
278
184class TestFindChildPids(base.BaseTestCase): 279class TestFindChildPids(base.BaseTestCase):
185 280
186 def test_returns_empty_list_for_exit_code_1(self): 281 def test_returns_empty_list_for_exit_code_1(self):
@@ -197,6 +292,14 @@ class TestFindChildPids(base.BaseTestCase):
197 with mock.patch.object(utils, 'execute', return_value=' 123 \n 185\n'): 292 with mock.patch.object(utils, 'execute', return_value=' 123 \n 185\n'):
198 self.assertEqual(utils.find_child_pids(-1), ['123', '185']) 293 self.assertEqual(utils.find_child_pids(-1), ['123', '185'])
199 294
295 def test_returns_list_of_child_process_ids_recursively(self):
296 with mock.patch.object(utils, 'execute',
297 side_effect=[' 123 \n 185\n',
298 ' 40 \n', '\n',
299 '41\n', '\n']):
300 actual = utils.find_child_pids(-1, True)
301 self.assertEqual(actual, ['123', '185', '40', '41'])
302
200 def test_raises_unknown_exception(self): 303 def test_raises_unknown_exception(self):
201 with testtools.ExpectedException(RuntimeError): 304 with testtools.ExpectedException(RuntimeError):
202 with mock.patch.object(utils, 'execute', 305 with mock.patch.object(utils, 'execute',
diff --git a/neutron/tests/unit/cmd/test_netns_cleanup.py b/neutron/tests/unit/cmd/test_netns_cleanup.py
index fb70e88..354de2b 100644
--- a/neutron/tests/unit/cmd/test_netns_cleanup.py
+++ b/neutron/tests/unit/cmd/test_netns_cleanup.py
@@ -13,11 +13,47 @@
13# License for the specific language governing permissions and limitations 13# License for the specific language governing permissions and limitations
14# under the License. 14# under the License.
15 15
16import signal
17
16import mock 18import mock
19import testtools
17 20
18from neutron.cmd import netns_cleanup as util 21from neutron.cmd import netns_cleanup as util
19from neutron.tests import base 22from neutron.tests import base
20 23
24NETSTAT_NETNS_OUTPUT = ("""
25Active Internet connections (only servers)
26Proto Recv-Q Send-Q Local Address Foreign Address State\
27 PID/Program name
28tcp 0 0 0.0.0.0:9697 0.0.0.0:* LISTEN\
29 1347/python
30raw 0 0 0.0.0.0:112 0.0.0.0:* 7\
31 1279/keepalived
32raw 0 0 0.0.0.0:112 0.0.0.0:* 7\
33 1279/keepalived
34raw6 0 0 :::58 :::* 7\
35 1349/radvd
36Active UNIX domain sockets (only servers)
37Proto RefCnt Flags Type State I-Node PID/Program name\
38 Path
39unix 2 [ ACC ] STREAM LISTENING 82039530 1353/python\
40 /tmp/rootwrap-VKSm8a/rootwrap.sock
41""")
42
43NETSTAT_NO_NAMESPACE = ("""
44Cannot open network namespace "qrouter-e6f206b2-4e8d-4597-a7e1-c3a20337e9c6":\
45 No such file or directory
46""")
47
48NETSTAT_NO_LISTEN_PROCS = ("""
49Active Internet connections (only servers)
50Proto Recv-Q Send-Q Local Address Foreign Address State\
51 PID/Program name
52Active UNIX domain sockets (only servers)
53Proto RefCnt Flags Type State I-Node PID/Program name\
54 Path
55""")
56
21 57
22class TestNetnsCleanup(base.BaseTestCase): 58class TestNetnsCleanup(base.BaseTestCase):
23 def setUp(self): 59 def setUp(self):
@@ -163,6 +199,115 @@ class TestNetnsCleanup(base.BaseTestCase):
163 self.assertEqual([], ovs_br_cls.mock_calls) 199 self.assertEqual([], ovs_br_cls.mock_calls)
164 self.assertTrue(debug.called) 200 self.assertTrue(debug.called)
165 201
202 def _test_find_listen_pids_namespace_helper(self, expected,
203 netstat_output=None):
204 with mock.patch('neutron.agent.linux.ip_lib.IPWrapper') as ip_wrap:
205 ip_wrap.return_value.netns.execute.return_value = netstat_output
206 observed = util.find_listen_pids_namespace(mock.ANY)
207 self.assertEqual(expected, observed)
208
209 def test_find_listen_pids_namespace_correct_output(self):
210 expected = set(['1347', '1279', '1349', '1353'])
211 self._test_find_listen_pids_namespace_helper(expected,
212 NETSTAT_NETNS_OUTPUT)
213
214 def test_find_listen_pids_namespace_no_procs(self):
215 expected = set()
216 self._test_find_listen_pids_namespace_helper(expected,
217 NETSTAT_NO_LISTEN_PROCS)
218
219 def test_find_listen_pids_namespace_no_namespace(self):
220 expected = set()
221 self._test_find_listen_pids_namespace_helper(expected,
222 NETSTAT_NO_NAMESPACE)
223
224 def _test__kill_listen_processes_helper(self, pids, parents, children,
225 kills_expected, force):
226 def _get_element(dct, x):
227 return dct.get(x, [])
228
229 def _find_childs(x, recursive):
230 return _get_element(children, x)
231
232 def _find_parent(x):
233 return _get_element(parents, x)
234
235 utils_mock = dict(
236 find_fork_top_parent=mock.DEFAULT,
237 find_child_pids=mock.DEFAULT,
238 get_cmdline_from_pid=mock.DEFAULT,
239 kill_process=mock.DEFAULT)
240
241 self.log_mock = mock.patch.object(util, 'LOG').start()
242 with mock.patch.multiple('neutron.agent.linux.utils', **utils_mock)\
243 as mocks:
244 mocks['find_fork_top_parent'].side_effect = _find_parent
245 mocks['find_child_pids'].side_effect = _find_childs
246
247 with mock.patch.object(util, 'find_listen_pids_namespace',
248 return_value=pids):
249 calls = []
250 for pid, sig in kills_expected:
251 calls.append(mock.call(pid, sig, run_as_root=True))
252 util._kill_listen_processes(mock.ANY, force=force)
253 mocks['kill_process'].assert_has_calls(calls, any_order=True)
254
255 def test__kill_listen_processes_only_parents_force_false(self):
256 pids = ['4', '5', '6']
257 parents = {'4': '1', '5': '5', '6': '2'}
258 children = {}
259 kills_expected = [('1', signal.SIGTERM),
260 ('5', signal.SIGTERM),
261 ('2', signal.SIGTERM)]
262
263 self._test__kill_listen_processes_helper(pids, parents, children,
264 kills_expected, False)
265
266 def test__kill_listen_processes_parents_and_childs(self):
267 pids = ['4', '5', '6']
268 parents = {'4': '1', '5': '2', '6': '3'}
269 children = {'1': ['4'], '2': ['5'], '3': ['6', '8', '7']}
270 kills_expected = [(str(x), signal.SIGKILL) for x in range(1, 9)]
271 self._test__kill_listen_processes_helper(pids, parents, children,
272 kills_expected, True)
273
274 def test_kill_listen_processes(self):
275 with mock.patch.object(util, '_kill_listen_processes',
276 return_value=1) as mock_kill_listen:
277 with mock.patch('neutron.common.utils.wait_until_true')\
278 as wait_until_true_mock:
279 wait_until_true_mock.side_effect = [
280 util.PidsInNamespaceException,
281 None]
282 namespace = mock.ANY
283 util.kill_listen_processes(namespace)
284 mock_kill_listen.assert_has_calls(
285 [mock.call(namespace, force=False),
286 mock.call(namespace, force=True)])
287
288 def test_kill_listen_processes_still_procs(self):
289 with mock.patch.object(util, '_kill_listen_processes',
290 return_value=1):
291 with mock.patch('neutron.common.utils.wait_until_true')\
292 as wait_until_true_mock:
293 wait_until_true_mock.side_effect = (
294 util.PidsInNamespaceException)
295 namespace = mock.ANY
296 with testtools.ExpectedException(
297 util.PidsInNamespaceException):
298 util.kill_listen_processes(namespace)
299
300 def test_kill_listen_processes_no_procs(self):
301 with mock.patch.object(util, '_kill_listen_processes',
302 return_value=0) as mock_kill_listen:
303 with mock.patch('neutron.common.utils.wait_until_true')\
304 as wait_until_true_mock:
305 namespace = mock.ANY
306 util.kill_listen_processes(namespace)
307 mock_kill_listen.assert_called_once_with(namespace,
308 force=False)
309 self.assertFalse(wait_until_true_mock.called)
310
166 def _test_destroy_namespace_helper(self, force, num_devices): 311 def _test_destroy_namespace_helper(self, force, num_devices):
167 ns = 'qrouter-6e322ac7-ab50-4f53-9cdc-d1d3c1164b6d' 312 ns = 'qrouter-6e322ac7-ab50-4f53-9cdc-d1d3c1164b6d'
168 conf = mock.Mock() 313 conf = mock.Mock()
@@ -182,23 +327,27 @@ class TestNetnsCleanup(base.BaseTestCase):
182 ip_wrap.return_value.get_devices.return_value = devices 327 ip_wrap.return_value.get_devices.return_value = devices
183 ip_wrap.return_value.netns.exists.return_value = True 328 ip_wrap.return_value.netns.exists.return_value = True
184 329
185 with mock.patch.object(util, 'unplug_device') as unplug: 330 with mock.patch.object(util, 'kill_listen_processes'):
331
332 with mock.patch.object(util, 'unplug_device') as unplug:
186 333
187 with mock.patch.object(util, 'kill_dhcp') as kill_dhcp: 334 with mock.patch.object(util, 'kill_dhcp') as kill_dhcp:
188 util.destroy_namespace(conf, ns, force) 335 util.destroy_namespace(conf, ns, force)
189 expected = [mock.call(namespace=ns)] 336 expected = [mock.call(namespace=ns)]
190 337
191 if force: 338 if force:
192 expected.extend([ 339 expected.extend([
193 mock.call().netns.exists(ns), 340 mock.call().netns.exists(ns),
194 mock.call().get_devices(exclude_loopback=True)]) 341 mock.call().get_devices(
195 self.assertTrue(kill_dhcp.called) 342 exclude_loopback=True)])
196 unplug.assert_has_calls( 343 self.assertTrue(kill_dhcp.called)
197 [mock.call(conf, d) for d in 344 unplug.assert_has_calls(
198 devices[1:]]) 345 [mock.call(conf, d) for d in
346 devices[1:]])
199 347
200 expected.append(mock.call().garbage_collect_namespace()) 348 expected.append(
201 ip_wrap.assert_has_calls(expected) 349 mock.call().garbage_collect_namespace())
350 ip_wrap.assert_has_calls(expected)
202 351
203 def test_destroy_namespace_empty(self): 352 def test_destroy_namespace_empty(self):
204 self._test_destroy_namespace_helper(False, 0) 353 self._test_destroy_namespace_helper(False, 0)
diff --git a/releasenotes/notes/netns_cleanup_kill_procs-af88d8c47c07dd9c.yaml b/releasenotes/notes/netns_cleanup_kill_procs-af88d8c47c07dd9c.yaml
new file mode 100644
index 0000000..b231a69
--- /dev/null
+++ b/releasenotes/notes/netns_cleanup_kill_procs-af88d8c47c07dd9c.yaml
@@ -0,0 +1,8 @@
1---
2features:
3 - A new mechanism has been added to netns_cleanup to
4 kill processes that are listening on any port/unix
5 socket within the namespace. This will try to kill
6 them gracefully via SIGTERM and, if they don't die,
7 then a SIGKILL will be sent to the remaining
8 processes to ensure a proper cleanup.
diff --git a/setup.cfg b/setup.cfg
index f34cd07..a210ff4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -36,6 +36,7 @@ data_files =
36 etc/neutron/rootwrap.d/ipset-firewall.filters 36 etc/neutron/rootwrap.d/ipset-firewall.filters
37 etc/neutron/rootwrap.d/l3.filters 37 etc/neutron/rootwrap.d/l3.filters
38 etc/neutron/rootwrap.d/linuxbridge-plugin.filters 38 etc/neutron/rootwrap.d/linuxbridge-plugin.filters
39 etc/neutron/rootwrap.d/netns-cleanup.filters
39 etc/neutron/rootwrap.d/openvswitch-plugin.filters 40 etc/neutron/rootwrap.d/openvswitch-plugin.filters
40scripts = 41scripts =
41 bin/neutron-rootwrap-xen-dom0 42 bin/neutron-rootwrap-xen-dom0