summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Alvarez <dalvarez@redhat.com>2017-01-12 01:06:01 +0000
committerDaniel Alvarez <dalvarez@redhat.com>2017-01-12 03:52:30 +0000
commit3f9f740d81b51be5e563069c720366fa90ade9ee (patch)
tree25973462d4f476aa654f1b810952c9d10ec4673e
parentada4237905053253349d0b48dc667cdbacf8045c (diff)
Fix netns_cleanup interrupted on rwd I/O
Functional tests for netns_cleanup have been failing a few times in the gate lately. After thorough tests we've seen that the issue was related to using rootwrap-daemon inside a wait_until_true loop. When timeout fired while utils.execute() was reading from rootwrap-daemon, it got interrupted and the output of the last command was not read. Therefore, next calls to utils.execute() would read the output of their previous command rather than their own, leading to unexpected results. This fix will poll existing processes in the namespace without making use of the wait_until_true loop. Instead, it will check elapsed time and raise the exception if timeout is exceeded. Also, i'm removing debug traces introduced in 327f7fc4d54bbaaed3778b5eb3c51a037a9a178f which helped finding the root cause of this bug. Change-Id: Ie233261e4be36eecaf6ec6d0532f0f5e2e996cd2 Closes-Bug: #1654287
Notes
Notes (review): Verified+1: Arista CI <arista-openstack-test@aristanetworks.com> Verified+1: Nuage CI <nuage-ci@nuagenetworks.net> Code-Review+1: John Schwarz <jschwarz@redhat.com> Verified+1: Mellanox CI <mlnx-openstack-ci@dev.mellanox.co.il> Code-Review+2: Kevin Benton <kevin@benton.pub> Code-Review+2: Armando Migliaccio <armamig@gmail.com> Workflow+1: Armando Migliaccio <armamig@gmail.com> Verified+2: Jenkins Submitted-by: Jenkins Submitted-at: Fri, 20 Jan 2017 08:25:01 +0000 Reviewed-on: https://review.openstack.org/421325 Project: openstack/neutron Branch: refs/heads/master
-rw-r--r--neutron/agent/linux/ip_lib.py8
-rw-r--r--neutron/cmd/netns_cleanup.py22
-rw-r--r--neutron/tests/unit/cmd/test_netns_cleanup.py21
3 files changed, 24 insertions, 27 deletions
diff --git a/neutron/agent/linux/ip_lib.py b/neutron/agent/linux/ip_lib.py
index fce568f..c47e814 100644
--- a/neutron/agent/linux/ip_lib.py
+++ b/neutron/agent/linux/ip_lib.py
@@ -131,14 +131,10 @@ class IPWrapper(SubProcessBase):
131 cmd = ['ip', 'netns', 'exec', self.namespace, 131 cmd = ['ip', 'netns', 'exec', self.namespace,
132 'find', SYS_NET_PATH, '-maxdepth', '1', 132 'find', SYS_NET_PATH, '-maxdepth', '1',
133 '-type', 'l', '-printf', '%f '] 133 '-type', 'l', '-printf', '%f ']
134 output_str = utils.execute( 134 output = utils.execute(
135 cmd, 135 cmd,
136 run_as_root=True, 136 run_as_root=True,
137 log_fail_as_error=self.log_fail_as_error) 137 log_fail_as_error=self.log_fail_as_error).split()
138 # NOTE(dalvarez): Logging the output of this call due to
139 # bug1654287.
140 LOG.debug('get_devices(): %s', output_str)
141 output = output_str.split()
142 except RuntimeError: 138 except RuntimeError:
143 # We could be racing with a cron job deleting namespaces. 139 # We could be racing with a cron job deleting namespaces.
144 # Just return a empty list if the namespace is deleted. 140 # Just return a empty list if the namespace is deleted.
diff --git a/neutron/cmd/netns_cleanup.py b/neutron/cmd/netns_cleanup.py
index 0458fd0..1dd391c 100644
--- a/neutron/cmd/netns_cleanup.py
+++ b/neutron/cmd/netns_cleanup.py
@@ -35,7 +35,6 @@ from neutron.agent.linux import interface
35from neutron.agent.linux import ip_lib 35from neutron.agent.linux import ip_lib
36from neutron.agent.linux import utils 36from neutron.agent.linux import utils
37from neutron.common import config 37from neutron.common import config
38from neutron.common import utils as common_utils
39from neutron.conf.agent import cmd 38from neutron.conf.agent import cmd
40from neutron.conf.agent import dhcp as dhcp_config 39from neutron.conf.agent import dhcp as dhcp_config
41 40
@@ -162,14 +161,19 @@ def wait_until_no_listen_pids_namespace(namespace, timeout=SIGTERM_WAITTIME):
162 If after timeout seconds, there are remaining processes in the namespace, 161 If after timeout seconds, there are remaining processes in the namespace,
163 then a PidsInNamespaceException will be thrown. 162 then a PidsInNamespaceException will be thrown.
164 """ 163 """
165 # Would be better to handle an eventlet.timeout.Timeout exception 164 # NOTE(dalvarez): This function can block forever if
166 # but currently there's a problem importing eventlet since it's 165 # find_listen_pids_in_namespace never returns which is really unlikely. We
167 # doing a local import from cmd/eventlet which doesn't have a 166 # can't use wait_until_true because we might get interrupted by eventlet
168 # timeout module 167 # Timeout during our I/O with rootwrap daemon and that will lead to errors
169 common_utils.wait_until_true( 168 # in subsequent calls to utils.execute grabbing always the output of the
170 lambda: not find_listen_pids_namespace(namespace), 169 # previous command
171 timeout=SIGTERM_WAITTIME, 170 start = end = time.time()
172 exception=PidsInNamespaceException) 171 while end - start < timeout:
172 if not find_listen_pids_namespace(namespace):
173 return
174 time.sleep(1)
175 end = time.time()
176 raise PidsInNamespaceException
173 177
174 178
175def _kill_listen_processes(namespace, force=False): 179def _kill_listen_processes(namespace, force=False):
diff --git a/neutron/tests/unit/cmd/test_netns_cleanup.py b/neutron/tests/unit/cmd/test_netns_cleanup.py
index 354de2b..56af8ea 100644
--- a/neutron/tests/unit/cmd/test_netns_cleanup.py
+++ b/neutron/tests/unit/cmd/test_netns_cleanup.py
@@ -274,11 +274,9 @@ class TestNetnsCleanup(base.BaseTestCase):
274 def test_kill_listen_processes(self): 274 def test_kill_listen_processes(self):
275 with mock.patch.object(util, '_kill_listen_processes', 275 with mock.patch.object(util, '_kill_listen_processes',
276 return_value=1) as mock_kill_listen: 276 return_value=1) as mock_kill_listen:
277 with mock.patch('neutron.common.utils.wait_until_true')\ 277 with mock.patch.object(util, 'wait_until_no_listen_pids_namespace',
278 as wait_until_true_mock: 278 side_effect=[util.PidsInNamespaceException,
279 wait_until_true_mock.side_effect = [ 279 None]):
280 util.PidsInNamespaceException,
281 None]
282 namespace = mock.ANY 280 namespace = mock.ANY
283 util.kill_listen_processes(namespace) 281 util.kill_listen_processes(namespace)
284 mock_kill_listen.assert_has_calls( 282 mock_kill_listen.assert_has_calls(
@@ -288,10 +286,8 @@ class TestNetnsCleanup(base.BaseTestCase):
288 def test_kill_listen_processes_still_procs(self): 286 def test_kill_listen_processes_still_procs(self):
289 with mock.patch.object(util, '_kill_listen_processes', 287 with mock.patch.object(util, '_kill_listen_processes',
290 return_value=1): 288 return_value=1):
291 with mock.patch('neutron.common.utils.wait_until_true')\ 289 with mock.patch.object(util, 'wait_until_no_listen_pids_namespace',
292 as wait_until_true_mock: 290 side_effect=util.PidsInNamespaceException):
293 wait_until_true_mock.side_effect = (
294 util.PidsInNamespaceException)
295 namespace = mock.ANY 291 namespace = mock.ANY
296 with testtools.ExpectedException( 292 with testtools.ExpectedException(
297 util.PidsInNamespaceException): 293 util.PidsInNamespaceException):
@@ -300,13 +296,14 @@ class TestNetnsCleanup(base.BaseTestCase):
300 def test_kill_listen_processes_no_procs(self): 296 def test_kill_listen_processes_no_procs(self):
301 with mock.patch.object(util, '_kill_listen_processes', 297 with mock.patch.object(util, '_kill_listen_processes',
302 return_value=0) as mock_kill_listen: 298 return_value=0) as mock_kill_listen:
303 with mock.patch('neutron.common.utils.wait_until_true')\ 299 with mock.patch.object(util,
304 as wait_until_true_mock: 300 'wait_until_no_listen_pids_namespace')\
301 as wait_until_mock:
305 namespace = mock.ANY 302 namespace = mock.ANY
306 util.kill_listen_processes(namespace) 303 util.kill_listen_processes(namespace)
307 mock_kill_listen.assert_called_once_with(namespace, 304 mock_kill_listen.assert_called_once_with(namespace,
308 force=False) 305 force=False)
309 self.assertFalse(wait_until_true_mock.called) 306 self.assertFalse(wait_until_mock.called)
310 307
311 def _test_destroy_namespace_helper(self, force, num_devices): 308 def _test_destroy_namespace_helper(self, force, num_devices):
312 ns = 'qrouter-6e322ac7-ab50-4f53-9cdc-d1d3c1164b6d' 309 ns = 'qrouter-6e322ac7-ab50-4f53-9cdc-d1d3c1164b6d'