Use multiprocessing.Event to ensure services have started

ServiceRestartTest.test_terminate_sigterm would often fail in my Jenkins
instance. It turns out that sometimes the test case would manage to send
its SIGTERM after the signal handler is put in place, but before we
enter the "try: except" block in _wait_for_exit_or_signal. This meant
that the SignalExit exception isn't caught and makes the process
terminate with non-zero exit code. To verify this, you can add a
time.sleep(3) right after self.handle_signal().

To avoid this race condition, we add a ready_callback argument to
Service.wait() that gets called when entering the try-catch block.
In the test, we pass an Event object and wait for it to be set before
proceeding. This also gets rid of the questionable call to "ps".

This patch uses the port 9001 instead of 9000 for the test_pool test. It
looks like something is running on the 9000 port on the gate and is
preventing this test to succeed. This test, as the others in this test
suite, are meant to test the server live, hence the listen method is not
being mocked out.

This test doesn't exist on master anymore, so this is a stable/havana
thing only. Also, this change can't be done in a separate patch because
it depends on the change done in `test_service` to succeed.

Change-Id: I598e8bc889b6c2627a62271627d17b44e01acbe4
(cherry picked from commit 3110c0f66f)
This commit is contained in:
Flavio Percoco 2014-03-24 11:42:07 +01:00
parent 5007a01cc4
commit 5eac8fea0d
3 changed files with 19 additions and 28 deletions

View File

@ -129,7 +129,7 @@ class ServiceLauncher(Launcher):
def handle_signal(self):
_set_signals_handler(self._handle_signal)
def _wait_for_exit_or_signal(self):
def _wait_for_exit_or_signal(self, ready_callback=None):
status = None
signo = 0
@ -137,6 +137,8 @@ class ServiceLauncher(Launcher):
CONF.log_opt_values(LOG, std_logging.DEBUG)
try:
if ready_callback:
ready_callback()
super(ServiceLauncher, self).wait()
except SignalExit as exc:
signame = _signo_to_signame(exc.signo)
@ -156,10 +158,10 @@ class ServiceLauncher(Launcher):
return status, signo
def wait(self):
def wait(self, ready_callback=None):
while True:
self.handle_signal()
status, signo = self._wait_for_exit_or_signal()
status, signo = self._wait_for_exit_or_signal(ready_callback)
if not _is_sighup(signo):
return status
self.restart()

View File

@ -488,7 +488,7 @@ class ServerTest(test.BaseTestCase):
class WSGIServerTest(test.BaseTestCase):
def test_pool(self):
server = wsgi.Service('fake', 9000)
server = wsgi.Service('fake', 9001)
self.assertTrue(server.tg)
self.assertTrue(server.tg.pool)
self.assertEqual(server.tg.pool.free(), 1000)

View File

@ -25,6 +25,7 @@ from __future__ import print_function
import errno
import eventlet
import mox
import multiprocessing
import os
import signal
import socket
@ -230,17 +231,8 @@ class ServiceLauncherTest(ServiceTestBase):
class ServiceRestartTest(ServiceTestBase):
def _check_process_alive(self):
f = os.popen('ps ax -o pid,stat,cmd')
f.readline()
pid_stat = [tuple(p for p in line.strip().split()[:2])
for line in f.readlines()]
for p, stat in pid_stat:
if int(p) == self.pid:
return stat not in ['Z', 'T', 'Z+']
return False
def _spawn_service(self):
ready_event = multiprocessing.Event()
pid = os.fork()
status = 0
if pid == 0:
@ -248,33 +240,30 @@ class ServiceRestartTest(ServiceTestBase):
serv = ServiceWithTimer()
launcher = service.ServiceLauncher()
launcher.launch_service(serv)
launcher.wait()
launcher.wait(ready_callback=ready_event.set)
except SystemExit as exc:
status = exc.code
os._exit(status)
self.pid = pid
return ready_event
def test_service_restart(self):
self._spawn_service()
ready = self._spawn_service()
cond = self._check_process_alive
timeout = 5
self._wait(cond, timeout)
ret = self._check_process_alive()
self.assertTrue(ret)
ready.wait(timeout)
self.assertTrue(ready.is_set(), 'Service never became ready')
ready.clear()
os.kill(self.pid, signal.SIGHUP)
self._wait(cond, timeout)
ret_restart = self._check_process_alive()
self.assertTrue(ret_restart)
ready.wait(timeout)
self.assertTrue(ready.is_set(), 'Service never back after SIGHUP')
def test_terminate_sigterm(self):
self._spawn_service()
cond = self._check_process_alive
ready = self._spawn_service()
timeout = 5
self._wait(cond, timeout)
ready.wait(timeout)
self.assertTrue(ready.is_set(), 'Service never became ready')
os.kill(self.pid, signal.SIGTERM)