remove test_multiprocess_api
This test suite should be removed for the following reasons. 1) it's not a unit test 2) it's failing randomly, a lot, and depends on timing of the host for process start and signal handling which are provably not predicable on slower machines 3) it's triggering testr bugs in the ways that it fails which means we don't actually get any subunit in the fail scenario, which makes it actually impossible to move forward on fixes. Change-Id: Ieb7c02115c12cd9f6c9fa67691a643aed7632784 Closes-Bug: #1357578
This commit is contained in:
parent
8cf117af1c
commit
f3c99ba1ef
|
@ -1,254 +0,0 @@
|
|||
# Copyright (c) 2012 Intel, LLC
|
||||
# Copyright (c) 2012 OpenStack Foundation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""
|
||||
Test multiprocess enabled API service.
|
||||
"""
|
||||
import errno
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
import fixtures
|
||||
|
||||
from nova.openstack.common import log as logging
|
||||
from nova import service
|
||||
from nova.tests.integrated.api import client
|
||||
from nova.tests.integrated import integrated_helpers
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MultiprocessWSGITest(integrated_helpers._IntegratedTestBase):
|
||||
_api_version = 'v2'
|
||||
|
||||
def _start_api_service(self):
|
||||
# Process will be started in _spawn()
|
||||
self.osapi = service.WSGIService("osapi_compute")
|
||||
self.auth_url = 'http://%(host)s:%(port)s/%(api_version)s' % ({
|
||||
'host': self.osapi.host, 'port': self.osapi.port,
|
||||
'api_version': self._api_version})
|
||||
LOG.info('auth_url = %s' % self.auth_url)
|
||||
|
||||
def _get_flags(self):
|
||||
self.workers = 2
|
||||
f = super(MultiprocessWSGITest, self)._get_flags()
|
||||
f['osapi_compute_workers'] = self.workers
|
||||
return f
|
||||
|
||||
def _spawn(self):
|
||||
pid = os.fork()
|
||||
if pid == 0:
|
||||
# NOTE(johannes): We can't let the child processes exit back
|
||||
# into the unit test framework since then we'll have multiple
|
||||
# processes running the same tests (and possibly forking more
|
||||
# processes that end up in the same situation). So we need
|
||||
# to catch all exceptions and make sure nothing leaks out, in
|
||||
# particular SystemExit, which is raised by sys.exit(). We use
|
||||
# os._exit() which doesn't have this problem.
|
||||
sys.stdin.close()
|
||||
sys.stdout.close()
|
||||
sys.stderr.close()
|
||||
status = 0
|
||||
try:
|
||||
launcher = service.process_launcher()
|
||||
launcher.launch_service(self.osapi, workers=self.osapi.workers)
|
||||
launcher.wait()
|
||||
except SystemExit as exc:
|
||||
status = exc.code
|
||||
except BaseException:
|
||||
# We need to be defensive here too
|
||||
try:
|
||||
traceback.print_exc()
|
||||
except BaseException:
|
||||
LOG.error("Couldn't print traceback")
|
||||
status = 2
|
||||
|
||||
# Really exit
|
||||
os._exit(status)
|
||||
|
||||
LOG.info("Launched OSAPI WSGI service in process %d", pid)
|
||||
self.pid = pid
|
||||
|
||||
# Wait at most 10 seconds to spawn workers
|
||||
cond = lambda: self.workers == len(self._get_workers())
|
||||
timeout = 10
|
||||
self._wait(cond, timeout)
|
||||
|
||||
workers = self._get_workers()
|
||||
self.assertEqual(len(workers), self.workers)
|
||||
return workers
|
||||
|
||||
def _wait(self, cond, timeout):
|
||||
start = time.time()
|
||||
while True:
|
||||
if cond():
|
||||
break
|
||||
if time.time() - start > timeout:
|
||||
break
|
||||
time.sleep(.1)
|
||||
|
||||
def tearDown(self):
|
||||
if self.pid:
|
||||
# Make sure all processes are stopped
|
||||
LOG.info("in tearDown(), stopping launcher pid %d with SIGTERM",
|
||||
self.pid)
|
||||
os.kill(self.pid, signal.SIGTERM)
|
||||
|
||||
try:
|
||||
# Make sure we reap our test process
|
||||
self._wait_for_all_workers_to_end()
|
||||
self._reap_test()
|
||||
except fixtures.TimeoutException:
|
||||
# If the child gets stuck or is too slow in exiting
|
||||
# after receiving the SIGTERM, gracefully handle the
|
||||
# timeout exception and try harder to kill it. We need
|
||||
# to do this otherwise the child process can hold up
|
||||
# the test run
|
||||
LOG.warn("got fixtures.TimeoutException during tearDown(). "
|
||||
"going nuclear with SIGKILL.")
|
||||
for worker_pid in self._get_workers():
|
||||
LOG.warn("worker pid %d" % worker_pid)
|
||||
os.kill(worker_pid, signal.SIGKILL)
|
||||
|
||||
LOG.warn("parent pid %d" % self.pid)
|
||||
os.kill(self.pid, signal.SIGKILL)
|
||||
raise
|
||||
|
||||
super(MultiprocessWSGITest, self).tearDown()
|
||||
|
||||
def _reap_test(self):
|
||||
pid, status = os.waitpid(self.pid, 0)
|
||||
self.pid = None
|
||||
return status
|
||||
|
||||
def _get_workers(self):
|
||||
# NOTE(hartsocks): use of ps checks the process table for child pid
|
||||
# entries these processes may be ended but not reaped so ps may
|
||||
# show processes that are still being cleaned out of the table.
|
||||
f = os.popen('ps ax -o pid,ppid,command')
|
||||
# Skip ps header
|
||||
f.readline()
|
||||
|
||||
processes = [tuple(int(p) for p in l.strip().split()[:2])
|
||||
for l in f.readlines()]
|
||||
workers = [p for p, pp in processes if pp == self.pid]
|
||||
LOG.info('in _get_workers(), workers: %r for PPID %d',
|
||||
workers, self.pid)
|
||||
return workers
|
||||
|
||||
def wait_on_process_until_end(self, worker_pid):
|
||||
# NOTE: the testing framework itself has a
|
||||
# built in test timeout function so a test
|
||||
# stuck in an infinite loop will eventually
|
||||
# be killed by the test framework.
|
||||
LOG.info('waiting on process %r to exit' % worker_pid)
|
||||
|
||||
while True:
|
||||
# poll the process until it isn't there to poll
|
||||
try:
|
||||
os.kill(worker_pid, 0)
|
||||
time.sleep(0.1)
|
||||
LOG.info("Worker: %s still around, trying again" % worker_pid)
|
||||
except OSError as err:
|
||||
# by watching specifically for errno.ESRCH
|
||||
# we guarantee this loop continues until
|
||||
# the process table has cleared the pid.
|
||||
# Child process table entries hang around
|
||||
# for several cycles in case a parent process
|
||||
# needs to check their exit state.
|
||||
if err.errno == errno.ESRCH:
|
||||
LOG.info("Worker: %s successfully shut down" % worker_pid)
|
||||
break
|
||||
LOG.warning("got non-ESRCH errno %d when attempting "
|
||||
"status of worker PID %d",
|
||||
err.errno, worker_pid)
|
||||
LOG.info('process %r has exited' % worker_pid)
|
||||
|
||||
def test_killed_worker_recover(self):
|
||||
start_workers = self._spawn()
|
||||
|
||||
worker_pid = start_workers[0]
|
||||
# kill one worker and check if new worker can come up
|
||||
LOG.info('pid of first child is %s' % worker_pid)
|
||||
|
||||
# signal child
|
||||
os.kill(worker_pid, signal.SIGTERM)
|
||||
|
||||
self.wait_on_process_until_end(worker_pid)
|
||||
|
||||
# Make sure worker pids don't match
|
||||
end_workers = self._get_workers()
|
||||
self.assertNotEqual(start_workers, end_workers)
|
||||
|
||||
# check if api service still works
|
||||
flavors = self.api.get_flavors()
|
||||
self.assertTrue(len(flavors) > 0, 'Num of flavors > 0.')
|
||||
|
||||
def _terminate_with_signal(self, sig):
|
||||
self._spawn()
|
||||
|
||||
# check if api service is working
|
||||
flavors = self.api.get_flavors()
|
||||
self.assertTrue(len(flavors) > 0, 'Num of flavors > 0.')
|
||||
|
||||
LOG.info("sent launcher_process pid: %r signal: %r" % (self.pid, sig))
|
||||
os.kill(self.pid, sig)
|
||||
|
||||
self._wait_for_all_workers_to_end()
|
||||
|
||||
def _wait_for_all_workers_to_end(self):
|
||||
worker_pids = self._get_workers()
|
||||
|
||||
# did you know the test framework has a timeout of its own?
|
||||
# if a test takes too long, the test will be killed.
|
||||
for pid in worker_pids:
|
||||
self.wait_on_process_until_end(pid)
|
||||
|
||||
workers = self._get_workers()
|
||||
self.assertFalse(workers, 'OS processes left %r' % workers)
|
||||
|
||||
def test_terminate_sigkill(self):
|
||||
self._terminate_with_signal(signal.SIGKILL)
|
||||
status = self._reap_test()
|
||||
self.assertTrue(os.WIFSIGNALED(status))
|
||||
self.assertEqual(os.WTERMSIG(status), signal.SIGKILL)
|
||||
|
||||
def test_terminate_sigterm(self):
|
||||
self._terminate_with_signal(signal.SIGTERM)
|
||||
status = self._reap_test()
|
||||
self.assertTrue(os.WIFEXITED(status))
|
||||
self.assertEqual(os.WEXITSTATUS(status), 0)
|
||||
|
||||
def test_restart_sighup(self):
|
||||
start_workers = self._spawn()
|
||||
|
||||
os.kill(self.pid, signal.SIGHUP)
|
||||
|
||||
# Wait at most 5 seconds to restart a worker
|
||||
cond = lambda: start_workers == self._get_workers()
|
||||
timeout = 5
|
||||
self._wait(cond, timeout)
|
||||
|
||||
# Make sure worker pids match
|
||||
end_workers = self._get_workers()
|
||||
self.assertEqual(start_workers, end_workers)
|
||||
|
||||
|
||||
class MultiprocessWSGITestV3(client.TestOpenStackClientV3Mixin,
|
||||
MultiprocessWSGITest):
|
||||
_api_version = 'v3'
|
Loading…
Reference in New Issue