Rewrite heartbeat runner with event

The current implementation uses an event attached to the Heart class to detect
if the thread is done. Since the thread is a daemon, it might be killed after
the interpreter is done, especially if an application is killed before calling
stop() on the coordinator. This results in the following backtrace:

Exception in thread Thread-1 (most likely raised during interpreter shutdown):
Traceback (most recent call last):
  File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
  File "/usr/lib/python2.7/threading.py", line 754, in run
  File "/usr/local/lib/python2.7/dist-packages/oslo_utils/excutils.py", line 250, in wrapper
  File "/usr/local/lib/python2.7/dist-packages/tooz/coordination.py", line 206, in _beat_forever_until_stopped
  File "/usr/lib/python2.7/threading.py", line 585, in set
  File "/usr/lib/python2.7/threading.py", line 407, in notifyAll
<type 'exceptions.TypeError'>: 'NoneType' object is not callable

This patches simplifies the heart class by using the thread status rather than
an event to check if it's alive and wait for it to stop.

Change-Id: I03ccbefd3d68e9dd85ea23a3840d606d1ffc80bb
(cherry picked from commit 3253241376)
This commit is contained in:
Julien Danjou 2017-02-27 13:55:40 +01:00 committed by Pradeep Kilambi
parent 1b5b21e7c0
commit bc11f09b83
1 changed files with 21 additions and 28 deletions

View File

@ -150,8 +150,6 @@ class Heart(object):
event_cls=threading.Event):
self._thread_cls = thread_cls
self._dead = event_cls()
self._finished = event_cls()
self._finished.set()
self._runner = None
self._driver = driver
self._beats = 0
@ -164,38 +162,33 @@ class Heart(object):
def is_alive(self):
"""Returns if the heart is beating."""
return not (self._runner is None
or not self._runner.is_alive()
or self._finished.is_set())
or not self._runner.is_alive())
@excutils.forever_retry_uncaught_exceptions
def _beat_forever_until_stopped(self):
"""Inner beating loop."""
try:
while not self._dead.is_set():
with timeutils.StopWatch() as w:
wait_until_next_beat = self._driver.heartbeat()
ran_for = w.elapsed()
if ran_for > wait_until_next_beat:
LOG.warning(
"Heartbeating took too long to execute (it ran for"
" %0.2f seconds which is %0.2f seconds longer than"
" the next heartbeat idle time). This may cause"
" timeouts (in locks, leadership, ...) to"
" happen (which will not end well).", ran_for,
ran_for - wait_until_next_beat)
self._beats += 1
# NOTE(harlowja): use the event object for waiting and
# not a sleep function since doing that will allow this code
# to terminate early if stopped via the stop() method vs
# having to wait until the sleep function returns.
self._dead.wait(wait_until_next_beat)
finally:
self._finished.set()
while not self._dead.is_set():
with timeutils.StopWatch() as w:
wait_until_next_beat = self._driver.heartbeat()
ran_for = w.elapsed()
if ran_for > wait_until_next_beat:
LOG.warning(
"Heartbeating took too long to execute (it ran for"
" %0.2f seconds which is %0.2f seconds longer than"
" the next heartbeat idle time). This may cause"
" timeouts (in locks, leadership, ...) to"
" happen (which will not end well).", ran_for,
ran_for - wait_until_next_beat)
self._beats += 1
# NOTE(harlowja): use the event object for waiting and
# not a sleep function since doing that will allow this code
# to terminate early if stopped via the stop() method vs
# having to wait until the sleep function returns.
self._dead.wait(wait_until_next_beat)
def start(self, thread_cls=None):
"""Starts the heart beating thread (noop if already started)."""
if not self.is_alive():
self._finished.clear()
self._dead.clear()
self._beats = 0
if thread_cls is None:
@ -210,8 +203,8 @@ class Heart(object):
def wait(self, timeout=None):
"""Wait up to given timeout for the heart beating thread to stop."""
self._finished.wait(timeout)
return self._finished.is_set()
self._runner.join(timeout)
return self._runner.is_alive()
@six.add_metaclass(abc.ABCMeta)