Reschedule failed periodic tasks after a short delay

When testing Ironic with Futurist I found out that when the number of tasks
seriously exceeds the number of possible workers, the conductor goes into a
loop retrying them and mostly stops reacting. Insert a short random delay
before retrying, so that the executor has a chance of processing tasks before
we throw more at it. Also recalculate the "now" value, as submitting a task
could take some time.

Change-Id: I8e45ddf7c87cf130028fc9fe937691968db17ee9
This commit is contained in:
Dmitry Tantsur 2016-02-17 16:55:31 +01:00
parent cc7bc7017a
commit f9f685e51d
2 changed files with 14 additions and 4 deletions

View File

@ -341,6 +341,10 @@ class PeriodicWorker(object):
'successes': 0,
}
# When scheduling fails temporarily, use a random delay between 0.9-1.1 sec.
_RESCHEDULE_DELAY = 0.9
_RESCHEDULE_JITTER = 0.2
DEFAULT_JITTER = fractions.Fraction(5, 100)
"""
Default jitter percentage the built-in strategies (that have jitter
@ -610,11 +614,15 @@ class PeriodicWorker(object):
self._now_func,
cb, *args, **kwargs)
except _SCHEDULE_RETRY_EXCEPTIONS as exc:
# Restart after a short delay
delay = (self._RESCHEDULE_DELAY +
random().random() * self._RESCHEDULE_JITTER)
self._log.error("Failed to submit periodic function "
"%s, retrying. Error: %s",
cb_name, exc)
# Restart as soon as possible
self._schedule.push(now, index)
"%s, retrying after %.2f sec. "
"Error: %s",
cb_name, delay, exc)
self._schedule.push(self._now_func() + delay,
index)
else:
barrier.incr()
fut.add_done_callback(functools.partial(_on_done,

View File

@ -375,6 +375,8 @@ class TestRetrySubmission(base.TestCase):
executor_factory=RejectingExecutor,
cond_cls=green_threading.Condition,
event_cls=green_threading.Event)
w._RESCHEDULE_DELAY = 0
w._RESCHEDULE_JITTER = 0
with create_destroy_green_thread(w.start):
eventlet.sleep(2.0)
w.stop()