Improve exponential backoff for wrap_db_retry

The @oslo_db.api.wrap_db_retry decorator is used to retry db.api
methods. When a db_error is raised, the decorator calls the API
method again after a delay (precisely, 2 ** retry_times seconds).

If the db_error is a deadlock error, that fixed schedule is a poor
fit: many deadlocks happen because several methods (transactions)
run concurrently, and if every caller retries after the same
2 ** retry_times seconds, the methods are simply re-run
concurrently and the deadlock is likely to recur.

To minimize the chance of regenerating a deadlock and to reduce the
average sleep time, add random jitter to the delay period by default
whenever a deadlock error is detected, as sketched below.
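
For illustration, a minimal sketch of the jittered backoff
calculation (it mirrors the _get_inc_interval helper added in this
change; the standalone function name exists only for the example):

    import random

    def _next_backoff(next_interval, max_retry_interval, jitter):
        # Exponential growth: the base interval doubles on every retry.
        next_interval = next_interval * 2
        if jitter:
            # With jitter the actual sleep is drawn from [0, next_interval),
            # so concurrent callers wake up at different times.
            sleep_time = random.uniform(0, next_interval)
        else:
            sleep_time = next_interval
        # Never sleep longer than the configured maximum.
        return min(sleep_time, max_retry_interval), next_interval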

Change-Id: I206745708570f1f292529ff58eee9b83fc09a9f2
Closes-bug: #1737869
Yikun Jiang 2018-03-29 18:29:17 +08:00
parent 85cf42e841
commit 4c20534179
2 changed files with 90 additions and 7 deletions


@@ -24,6 +24,7 @@ API methods.
 """

 import logging
+import random
 import threading
 import time
@@ -103,15 +104,21 @@ class wrap_db_retry(object):
     :param exception_checker: checks if an exception should trigger a retry
     :type exception_checker: callable
+
+    :param jitter: whether to use random jitter when increasing the retry
+        interval; jitter is always interpreted as True for a DBDeadlockError
+    :type jitter: bool
     """

     def __init__(self, retry_interval=1, max_retries=20,
                  inc_retry_interval=True,
                  max_retry_interval=10, retry_on_disconnect=False,
                  retry_on_deadlock=False,
-                 exception_checker=lambda exc: False):
+                 exception_checker=lambda exc: False,
+                 jitter=False):
         super(wrap_db_retry, self).__init__()
+        self.jitter = jitter
         self.db_error = (exception.RetryRequest, )
         # default is that we re-raise anything unexpected
         self.exception_checker = exception_checker
@@ -127,7 +134,7 @@ class wrap_db_retry(object):
     def __call__(self, f):
         @six.wraps(f)
         def wrapper(*args, **kwargs):
-            next_interval = self.retry_interval
+            sleep_time = next_interval = self.retry_interval
             remaining = self.max_retries

             while True:
@@ -150,12 +157,20 @@ class wrap_db_retry(object):
                     # NOTE(vsergeyev): We are using patched time module, so
                     #                  this effectively yields the execution
                     #                  context to another green thread.
-                    time.sleep(next_interval)
+                    time.sleep(sleep_time)
                     if self.inc_retry_interval:
-                        next_interval = min(
-                            next_interval * 2,
-                            self.max_retry_interval
-                        )
+                        # NOTE(jiangyikun): In order to minimize the chance of
+                        # regenerating a deadlock and reduce the average sleep
+                        # time, we are using jitter by default when the
+                        # deadlock is detected. With the jitter,
+                        # sleep_time = [0, next_interval), otherwise, without
+                        # the jitter, sleep_time = next_interval.
+                        if isinstance(e, exception.DBDeadlock):
+                            jitter = True
+                        else:
+                            jitter = self.jitter
+                        sleep_time, next_interval = self._get_inc_interval(
+                            next_interval, jitter)
                     remaining -= 1

         return wrapper
@@ -170,6 +185,18 @@ class wrap_db_retry(object):
             return True
         return self.exception_checker(exc)
+
+    def _get_inc_interval(self, n, jitter):
+        # NOTE(jiangyikun): The "n" helps us record 2 ** retry_times.
+        # The "sleep_time" is the real time to sleep:
+        #   - Without jitter: sleep_time = 2 ** retry_times = n
+        #   - With jitter: sleep_time = [0, 2 ** retry_times) < n
+        n = n * 2
+        if jitter:
+            sleep_time = random.uniform(0, n)
+        else:
+            sleep_time = n
+        return min(sleep_time, self.max_retry_interval), n


 class DBAPI(object):
     """Initialize the chosen DB API backend.

@@ -253,3 +253,59 @@ class DBRetryRequestCase(DBAPITestCase):
         self.assertRaises(AttributeError, some_method)
         self.assertFalse(mock_log.called)
+
+    @mock.patch('oslo_db.api.time.sleep', return_value=None)
+    def test_retry_wrapper_deadlock(self, mock_sleep):
+        # Tests that jitter is False if the retry wrapper hits a
+        # non-deadlock error.
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
+        def some_method_no_deadlock():
+            raise exception.RetryRequest(ValueError())
+        with mock.patch(
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+            mock_get.return_value = 2, 2
+            self.assertRaises(ValueError, some_method_no_deadlock)
+            mock_get.assert_called_once_with(1, False)
+
+        # Tests that jitter is True if the retry wrapper hits a deadlock
+        # error.
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
+        def some_method_deadlock():
+            raise exception.DBDeadlock('test')
+        with mock.patch(
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+            mock_get.return_value = 0.1, 2
+            self.assertRaises(exception.DBDeadlock, some_method_deadlock)
+            mock_get.assert_called_once_with(1, True)
+
+        # Tests that jitter is True if jitter is enabled by the user.
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True, jitter=True)
+        def some_method_no_deadlock_exp():
+            raise exception.RetryRequest(ValueError())
+        with mock.patch(
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+            mock_get.return_value = 0.1, 2
+            self.assertRaises(ValueError, some_method_no_deadlock_exp)
+            mock_get.assert_called_once_with(1, True)
+
+    def test_wrap_db_retry_get_interval(self):
+        x = api.wrap_db_retry(max_retries=5, retry_on_deadlock=True,
+                              max_retry_interval=11)
+        self.assertEqual(11, x.max_retry_interval)
+        for i in (1, 2, 4):
+            # With jitter: sleep_time = [0, 2 ** retry_times)
+            sleep_time, n = x._get_inc_interval(i, True)
+            self.assertEqual(2 * i, n)
+            self.assertTrue(2 * i > sleep_time)
+
+            # Without jitter: sleep_time = 2 ** retry_times
+            sleep_time, n = x._get_inc_interval(i, False)
+            self.assertEqual(2 * i, n)
+            self.assertEqual(2 * i, sleep_time)
+
+        for i in (8, 16, 32):
+            sleep_time, n = x._get_inc_interval(i, False)
+            self.assertEqual(x.max_retry_interval, sleep_time)
+            self.assertEqual(2 * i, n)
+
+            sleep_time, n = x._get_inc_interval(i, True)
+            self.assertTrue(x.max_retry_interval >= sleep_time)
+            self.assertEqual(2 * i, n)