diff --git a/kuryr_kubernetes/k8s_client.py b/kuryr_kubernetes/k8s_client.py index aed047d0d..98127f947 100644 --- a/kuryr_kubernetes/k8s_client.py +++ b/kuryr_kubernetes/k8s_client.py @@ -33,10 +33,6 @@ from kuryr_kubernetes import utils CONF = config.CONF LOG = logging.getLogger(__name__) -# Hardcoding 60 seconds as I don't see a scenario when we want to wait more -# than a minute for reconnection. -MAX_BACKOFF = 60 - class K8sClient(object): # REVISIT(ivc): replace with python-k8sclient if it could be extended @@ -298,8 +294,7 @@ class K8sClient(object): resource_version = m.get('resourceVersion', None) except (requests.ReadTimeout, requests.ConnectionError, ssl.SSLError, requests.exceptions.ChunkedEncodingError): - t = utils.exponential_backoff(attempt, min_backoff=0, - max_backoff=MAX_BACKOFF) + t = utils.exponential_backoff(attempt) log = LOG.debug if attempt > 0: # Only make it a warning if it's happening again, no need diff --git a/kuryr_kubernetes/tests/unit/handlers/test_retry.py b/kuryr_kubernetes/tests/unit/handlers/test_retry.py index 3f177f8d1..d9470fbe4 100644 --- a/kuryr_kubernetes/tests/unit/handlers/test_retry.py +++ b/kuryr_kubernetes/tests/unit/handlers/test_retry.py @@ -50,9 +50,8 @@ class TestRetryHandler(test_base.TestCase): 'kuryr_kubernetes.clients.get_kubernetes_client')) f_k8s.mock.return_value = self.k8s - @mock.patch('random.randint') @mock.patch('time.sleep') - def test_should_not_sleep(self, m_sleep, m_randint): + def test_should_not_sleep(self, m_sleep): deadline = self.now - 1 retry = h_retry.Retry(mock.Mock()) @@ -60,28 +59,25 @@ class TestRetryHandler(test_base.TestCase): self.assertFalse(ret) m_sleep.assert_not_called() - m_randint.assert_not_called() def _test_should_sleep(self, seconds_left, slept): - attempt = 5 + attempt = 2 timeout = 20 interval = 3 - randint = 2 deadline = self.now + seconds_left retry = h_retry.Retry(mock.Mock(), timeout=timeout, interval=interval) with mock.patch('random.randint') as m_randint, \ mock.patch('time.sleep') as m_sleep: - m_randint.return_value = randint + m_randint.return_value = 0 # Assume 0 as jitter ret = retry._sleep(deadline, attempt, _EX2()) self.assertEqual(slept, ret) - m_randint.assert_called_once_with(1, 2 ** attempt - 1) m_sleep.assert_called_once_with(slept) def test_should_sleep(self): - self._test_should_sleep(7, 6) + self._test_should_sleep(20, 12) def test_should_sleep_last(self): self._test_should_sleep(5, 5) diff --git a/kuryr_kubernetes/utils.py b/kuryr_kubernetes/utils.py index ab5b912f8..afc975a22 100644 --- a/kuryr_kubernetes/utils.py +++ b/kuryr_kubernetes/utils.py @@ -42,7 +42,9 @@ VALID_MULTI_POD_POOLS_OPTS = {'noop': ['neutron-vif', 'nested': ['nested-vlan'], } DEFAULT_TIMEOUT = 500 -DEFAULT_INTERVAL = 3 +DEFAULT_INTERVAL = 1 +DEFAULT_JITTER = 3 +MAX_BACKOFF = 60 MAX_ATTEMPTS = 10 subnet_caching_opts = [ @@ -110,18 +112,15 @@ def check_suitable_multi_pool_driver_opt(pool_driver, pod_driver): return pod_driver in VALID_MULTI_POD_POOLS_OPTS.get(pool_driver, []) -def exponential_sleep(deadline, attempt, interval=DEFAULT_INTERVAL): +def exponential_sleep(deadline, attempt, interval=DEFAULT_INTERVAL, + max_backoff=MAX_BACKOFF, jitter=DEFAULT_JITTER): """Sleep for exponential duration. - This implements a variation of exponential backoff algorithm [1] and - ensures that there is a minimal time `interval` to sleep. - (expected backoff E(c) = interval * 2 ** c / 2). - - [1] https://en.wikipedia.org/wiki/Exponential_backoff - :param deadline: sleep timeout duration in seconds. :param attempt: attempt count of sleep function. :param interval: minimal time interval to sleep + :param max_backoff: maximum time to sleep + :param jitter: max value of jitter added to the sleep time :return: the actual time that we've slept """ now = time.time() @@ -130,7 +129,8 @@ def exponential_sleep(deadline, attempt, interval=DEFAULT_INTERVAL): if seconds_left <= 0: return 0 - to_sleep = exponential_backoff(attempt, interval) + to_sleep = exponential_backoff(attempt, interval, max_backoff=max_backoff, + jitter=jitter) if to_sleep > seconds_left: to_sleep = seconds_left @@ -142,17 +142,28 @@ def exponential_sleep(deadline, attempt, interval=DEFAULT_INTERVAL): return to_sleep -def exponential_backoff(attempt, interval=DEFAULT_INTERVAL, min_backoff=1, - max_backoff=None): +def exponential_backoff(attempt, interval=DEFAULT_INTERVAL, + max_backoff=MAX_BACKOFF, jitter=DEFAULT_JITTER): + """Return exponential backoff duration with jitter. + + This implements a variation of exponential backoff algorithm [1] (expected + backoff E(c) = interval * 2 ** attempt / 2). + + [1] https://en.wikipedia.org/wiki/Exponential_backoff + """ + if attempt >= MAX_ATTEMPTS: # No need to calculate very long intervals attempt = MAX_ATTEMPTS - backoff = random.randint(min_backoff, 2 ** attempt - 1) * interval + backoff = 2 ** attempt * interval if max_backoff is not None and backoff > max_backoff: backoff = max_backoff + if jitter: + backoff += random.randint(0, jitter) + return backoff