Add backoff to ebtables retry
We need a backoff between ebtables retries. In some tempest tests we have seen the retries complete in 100ms and still fail. We now sleep for ebtables_retry_interval * loop count seconds. With a default of 1.0 this means we sleep for 1.0s, 2.0s, and 3.0s before we finally give up. Change-Id: I0b9b664a592364bedd11124a1ec921d8ea011704 Partial-Bug: #1316621
This commit is contained in:
parent
e048e5700c
commit
4f418727f7
|
@ -21,6 +21,7 @@ import calendar
|
|||
import inspect
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
|
||||
import netaddr
|
||||
from oslo.config import cfg
|
||||
|
@ -130,6 +131,9 @@ linux_net_opts = [
|
|||
cfg.IntOpt('ebtables_exec_attempts',
|
||||
default=3,
|
||||
help='Number of times to retry ebtables commands on failure.'),
|
||||
cfg.FloatOpt('ebtables_retry_interval',
|
||||
default=1.0,
|
||||
help='Number of seconds to wait between ebtables retries.'),
|
||||
]
|
||||
|
||||
CONF = cfg.CONF
|
||||
|
@ -1665,8 +1669,11 @@ def _exec_ebtables(*cmd, **kwargs):
|
|||
attempts = CONF.ebtables_exec_attempts
|
||||
if attempts <= 0:
|
||||
attempts = 1
|
||||
while attempts > 0:
|
||||
attempts -= 1
|
||||
count = 1
|
||||
while count <= attempts:
|
||||
# Updated our counters if needed
|
||||
sleep = CONF.ebtables_retry_interval * count
|
||||
count += 1
|
||||
# NOTE(cfb): ebtables reports all errors with a return code of 255.
|
||||
# As such we can't know if we hit a locking error, or some
|
||||
# other error (like a rule doesn't exist) so we have to
|
||||
|
@ -1674,11 +1681,15 @@ def _exec_ebtables(*cmd, **kwargs):
|
|||
try:
|
||||
_execute(*cmd, check_exit_code=[0], **kwargs)
|
||||
except processutils.ProcessExecutionError:
|
||||
if not attempts and check_exit_code:
|
||||
if count > attempts and check_exit_code:
|
||||
LOG.warning(_LW('%s failed. Not Retrying.'), ' '.join(cmd))
|
||||
raise
|
||||
else:
|
||||
LOG.warning(_LW('%s failed. Retrying.'), ' '.join(cmd))
|
||||
# We need to sleep a bit before retrying
|
||||
LOG.warning(_LW("%(cmd)s failed. Sleeping %(time)s seconds "
|
||||
"before retry."),
|
||||
{'cmd': ' '.join(cmd), 'time': sleep})
|
||||
time.sleep(sleep)
|
||||
else:
|
||||
# Success
|
||||
return
|
||||
|
|
|
@ -17,6 +17,7 @@ import calendar
|
|||
import contextlib
|
||||
import datetime
|
||||
import os
|
||||
import time
|
||||
|
||||
import mock
|
||||
from mox3 import mox
|
||||
|
@ -1192,10 +1193,14 @@ class LinuxNetworkTestCase(test.NoDBTestCase):
|
|||
def test_exec_ebtables_fail_all(self):
|
||||
executes = []
|
||||
|
||||
def fake_sleep(interval):
|
||||
pass
|
||||
|
||||
def fake_execute(*args, **kwargs):
|
||||
executes.append(args)
|
||||
raise processutils.ProcessExecutionError('error')
|
||||
|
||||
self.stubs.Set(time, 'sleep', fake_sleep)
|
||||
self.stubs.Set(self.driver, '_execute', fake_execute)
|
||||
self.assertRaises(processutils.ProcessExecutionError,
|
||||
self.driver._exec_ebtables, 'fake')
|
||||
|
@ -1206,6 +1211,9 @@ class LinuxNetworkTestCase(test.NoDBTestCase):
|
|||
def test_exec_ebtables_fail_once(self):
|
||||
executes = []
|
||||
|
||||
def fake_sleep(interval):
|
||||
pass
|
||||
|
||||
def fake_execute(*args, **kwargs):
|
||||
executes.append(args)
|
||||
if len(executes) == 1:
|
||||
|
@ -1213,6 +1221,7 @@ class LinuxNetworkTestCase(test.NoDBTestCase):
|
|||
else:
|
||||
return "", ""
|
||||
|
||||
self.stubs.Set(time, 'sleep', fake_sleep)
|
||||
self.stubs.Set(self.driver, '_execute', fake_execute)
|
||||
self.driver._exec_ebtables('fake')
|
||||
self.assertEqual(2, len(executes))
|
||||
|
|
Loading…
Reference in New Issue