Add backoff to ebtables retry

We need a backoff between ebtables retries. In some tempest tests we
have seen the retries complete in 100ms and still fail.

We now sleep for ebtables_retry_interval * loop count seconds. With
the default interval of 1.0, this means we sleep for 1.0s, 2.0s, and
3.0s before finally giving up.

Change-Id: I0b9b664a592364bedd11124a1ec921d8ea011704
Partial-Bug: #1316621
This commit is contained in:
Chet Burgess 2014-12-09 14:51:40 -08:00
parent e048e5700c
commit 4f418727f7
2 changed files with 24 additions and 4 deletions

View File

@ -21,6 +21,7 @@ import calendar
import inspect
import os
import re
import time
import netaddr
from oslo.config import cfg
@ -130,6 +131,9 @@ linux_net_opts = [
cfg.IntOpt('ebtables_exec_attempts',
default=3,
help='Number of times to retry ebtables commands on failure.'),
cfg.FloatOpt('ebtables_retry_interval',
default=1.0,
help='Number of seconds to wait between ebtables retries.'),
]
CONF = cfg.CONF
@ -1665,8 +1669,11 @@ def _exec_ebtables(*cmd, **kwargs):
attempts = CONF.ebtables_exec_attempts
if attempts <= 0:
attempts = 1
while attempts > 0:
attempts -= 1
count = 1
while count <= attempts:
# Update our counters if needed
sleep = CONF.ebtables_retry_interval * count
count += 1
# NOTE(cfb): ebtables reports all errors with a return code of 255.
# As such we can't know if we hit a locking error, or some
# other error (like a rule doesn't exist) so we have to
@ -1674,11 +1681,15 @@ def _exec_ebtables(*cmd, **kwargs):
try:
_execute(*cmd, check_exit_code=[0], **kwargs)
except processutils.ProcessExecutionError:
if not attempts and check_exit_code:
if count > attempts and check_exit_code:
LOG.warning(_LW('%s failed. Not Retrying.'), ' '.join(cmd))
raise
else:
LOG.warning(_LW('%s failed. Retrying.'), ' '.join(cmd))
# We need to sleep a bit before retrying
LOG.warning(_LW("%(cmd)s failed. Sleeping %(time)s seconds "
"before retry."),
{'cmd': ' '.join(cmd), 'time': sleep})
time.sleep(sleep)
else:
# Success
return

View File

@ -17,6 +17,7 @@ import calendar
import contextlib
import datetime
import os
import time
import mock
from mox3 import mox
@ -1192,10 +1193,14 @@ class LinuxNetworkTestCase(test.NoDBTestCase):
def test_exec_ebtables_fail_all(self):
executes = []
def fake_sleep(interval):
pass
def fake_execute(*args, **kwargs):
executes.append(args)
raise processutils.ProcessExecutionError('error')
self.stubs.Set(time, 'sleep', fake_sleep)
self.stubs.Set(self.driver, '_execute', fake_execute)
self.assertRaises(processutils.ProcessExecutionError,
self.driver._exec_ebtables, 'fake')
@ -1206,6 +1211,9 @@ class LinuxNetworkTestCase(test.NoDBTestCase):
def test_exec_ebtables_fail_once(self):
executes = []
def fake_sleep(interval):
pass
def fake_execute(*args, **kwargs):
executes.append(args)
if len(executes) == 1:
@ -1213,6 +1221,7 @@ class LinuxNetworkTestCase(test.NoDBTestCase):
else:
return "", ""
self.stubs.Set(time, 'sleep', fake_sleep)
self.stubs.Set(self.driver, '_execute', fake_execute)
self.driver._exec_ebtables('fake')
self.assertEqual(2, len(executes))