Adds retry function to HNAS driver

Multiple requests sent to HNAS can cause concurrency problems
that end up in 'SSC failed connection' errors. This patch adds a
retry decorator to _execute in the HDS HNAS Manila driver to fix
this problem.

The current retry functionality in Manila always uses fixed
values to define how long to wait before the next attempt. This
behavior can make the retries of multiple requests collide on
every attempt, since they all wait the same amount of time and
then hit the same resource together again. So, additionally, this
patch changes manila.utils.retry() to accept a parameter that
enables randomly generated wait intervals.

Change-Id: Ib862f62517fcc8816781204b902119e9b20121e0
Closes-bug: 1491550
Alyson Rosa 2015-09-02 13:28:30 -03:00
parent 6977b0196d
commit 8dc3863e10
5 changed files with 113 additions and 24 deletions
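
As a rough, standalone illustration of the idea (this is not Manila's
implementation, and every name below is invented for the sketch), a retry
decorator that draws each wait from the exponential-backoff window keeps
concurrent callers from retrying in lockstep:

import functools
import random
import time


def retry_with_jitter(exc_type, interval=1, retries=10, backoff_rate=2):
    """Retry the wrapped call on exc_type with a randomized wait.

    The wait before retry number n is drawn from between interval and
    interval * backoff_rate ** n seconds, so callers that fail at the
    same moment are unlikely to wake up again at the same moment.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, retries + 1):
                try:
                    return func(*args, **kwargs)
                except exc_type:
                    if attempt == retries:
                        raise
                    ceiling = interval * backoff_rate ** attempt
                    time.sleep(random.uniform(interval, ceiling))
        return wrapper
    return decorator

In the patch itself the same effect is obtained by passing the new
wait_random=True flag to the existing manila.utils.retry() helper, as the
hunks below show; the driver then only needs to raise HNASConnException on
the transient SSC failure for _execute to become retryable.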

@@ -659,6 +659,10 @@ class HNASBackendException(ManilaException):
     message = _("HNAS Backend Exception: %(msg)s")


+class HNASConnException(ManilaException):
+    message = _("HNAS Connection Exception: %(msg)s")
+
+
 # ConsistencyGroup
 class ConsistencyGroupNotFound(NotFound):
     message = _("ConsistencyGroup %(consistency_group_id)s could not be "

@@ -414,6 +414,7 @@ class HNASSSHBackend(object):
             msg = (_("Share %s was not created.") % share['id'])
             raise exception.HNASBackendException(msg=msg)

+    @mutils.retry(exception=exception.HNASConnException, wait_random=True)
     def _execute(self, commands):
         command = ['ssc', '127.0.0.1']
         if self.admin_ip0 is not None:
@@ -442,13 +443,19 @@ class HNASSSHBackend(object):
                                        'out': out, 'err': err})
                 return out, err
             except processutils.ProcessExecutionError as e:
-                LOG.debug("Command %(cmd)s result: out = %(out)s - err = "
-                          "%(err)s - exit = %(exit)s.", {'cmd': e.cmd,
-                                                         'out': e.stdout,
-                                                         'err': e.stderr,
-                                                         'exit': e.exit_code})
-                LOG.error(_LE("Error running SSH command."))
-                raise
+                if 'Failed to establish SSC connection' in e.stderr:
+                    LOG.debug("SSC connection error!")
+                    msg = _("Failed to establish SSC connection.")
+                    raise exception.HNASConnException(msg=msg)
+                else:
+                    LOG.debug("Command %(cmd)s result: out = %(out)s - err = "
+                              "%(err)s - exit = %(exit)s.", {'cmd': e.cmd,
+                                                             'out': e.stdout,
+                                                             'err': e.stderr,
+                                                             'exit':
+                                                             e.exit_code})
+                    LOG.error(_LE("Error running SSH command."))
+                    raise

     def _check_fs_mounted(self, fs_name):
         self._check_fs()

@@ -23,6 +23,7 @@ import paramiko

 from manila import exception
 from manila.share.drivers.hitachi import ssh
 from manila import test
+from manila import utils

 CONF = cfg.CONF
@@ -69,7 +70,6 @@ file_system 1055 fake_span Umount 2 4 5
file_system2 1050 fake_span2 NoEVS - 10 0 1
fake_fs 1051 fake_span Umount 2 100 1024 """
HNAS_RESULT_one_fs = """ \
Instance name Dev On span State EVS Cap/GiB Confined Flag
----------------- ---- ------- ------ --- ------- -------- ----
@@ -1170,6 +1170,31 @@ class HNASSSHTestCase(test.TestCase):
                                          port=self.port)
         self.assertIn('Request submitted successfully.', output)

+    def test__execute_retry(self):
+        commands = ['tree-clone-job-submit', '-e', '/src', '/dst']
+        concat_command = ('ssc --smuauth fake console-context --evs 2 '
+                          'tree-clone-job-submit -e /src /dst')
+        msg = 'Failed to establish SSC connection'
+
+        item_mock = mock.Mock()
+        self.mock_object(utils.pools.Pool, 'item',
+                         mock.Mock(return_value=item_mock))
+        setattr(item_mock, '__enter__', mock.Mock())
+        setattr(item_mock, '__exit__', mock.Mock())
+
+        self.mock_object(paramiko.SSHClient, 'connect')
+        # testing retrying 3 times
+        self.mock_object(putils, 'ssh_execute', mock.Mock(
+            side_effect=[putils.ProcessExecutionError(stderr=msg),
+                         putils.ProcessExecutionError(stderr=msg),
+                         putils.ProcessExecutionError(stderr=msg),
+                         (HNAS_RESULT_job, '')]))
+
+        self._driver._execute(commands)
+
+        putils.ssh_execute.assert_called_with(mock.ANY, concat_command,
+                                              check_exit_code=True)
+
     def test__execute_ssh_exception(self):
         key = self.ssh_private_key
         commands = ['tree-clone-job-submit', '-e', '/src', '/dst']
@@ -1177,7 +1202,8 @@ class HNASSSHTestCase(test.TestCase):
                           'tree-clone-job-submit -e /src /dst')
         self.mock_object(paramiko.SSHClient, 'connect')
         self.mock_object(putils, 'ssh_execute',
-                         mock.Mock(side_effect=putils.ProcessExecutionError))
+                         mock.Mock(side_effect=putils.ProcessExecutionError
+                                   (stderr='Error')))

         self.assertRaises(putils.ProcessExecutionError,
                           self._driver._execute, commands)

@@ -694,6 +694,49 @@ class TestRetryDecorator(test.TestCase):
             self.assertEqual('success', ret)
             self.assertEqual(1, self.counter)

+    def test_no_retry_required_random(self):
+        self.counter = 0
+
+        with mock.patch.object(time, 'sleep') as mock_sleep:
+            @utils.retry(exception.ManilaException,
+                         interval=2,
+                         retries=3,
+                         backoff_rate=2,
+                         wait_random=True)
+            def succeeds():
+                self.counter += 1
+                return 'success'
+
+            ret = succeeds()
+            self.assertFalse(mock_sleep.called)
+            self.assertEqual('success', ret)
+            self.assertEqual(1, self.counter)
+
+    def test_retries_once_random(self):
+        self.counter = 0
+        interval = 2
+        backoff_rate = 2
+        retries = 3
+
+        with mock.patch.object(time, 'sleep') as mock_sleep:
+            @utils.retry(exception.ManilaException,
+                         interval,
+                         retries,
+                         backoff_rate,
+                         wait_random=True)
+            def fails_once():
+                self.counter += 1
+                if self.counter < 2:
+                    raise exception.ManilaException(data='fake')
+                else:
+                    return 'success'
+
+            ret = fails_once()
+            self.assertEqual('success', ret)
+            self.assertEqual(2, self.counter)
+            self.assertEqual(1, mock_sleep.call_count)
+            self.assertTrue(mock_sleep.called)
+
     def test_retries_once(self):
         self.counter = 0
         interval = 2

@@ -22,6 +22,7 @@ import errno
 import inspect
 import os
 import pyclbr
+import random
 import re
 import shutil
 import socket
@@ -540,23 +541,25 @@ class ComparableMixin(object):
         return self._compare(other, lambda s, o: s != o)


-def retry(exception, interval=1, retries=10, backoff_rate=2):
+def retry(exception, interval=1, retries=10, backoff_rate=2,
+          wait_random=False):
     """A wrapper around retrying library.

-    This decorator allows to log and to check 'retries' input param.
-    Time interval between retries is calculated in the following way:
-    interval * backoff_rate ^ previous_attempt_number
+    This decorator allows to log and to check 'retries' input param.
+    Time interval between retries is calculated in the following way:
+    interval * backoff_rate ^ previous_attempt_number

-    :param exception: expected exception type. When wrapped function
-                      raises an exception of this type,the function
-                      execution is retried.
-    :param interval: param 'interval' is used to calculate time interval
-                     between retries:
+    :param exception: expected exception type. When wrapped function
+                      raises an exception of this type, the function
+                      execution is retried.
+    :param interval: param 'interval' is used to calculate time interval
+                     between retries:
+                     interval * backoff_rate ^ previous_attempt_number
-    :param retries: number of retries.
-    :param backoff_rate: param 'backoff_rate' is used to calculate time
-                         interval between retries:
-                         interval * backoff_rate ^ previous_attempt_number
+    :param retries: number of retries
+    :param backoff_rate: param 'backoff_rate' is used to calculate time
+                         interval between retries:
+                         interval * backoff_rate ^ previous_attempt_number
+    :param wait_random: boolean value to enable retry with random wait timer.
     """

     def _retry_on_exception(e):
@@ -565,8 +568,14 @@ def retry(exception, interval=1, retries=10, backoff_rate=2):
     def _backoff_sleep(previous_attempt_number, delay_since_first_attempt_ms):
         exp = backoff_rate ** previous_attempt_number
         wait_for = max(0, interval * exp)
-        LOG.debug("Sleeping for %s seconds", wait_for)
-        return wait_for * 1000.0
+
+        if wait_random:
+            wait_val = random.randrange(interval * 1000.0, wait_for * 1000.0)
+        else:
+            wait_val = wait_for * 1000.0
+
+        LOG.debug("Sleeping for %s seconds.", (wait_val / 1000.0))
+        return wait_val

     def _print_stop(previous_attempt_number, delay_since_first_attempt_ms):
         delay_since_first_attempt = delay_since_first_attempt_ms / 1000.0
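
Reading the randomized branch of _backoff_sleep() above with the defaults
(interval=1, backoff_rate=2): for previous_attempt_number values of 1, 2
and 3 the sleep is drawn from randrange(1000, 2000), randrange(1000, 4000)
and randrange(1000, 8000) milliseconds, where the old code always slept the
full 2000, 4000 and 8000 ms. A tiny standalone check of those windows (the
helper name here is made up for the example):

import random


def backoff_window_ms(previous_attempt_number, interval=1, backoff_rate=2):
    # Mirrors the wait_random branch above: the upper bound grows as
    # interval * backoff_rate ** previous_attempt_number, while the lower
    # bound stays at the base interval.
    wait_for = max(0, interval * backoff_rate ** previous_attempt_number)
    return random.randrange(int(interval * 1000), int(wait_for * 1000))


for attempt in (1, 2, 3):
    print(attempt, backoff_window_ms(attempt))  # e.g. 1 1383 / 2 2647 / 3 5121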