Merge "Retry on ilo state error"
This commit is contained in:
commit
f03d77ebcb
|
@ -0,0 +1,10 @@
|
||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
An issue was encountered on some HPE iLO supported machines where the
|
||||||
|
Baseboard Management Controller would respond with an HTTP 400 error
|
||||||
|
and an error message indicating the requested operation was invalid
|
||||||
|
for the then-current system state. For example, this happens when changing the power
|
||||||
|
state via the BMC shortly after previously changing the power state.
|
||||||
|
We now attempt to retry within the permitted number of retries when
|
||||||
|
this error is encountered.
|
|
@ -94,12 +94,17 @@ class Connector(object):
|
||||||
|
|
||||||
def check_retry_on_exception(self, exception_msg):
|
def check_retry_on_exception(self, exception_msg):
|
||||||
"""Checks whether retry on exception is required."""
|
"""Checks whether retry on exception is required."""
|
||||||
if ('SYS518' in str(exception_msg)):
|
retry = False
|
||||||
|
exc_str = str(exception_msg)
|
||||||
|
if 'SYS518' in exc_str:
|
||||||
LOG.debug('iDRAC is not yet ready after previous operation. '
|
LOG.debug('iDRAC is not yet ready after previous operation. '
|
||||||
'Error: %(err)s', {'err': str(exception_msg)})
|
'Error: %(err)s', {'err': exc_str})
|
||||||
return True
|
retry = True
|
||||||
else:
|
elif 'iLO.2.15.InvalidOperationForSystemState' in exc_str:
|
||||||
return False
|
LOG.debug('iLO is not ready after previous operation. '
|
||||||
|
'Error: %(err)s', {'err': exc_str})
|
||||||
|
retry = True
|
||||||
|
return retry
|
||||||
|
|
||||||
def _op(self, method, path='', data=None, headers=None, blocking=False,
|
def _op(self, method, path='', data=None, headers=None, blocking=False,
|
||||||
timeout=60, server_side_retries_left=None,
|
timeout=60, server_side_retries_left=None,
|
||||||
|
@ -226,7 +231,7 @@ class Connector(object):
|
||||||
or self.check_retry_on_exception(e.message))
|
or self.check_retry_on_exception(e.message))
|
||||||
and server_side_retries_left > 0):
|
and server_side_retries_left > 0):
|
||||||
LOG.warning('Got server side error %s in response to a '
|
LOG.warning('Got server side error %s in response to a '
|
||||||
'GET request, retrying after %d seconds. Retries '
|
'request, retrying after %d seconds. Retries '
|
||||||
'left %d.',
|
'left %d.',
|
||||||
e, self._server_side_retries_delay,
|
e, self._server_side_retries_delay,
|
||||||
server_side_retries_left)
|
server_side_retries_left)
|
||||||
|
@ -239,7 +244,25 @@ class Connector(object):
|
||||||
**extra_session_req_kwargs)
|
**extra_session_req_kwargs)
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
except exceptions.BadRequestError as e:
|
||||||
|
if (method.lower() != 'get'
|
||||||
|
and self.check_retry_on_exception(e.message)
|
||||||
|
and server_side_retries_left > 0):
|
||||||
|
LOG.warning('Server has indicated a BadRequest for %s but '
|
||||||
|
'the response payload is a known retriable '
|
||||||
|
'condition and we will retry in %d seconds. '
|
||||||
|
'Retries left %d.',
|
||||||
|
e, self._server_side_retries_delay,
|
||||||
|
server_side_retries_left)
|
||||||
|
time.sleep(self._server_side_retries_delay)
|
||||||
|
server_side_retries_left -= 1
|
||||||
|
return self._op(
|
||||||
|
method, path, data=data, headers=headers,
|
||||||
|
blocking=blocking, timeout=timeout,
|
||||||
|
server_side_retries_left=server_side_retries_left,
|
||||||
|
**extra_session_req_kwargs)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
if blocking and response.status_code == 202:
|
if blocking and response.status_code == 202:
|
||||||
if not response.headers.get('Location'):
|
if not response.headers.get('Location'):
|
||||||
m = ('HTTP response for %(method)s request to %(url)s '
|
m = ('HTTP response for %(method)s request to %(url)s '
|
||||||
|
|
|
@ -445,6 +445,43 @@ class ConnectorOpTestCase(base.TestCase):
|
||||||
self.assertEqual(0, mock_sleep.call_count)
|
self.assertEqual(0, mock_sleep.call_count)
|
||||||
self.assertEqual(1, self.request.call_count)
|
self.assertEqual(1, self.request.call_count)
|
||||||
|
|
||||||
|
@mock.patch('time.sleep', autospec=True)
|
||||||
|
def test_op_retry_on_server_400_ilo_not_ready(self, mock_sleep):
|
||||||
|
response_info = {"error": {"@Message.ExtendedInfo": [
|
||||||
|
{'MessageId': 'iLO.2.15.InvalidOperationForSystemState'}]}}
|
||||||
|
mock_error = mock.Mock()
|
||||||
|
mock_error.status_code = 400
|
||||||
|
mock_error.json.return_value = response_info
|
||||||
|
self.request.return_value.status_code = (
|
||||||
|
http_client.INTERNAL_SERVER_ERROR)
|
||||||
|
self.request.return_value.json.side_effect =\
|
||||||
|
exceptions.ServerSideError(
|
||||||
|
method='DELETE', url='http://foo.bar', response=mock_error)
|
||||||
|
|
||||||
|
self.assertRaises(exceptions.ServerSideError, self.conn._op, 'DELETE',
|
||||||
|
'http://foo.bar')
|
||||||
|
self.assertEqual(10, mock_sleep.call_count)
|
||||||
|
self.assertEqual(11, self.request.call_count)
|
||||||
|
|
||||||
|
@mock.patch('time.sleep', autospec=True)
|
||||||
|
def test_op_retry_on_server_400_ilo_not_ready_other_error(self,
|
||||||
|
mock_sleep):
|
||||||
|
response_info = {"error": {"@Message.ExtendedInfo": [
|
||||||
|
{'MessageId': 'iLO.Invalid'}]}}
|
||||||
|
mock_error = mock.Mock()
|
||||||
|
mock_error.status_code = 400
|
||||||
|
mock_error.json.return_value = response_info
|
||||||
|
self.request.return_value.status_code = (
|
||||||
|
http_client.INTERNAL_SERVER_ERROR)
|
||||||
|
self.request.return_value.json.side_effect =\
|
||||||
|
exceptions.ServerSideError(
|
||||||
|
method='DELETE', url='http://foo.bar', response=mock_error)
|
||||||
|
|
||||||
|
self.assertRaises(exceptions.ServerSideError, self.conn._op, 'DELETE',
|
||||||
|
'http://foo.bar')
|
||||||
|
self.assertEqual(0, mock_sleep.call_count)
|
||||||
|
self.assertEqual(1, self.request.call_count)
|
||||||
|
|
||||||
def test_access_error(self):
|
def test_access_error(self):
|
||||||
self.conn._auth = None
|
self.conn._auth = None
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue