Merge "Retry on ilo state error"

This commit is contained in:
Zuul 2023-05-04 08:49:21 +00:00 committed by Gerrit Code Review
commit f03d77ebcb
3 changed files with 77 additions and 7 deletions

View File

@ -0,0 +1,10 @@
---
fixes:
- |
An issue was encountered on some HPE iLO supported machines where the
Baseboard Management Controller would respond with an HTTP 400 error
and an error message indicating the requested operation was invalid
for the current system state. This could happen, for example, when
attempting to change the power state via the BMC shortly after a
previous power state change. We now retry, within the permitted number
of retries, when this error is encountered.

View File

@ -94,12 +94,17 @@ class Connector(object):
def check_retry_on_exception(self, exception_msg):
    """Check whether a failed request should be retried.

    The error text is searched for identifiers of known transient BMC
    conditions: ``SYS518`` (logged as iDRAC not yet ready) and
    ``iLO.2.15.InvalidOperationForSystemState`` (logged as iLO not
    ready).

    :param exception_msg: the error message to inspect; it is converted
        with ``str()`` before matching, so any object is accepted.
    :returns: True if the message contains one of the known retriable
        identifiers, False otherwise.
    """
    retry = False
    exc_str = str(exception_msg)
    if 'SYS518' in exc_str:
        LOG.debug('iDRAC is not yet ready after previous operation. '
                  'Error: %(err)s', {'err': exc_str})
        retry = True
    elif 'iLO.2.15.InvalidOperationForSystemState' in exc_str:
        # The placeholder name has to match the key of the mapping passed
        # to the logger; a mismatch ('%(error)s' vs. 'err') makes the
        # record impossible to format when debug logging is enabled.
        LOG.debug('iLO is not ready after previous operation. '
                  'Error: %(err)s', {'err': exc_str})
        retry = True
    return retry
def _op(self, method, path='', data=None, headers=None, blocking=False,
timeout=60, server_side_retries_left=None,
@ -226,7 +231,7 @@ class Connector(object):
or self.check_retry_on_exception(e.message))
and server_side_retries_left > 0):
LOG.warning('Got server side error %s in response to a '
'GET request, retrying after %d seconds. Retries '
'request, retrying after %d seconds. Retries '
'left %d.',
e, self._server_side_retries_delay,
server_side_retries_left)
@ -239,7 +244,25 @@ class Connector(object):
**extra_session_req_kwargs)
else:
raise
except exceptions.BadRequestError as e:
if (method.lower() != 'get'
and self.check_retry_on_exception(e.message)
and server_side_retries_left > 0):
LOG.warning('Server has indicated a BadRequest for %s but '
'the response payload is a known retriable '
'condition and we will retry in %d seconds. '
'Retries left %d.',
e, self._server_side_retries_delay,
server_side_retries_left)
time.sleep(self._server_side_retries_delay)
server_side_retries_left -= 1
return self._op(
method, path, data=data, headers=headers,
blocking=blocking, timeout=timeout,
server_side_retries_left=server_side_retries_left,
**extra_session_req_kwargs)
else:
raise
if blocking and response.status_code == 202:
if not response.headers.get('Location'):
m = ('HTTP response for %(method)s request to %(url)s '

View File

@ -445,6 +445,43 @@ class ConnectorOpTestCase(base.TestCase):
self.assertEqual(0, mock_sleep.call_count)
self.assertEqual(1, self.request.call_count)
@mock.patch('time.sleep', autospec=True)
def test_op_retry_on_server_400_ilo_not_ready(self, mock_sleep):
    """DELETE hitting the iLO system-state error is retried, then raises.

    The mocked request reports INTERNAL_SERVER_ERROR and its ``json()``
    raises a ServerSideError built from a response whose status code is
    400 and whose extended info carries the
    ``iLO.2.15.InvalidOperationForSystemState`` MessageId. The call is
    expected to raise ServerSideError after 10 sleeps and 11 requests.

    NOTE(review): despite "400" in the name, the request itself reports
    a 500 and raises ServerSideError - confirm whether a scenario raising
    BadRequestError should be covered as well.
    """
    ilo_error_body = {"error": {"@Message.ExtendedInfo": [
        {'MessageId': 'iLO.2.15.InvalidOperationForSystemState'}]}}
    error_response = mock.Mock()
    error_response.status_code = 400
    error_response.json.return_value = ilo_error_body

    mocked_reply = self.request.return_value
    mocked_reply.status_code = http_client.INTERNAL_SERVER_ERROR
    mocked_reply.json.side_effect = exceptions.ServerSideError(
        method='DELETE', url='http://foo.bar', response=error_response)

    with self.assertRaises(exceptions.ServerSideError):
        self.conn._op('DELETE', 'http://foo.bar')

    self.assertEqual(10, mock_sleep.call_count)
    self.assertEqual(11, self.request.call_count)
@mock.patch('time.sleep', autospec=True)
def test_op_retry_on_server_400_ilo_not_ready_other_error(self,
                                                          mock_sleep):
    """DELETE failing with an unrecognised iLO MessageId is not retried.

    Same setup as the retriable case, except the extended info carries
    the MessageId ``iLO.Invalid``. The call is expected to raise
    ServerSideError with no sleep and exactly one request.
    """
    other_error_body = {"error": {"@Message.ExtendedInfo": [
        {'MessageId': 'iLO.Invalid'}]}}
    error_response = mock.Mock()
    error_response.status_code = 400
    error_response.json.return_value = other_error_body

    mocked_reply = self.request.return_value
    mocked_reply.status_code = http_client.INTERNAL_SERVER_ERROR
    mocked_reply.json.side_effect = exceptions.ServerSideError(
        method='DELETE', url='http://foo.bar', response=error_response)

    with self.assertRaises(exceptions.ServerSideError):
        self.conn._op('DELETE', 'http://foo.bar')

    self.assertEqual(0, mock_sleep.call_count)
    self.assertEqual(1, self.request.call_count)
def test_access_error(self):
self.conn._auth = None