diff --git a/releasenotes/notes/retry-ilo-not-ready-error-0b4dce882282eaac.yaml b/releasenotes/notes/retry-ilo-not-ready-error-0b4dce882282eaac.yaml
new file mode 100644
index 00000000..2d647a83
--- /dev/null
+++ b/releasenotes/notes/retry-ilo-not-ready-error-0b4dce882282eaac.yaml
@@ -0,0 +1,10 @@
+---
+fixes:
+  - |
+    An issue was encountered on some HPE iLO supported machines where the
+    Baseboard Management Controller would respond with an HTTP 400 error
+    and an error message indicating the requested operation was invalid
+    for the current system state. For example, attempting to change the power
+    state via the BMC shortly after previously changing the power state.
+    We now attempt to retry within the permitted number of retries when
+    this error is encountered.
diff --git a/sushy/connector.py b/sushy/connector.py
index 017b856a..60d5a7b4 100644
--- a/sushy/connector.py
+++ b/sushy/connector.py
@@ -94,12 +94,17 @@ class Connector(object):
 
     def check_retry_on_exception(self, exception_msg):
         """Checks whether retry on exception is required."""
-        if ('SYS518' in str(exception_msg)):
+        retry = False
+        exc_str = str(exception_msg)
+        if 'SYS518' in exc_str:
             LOG.debug('iDRAC is not yet ready after previous operation. '
-                      'Error: %(err)s', {'err': str(exception_msg)})
-            return True
-        else:
-            return False
+                      'Error: %(err)s', {'err': exc_str})
+            retry = True
+        elif 'iLO.2.15.InvalidOperationForSystemState' in exc_str:
+            LOG.debug('iLO is not ready after previous operation. '
+                      'Error: %(err)s', {'err': exc_str})
+            retry = True
+        return retry
 
     def _op(self, method, path='', data=None, headers=None, blocking=False,
             timeout=60, server_side_retries_left=None,
@@ -226,7 +231,7 @@ class Connector(object):
                     or self.check_retry_on_exception(e.message))
                     and server_side_retries_left > 0):
                 LOG.warning('Got server side error %s in response to a '
-                            'GET request, retrying after %d seconds. Retries '
+                            'request, retrying after %d seconds. Retries '
                             'left %d.', e, self._server_side_retries_delay,
                             server_side_retries_left)
 
@@ -239,7 +244,25 @@ class Connector(object):
                     **extra_session_req_kwargs)
             else:
                 raise
-
+        except exceptions.BadRequestError as e:
+            if (method.lower() != 'get'
+                    and self.check_retry_on_exception(e.message)
+                    and server_side_retries_left > 0):
+                LOG.warning('Server has indicated a BadRequest for %s but '
+                            'the response payload is a known retriable '
+                            'condition and we will retry in %d seconds. '
+                            'Retries left %d.',
+                            e, self._server_side_retries_delay,
+                            server_side_retries_left)
+                time.sleep(self._server_side_retries_delay)
+                server_side_retries_left -= 1
+                return self._op(
+                    method, path, data=data, headers=headers,
+                    blocking=blocking, timeout=timeout,
+                    server_side_retries_left=server_side_retries_left,
+                    **extra_session_req_kwargs)
+            else:
+                raise
         if blocking and response.status_code == 202:
             if not response.headers.get('Location'):
                 m = ('HTTP response for %(method)s request to %(url)s '
diff --git a/sushy/tests/unit/test_connector.py b/sushy/tests/unit/test_connector.py
index 32cd2180..4f0950a6 100644
--- a/sushy/tests/unit/test_connector.py
+++ b/sushy/tests/unit/test_connector.py
@@ -445,6 +445,43 @@ class ConnectorOpTestCase(base.TestCase):
         self.assertEqual(0, mock_sleep.call_count)
         self.assertEqual(1, self.request.call_count)
 
+    @mock.patch('time.sleep', autospec=True)
+    def test_op_retry_on_server_400_ilo_not_ready(self, mock_sleep):
+        response_info = {"error": {"@Message.ExtendedInfo": [
+            {'MessageId': 'iLO.2.15.InvalidOperationForSystemState'}]}}
+        mock_error = mock.Mock()
+        mock_error.status_code = 400
+        mock_error.json.return_value = response_info
+        self.request.return_value.status_code = (
+            http_client.INTERNAL_SERVER_ERROR)
+        self.request.return_value.json.side_effect =\
+            exceptions.ServerSideError(
+                method='DELETE', url='http://foo.bar', response=mock_error)
+
+        self.assertRaises(exceptions.ServerSideError, self.conn._op, 'DELETE',
+                          'http://foo.bar')
+        self.assertEqual(10, mock_sleep.call_count)
+        self.assertEqual(11, self.request.call_count)
+
+    @mock.patch('time.sleep', autospec=True)
+    def test_op_retry_on_server_400_ilo_not_ready_other_error(self,
+                                                              mock_sleep):
+        response_info = {"error": {"@Message.ExtendedInfo": [
+            {'MessageId': 'iLO.Invalid'}]}}
+        mock_error = mock.Mock()
+        mock_error.status_code = 400
+        mock_error.json.return_value = response_info
+        self.request.return_value.status_code = (
+            http_client.INTERNAL_SERVER_ERROR)
+        self.request.return_value.json.side_effect =\
+            exceptions.ServerSideError(
+                method='DELETE', url='http://foo.bar', response=mock_error)
+
+        self.assertRaises(exceptions.ServerSideError, self.conn._op, 'DELETE',
+                          'http://foo.bar')
+        self.assertEqual(0, mock_sleep.call_count)
+        self.assertEqual(1, self.request.call_count)
+
     def test_access_error(self):
         self.conn._auth = None
 