Add retry logic mechanism
Added retry logic mechanism and added them to the API calls where ironic presently attempts to pull an exclusive lock on the baremetal node. Change-Id: Ia6fbc9eec612793b3214d7883e0552913a088d5d
This commit is contained in:
parent
faaf5f024b
commit
44bafb8cda
|
@ -523,6 +523,60 @@ def safe_dict_max(key, data):
|
|||
return max_value
|
||||
|
||||
|
||||
def _call_client_and_retry(client, url, retry_on=None,
|
||||
call_retries=3, retry_wait=2,
|
||||
**kwargs):
|
||||
"""Method to provide retry operations.
|
||||
|
||||
Some APIs utilize HTTP errors on certian operations to indicate that
|
||||
the resource is presently locked, and as such this mechanism provides
|
||||
the ability to retry upon known error codes.
|
||||
|
||||
:param object client: The client method, such as:
|
||||
``self.baremetal_client.post``
|
||||
:param string url: The URL to perform the operation upon.
|
||||
:param integer retry_on: A list of error codes that can be retried on.
|
||||
The method also supports a single integer to be
|
||||
defined.
|
||||
:param integer call_retries: The number of times to retry the call upon
|
||||
the error code defined by the 'retry_on'
|
||||
parameter. Default: 3
|
||||
:param integer retry_wait: The time in seconds to wait between retry
|
||||
attempts. Default: 2
|
||||
|
||||
:returns: The object returned by the client call.
|
||||
"""
|
||||
|
||||
# NOTE(TheJulia): This method, as of this note, does not have direct
|
||||
# unit tests, although is fairly well tested by the tests checking
|
||||
# retry logic in test_baremetal_node.py.
|
||||
log = _log.setup_logging('shade.http')
|
||||
|
||||
if isinstance(retry_on, int):
|
||||
retry_on = [retry_on]
|
||||
|
||||
count = 0
|
||||
while (count < call_retries):
|
||||
count += 1
|
||||
try:
|
||||
ret_val = client(url, **kwargs)
|
||||
except exc.OpenStackCloudHTTPError as e:
|
||||
if (retry_on is not None and
|
||||
e.response.status_code in retry_on):
|
||||
log.debug('Received retryable error {err}, waiting '
|
||||
'{wait} seconds to retry', {
|
||||
'err': e.response.status_code,
|
||||
'wait': retry_wait
|
||||
})
|
||||
time.sleep(retry_wait)
|
||||
continue
|
||||
else:
|
||||
raise
|
||||
# Break out of the loop, since the loop should only continue
|
||||
# when we encounter a known connection error.
|
||||
return ret_val
|
||||
|
||||
|
||||
def parse_range(value):
|
||||
"""Parse a numerical range string.
|
||||
|
||||
|
|
|
@ -9166,7 +9166,9 @@ class OpenStackCloud(
|
|||
port = self._baremetal_client.get(port_url, microversion=1.6,
|
||||
error_message=port_msg)
|
||||
port_url = '/ports/{uuid}'.format(uuid=port['ports'][0]['uuid'])
|
||||
self._baremetal_client.delete(port_url, error_message=port_msg)
|
||||
_utils._call_client_and_retry(self._baremetal_client.delete,
|
||||
port_url, retry_on=[409, 503],
|
||||
error_message=port_msg)
|
||||
|
||||
with _utils.shade_exceptions(
|
||||
"Error unregistering machine {node_id} from the baremetal "
|
||||
|
@ -9178,10 +9180,11 @@ class OpenStackCloud(
|
|||
# microversions in future releases, as such, we explicitly set
|
||||
# the version to what we have been using with the client library..
|
||||
version = "1.6"
|
||||
msg = "Baremetal machine failed to be deleted."
|
||||
msg = "Baremetal machine failed to be deleted"
|
||||
url = '/nodes/{node_id}'.format(
|
||||
node_id=uuid)
|
||||
self._baremetal_client.delete(url,
|
||||
_utils._call_client_and_retry(self._baremetal_client.delete,
|
||||
url, retry_on=[409, 503],
|
||||
error_message=msg,
|
||||
microversion=version)
|
||||
|
||||
|
@ -9407,10 +9410,12 @@ class OpenStackCloud(
|
|||
if configdrive:
|
||||
payload['configdrive'] = configdrive
|
||||
|
||||
machine = self._baremetal_client.put(url,
|
||||
json=payload,
|
||||
error_message=msg,
|
||||
microversion=version)
|
||||
machine = _utils._call_client_and_retry(self._baremetal_client.put,
|
||||
url,
|
||||
retry_on=[409, 503],
|
||||
json=payload,
|
||||
error_message=msg,
|
||||
microversion=version)
|
||||
if wait:
|
||||
for count in _utils._iterate_timeout(
|
||||
timeout,
|
||||
|
@ -9511,10 +9516,12 @@ class OpenStackCloud(
|
|||
else:
|
||||
desired_state = 'power {state}'.format(state=state)
|
||||
payload = {'target': desired_state}
|
||||
self._baremetal_client.put(url,
|
||||
json=payload,
|
||||
error_message=msg,
|
||||
microversion="1.6")
|
||||
_utils._call_client_and_retry(self._baremetal_client.put,
|
||||
url,
|
||||
retry_on=[409, 503],
|
||||
json=payload,
|
||||
error_message=msg,
|
||||
microversion="1.6")
|
||||
return None
|
||||
|
||||
def set_machine_power_on(self, name_or_id):
|
||||
|
|
|
@ -553,6 +553,40 @@ class TestBaremetalNode(base.IronicTestCase):
|
|||
|
||||
self.assert_calls()
|
||||
|
||||
def test_set_machine_power_on_with_retires(self):
|
||||
# NOTE(TheJulia): This logic ends up testing power on/off and reboot
|
||||
# as they all utilize the same helper method.
|
||||
self.register_uris([
|
||||
dict(
|
||||
method='PUT',
|
||||
status_code=503,
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid'],
|
||||
'states', 'power']),
|
||||
validate=dict(json={'target': 'power on'})),
|
||||
dict(
|
||||
method='PUT',
|
||||
status_code=409,
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid'],
|
||||
'states', 'power']),
|
||||
validate=dict(json={'target': 'power on'})),
|
||||
dict(
|
||||
method='PUT',
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid'],
|
||||
'states', 'power']),
|
||||
validate=dict(json={'target': 'power on'})),
|
||||
])
|
||||
return_value = self.op_cloud.set_machine_power_on(
|
||||
self.fake_baremetal_node['uuid'])
|
||||
self.assertIsNone(return_value)
|
||||
|
||||
self.assert_calls()
|
||||
|
||||
def test_set_machine_power_off(self):
|
||||
self.register_uris([
|
||||
dict(
|
||||
|
@ -630,6 +664,51 @@ class TestBaremetalNode(base.IronicTestCase):
|
|||
|
||||
self.assert_calls()
|
||||
|
||||
def test_node_set_provision_state_with_retries(self):
|
||||
deploy_node = self.fake_baremetal_node.copy()
|
||||
deploy_node['provision_state'] = 'deploying'
|
||||
active_node = self.fake_baremetal_node.copy()
|
||||
active_node['provision_state'] = 'active'
|
||||
self.register_uris([
|
||||
dict(
|
||||
method='PUT',
|
||||
status_code=409,
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid'],
|
||||
'states', 'provision']),
|
||||
validate=dict(json={'target': 'active',
|
||||
'configdrive': 'http://host/file'})),
|
||||
dict(
|
||||
method='PUT',
|
||||
status_code=503,
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid'],
|
||||
'states', 'provision']),
|
||||
validate=dict(json={'target': 'active',
|
||||
'configdrive': 'http://host/file'})),
|
||||
dict(
|
||||
method='PUT',
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid'],
|
||||
'states', 'provision']),
|
||||
validate=dict(json={'target': 'active',
|
||||
'configdrive': 'http://host/file'})),
|
||||
dict(method='GET',
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid']]),
|
||||
json=self.fake_baremetal_node),
|
||||
])
|
||||
self.op_cloud.node_set_provision_state(
|
||||
self.fake_baremetal_node['uuid'],
|
||||
'active',
|
||||
configdrive='http://host/file')
|
||||
|
||||
self.assert_calls()
|
||||
|
||||
def test_node_set_provision_state_wait_timeout(self):
|
||||
deploy_node = self.fake_baremetal_node.copy()
|
||||
deploy_node['provision_state'] = 'deploying'
|
||||
|
@ -1408,6 +1487,64 @@ class TestBaremetalNode(base.IronicTestCase):
|
|||
timeout=0.001)
|
||||
self.assert_calls()
|
||||
|
||||
def test_unregister_machine_retries(self):
|
||||
mac_address = self.fake_baremetal_port['address']
|
||||
nics = [{'mac': mac_address}]
|
||||
port_uuid = self.fake_baremetal_port['uuid']
|
||||
# NOTE(TheJulia): The two values below should be the same.
|
||||
port_node_uuid = self.fake_baremetal_port['node_uuid']
|
||||
port_url_address = 'detail?address=%s' % mac_address
|
||||
self.fake_baremetal_node['provision_state'] = 'available'
|
||||
self.register_uris([
|
||||
dict(
|
||||
method='GET',
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid']]),
|
||||
json=self.fake_baremetal_node),
|
||||
dict(
|
||||
method='GET',
|
||||
uri=self.get_mock_url(
|
||||
resource='ports',
|
||||
append=[port_url_address]),
|
||||
json={'ports': [{'address': mac_address,
|
||||
'node_uuid': port_node_uuid,
|
||||
'uuid': port_uuid}]}),
|
||||
dict(
|
||||
method='DELETE',
|
||||
status_code=503,
|
||||
uri=self.get_mock_url(
|
||||
resource='ports',
|
||||
append=[self.fake_baremetal_port['uuid']])),
|
||||
dict(
|
||||
method='DELETE',
|
||||
status_code=409,
|
||||
uri=self.get_mock_url(
|
||||
resource='ports',
|
||||
append=[self.fake_baremetal_port['uuid']])),
|
||||
dict(
|
||||
method='DELETE',
|
||||
uri=self.get_mock_url(
|
||||
resource='ports',
|
||||
append=[self.fake_baremetal_port['uuid']])),
|
||||
dict(
|
||||
method='DELETE',
|
||||
status_code=409,
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid']])),
|
||||
dict(
|
||||
method='DELETE',
|
||||
uri=self.get_mock_url(
|
||||
resource='nodes',
|
||||
append=[self.fake_baremetal_node['uuid']])),
|
||||
])
|
||||
|
||||
self.op_cloud.unregister_machine(nics,
|
||||
self.fake_baremetal_node['uuid'])
|
||||
|
||||
self.assert_calls()
|
||||
|
||||
def test_unregister_machine_unavailable(self):
|
||||
# This is a list of invalid states that the method
|
||||
# should fail on.
|
||||
|
|
Loading…
Reference in New Issue