Separate wait_for_provision_state method

Change-Id: I27d976162c503ff494d5786649e273a6385207d2
This commit is contained in:
Dmitry Tantsur 2018-05-28 16:25:31 +02:00
parent 703bddb775
commit 6030b02bb3
4 changed files with 232 additions and 39 deletions

View File

@ -25,7 +25,8 @@ from metalsmith import _utils
LOG = logging.getLogger(__name__)
NODE_FIELDS = ['name', 'uuid', 'instance_info', 'instance_uuid', 'maintenance',
'maintenance_reason', 'properties', 'provision_state', 'extra']
'maintenance_reason', 'properties', 'provision_state', 'extra',
'last_error']
HOSTNAME_FIELD = 'metalsmith_hostname'
@ -195,11 +196,6 @@ class API(object):
if not result['result']:
raise RuntimeError('%s: %s' % (iface, result['reason']))
def wait_for_node_state(self, node, state, timeout):
self.ironic.node.wait_for_provision_state(_node_id(node),
state,
timeout=timeout)
def _node_id(node):
if isinstance(node, six.string_types):

View File

@ -16,6 +16,7 @@
import logging
import random
import sys
import time
import six
@ -237,9 +238,8 @@ class Provisioner(object):
try:
LOG.error('Deploy attempt failed on node %s, cleaning up',
_utils.log_node(node))
self._delete_ports(node, created_ports, attached_ports)
LOG.debug('Releasing lock on node %s', _utils.log_node(node))
self._api.release_node(node)
self._clean_up(node, created_ports=created_ports,
attached_ports=attached_ports)
except Exception:
LOG.exception('Clean up failed')
@ -251,11 +251,12 @@ class Provisioner(object):
LOG.debug('Waiting for node %(node)s to reach state active '
'with timeout %(timeout)s',
{'node': _utils.log_node(node), 'timeout': wait})
self._api.wait_for_node_state(node, 'active', timeout=wait)
node = self.wait_for_provisioning([node], timeout=wait)[0]
LOG.info('Deploy succeeded on node %s', _utils.log_node(node))
else:
# Update the node to return it's latest state
node = self._api.get_node(node, refresh=True)
# Update the node to return it's latest state
node = self._api.get_node(node, refresh=True)
return _instance.Instance(self._api, node)
def _get_nics(self, nics):
@ -346,6 +347,87 @@ class Provisioner(object):
LOG.debug('Failed to clear node %(node)s extra: %(exc)s',
{'node': _utils.log_node(node), 'exc': exc})
def wait_for_provisioning(self, nodes, timeout=None, delay=15):
"""Wait for nodes to be provisioned.
Loops until all nodes finish provisioning.
:param nodes: List of nodes (UUID, name, `Node` object or
:py:class:`metalsmith.Instance`).
:param timeout: How much time (in seconds) to wait for all nodes
to finish provisioning. If ``None`` (the default), wait forever
(more precisely, until the operation times out on server side).
:param delay: Delay (in seconds) between two provision state checks.
:return: List of updated nodes if all succeeded.
:raises: :py:class:`metalsmith.exceptions.DeploymentFailure`
if the deployment failed or timed out for any nodes.
"""
return self._wait_for_state(nodes, 'active',
timeout=timeout, delay=delay)
def _wait_for_state(self, nodes, state, timeout, delay=15):
if timeout is not None and timeout <= 0:
raise ValueError("The timeout argument must be a positive int")
if delay < 0:
raise ValueError("The delay argument must be a non-negative int")
failed_nodes = []
finished_nodes = []
deadline = time.time() + timeout if timeout is not None else None
while timeout is None or time.time() < deadline:
remaining_nodes = []
for node in nodes:
node = self._api.get_node(node, refresh=True,
accept_hostname=True)
if node.provision_state == state:
LOG.debug('Node %(node)s reached state %(state)s',
{'node': _utils.log_node(node), 'state': state})
finished_nodes.append(node)
elif (node.provision_state == 'error' or
node.provision_state.endswith(' failed')):
LOG.error('Node %(node)s failed deployment: %(error)s',
{'node': _utils.log_node(node),
'error': node.last_error})
failed_nodes.append(node)
else:
remaining_nodes.append(node)
if remaining_nodes:
nodes = remaining_nodes
else:
nodes = []
break
LOG.debug('Still waiting for the following nodes to reach state '
'%(state)s: %(nodes)s',
{'state': state,
'nodes': ', '.join(_utils.log_node(n) for n in nodes)})
time.sleep(delay)
messages = []
if failed_nodes:
messages.append('the following nodes failed deployment: %s' %
', '.join('%s (%s)' % (_utils.log_node(node),
node.last_error)
for node in failed_nodes))
if nodes:
messages.append('deployment timed out for nodes %s' %
', '.join(_utils.log_node(node) for node in nodes))
if messages:
raise exceptions.DeploymentFailure(
'Deployment failed: %s' % '; '.join(messages),
failed_nodes + nodes)
else:
LOG.debug('All nodes reached state %s', state)
return finished_nodes
def _clean_up(self, node, created_ports=None, attached_ports=None):
self._delete_ports(node, created_ports, attached_ports)
LOG.debug('Releasing lock on node %s', _utils.log_node(node))
self._api.release_node(node)
def unprovision_node(self, node, wait=None):
"""Unprovision a previously provisioned node.
@ -360,15 +442,13 @@ class Provisioner(object):
LOG.warning("Dry run, not unprovisioning")
return
self._delete_ports(node)
LOG.debug('Releasing lock on node %s', _utils.log_node(node))
self._api.release_node(node)
self._clean_up(node)
self._api.node_action(node, 'deleted')
LOG.info('Deleting started for node %s', _utils.log_node(node))
if wait is not None:
self._api.wait_for_node_state(node, 'available', timeout=wait)
self._wait_for_state([node], 'available', timeout=wait)
LOG.info('Node %s undeployed successfully', _utils.log_node(node))
return self._api.get_node(node, refresh=True)

View File

@ -71,3 +71,14 @@ class UnknownRootDiskSize(Error):
class InvalidNode(Error):
"""This node cannot be deployed onto."""
class DeploymentFailure(Error):
"""One or more nodes have failed the deployment.
:ivar nodes: List of failed nodes.
"""
def __init__(self, message, nodes):
self.nodes = nodes
super(DeploymentFailure, self).__init__(message)

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import fixtures
import mock
import testtools
@ -114,6 +115,10 @@ class TestProvisionNode(Base):
],
'/instance_info/%s' % _os_api.HOSTNAME_FIELD: 'control-0'
}
self.wait_fixture = self.useFixture(
fixtures.MockPatchObject(_provisioner.Provisioner,
'_wait_for_state', autospec=True))
self.wait_mock = self.wait_fixture.mock
def test_ok(self):
inst = self.pr.provision_node(self.node, 'image',
@ -131,7 +136,7 @@ class TestProvisionNode(Base):
validate_deploy=True)
self.api.node_action.assert_called_once_with(self.node, 'active',
configdrive=mock.ANY)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -155,7 +160,7 @@ class TestProvisionNode(Base):
validate_deploy=True)
self.api.node_action.assert_called_once_with(self.node, 'active',
configdrive=mock.ANY)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -178,7 +183,7 @@ class TestProvisionNode(Base):
validate_deploy=True)
self.api.node_action.assert_called_once_with(self.node, 'active',
configdrive=mock.ANY)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -200,7 +205,7 @@ class TestProvisionNode(Base):
validate_deploy=True)
self.api.node_action.assert_called_once_with(self.node, 'active',
configdrive=mock.ANY)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -220,7 +225,7 @@ class TestProvisionNode(Base):
validate_deploy=True)
self.api.node_action.assert_called_once_with(self.node, 'active',
configdrive=mock.ANY)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -244,7 +249,7 @@ class TestProvisionNode(Base):
validate_deploy=True)
self.api.node_action.assert_called_once_with(self.node, 'active',
configdrive=mock.ANY)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -266,7 +271,7 @@ class TestProvisionNode(Base):
validate_deploy=True)
self.api.node_action.assert_called_once_with(self.node, 'active',
configdrive=mock.ANY)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -285,7 +290,7 @@ class TestProvisionNode(Base):
validate_deploy=True)
self.api.node_action.assert_called_once_with(self.node, 'active',
configdrive=mock.ANY)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -306,9 +311,11 @@ class TestProvisionNode(Base):
validate_deploy=True)
self.api.node_action.assert_called_once_with(self.node, 'active',
configdrive=mock.ANY)
self.api.wait_for_node_state.assert_called_once_with(self.node,
'active',
timeout=3600)
self.wait_mock.assert_called_once_with(self.pr,
[self.node],
'active',
delay=15,
timeout=3600)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -320,7 +327,7 @@ class TestProvisionNode(Base):
self.assertFalse(self.api.attach_port_to_node.called)
self.assertFalse(self.api.update_node.called)
self.assertFalse(self.api.node_action.called)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -335,7 +342,7 @@ class TestProvisionNode(Base):
self.assertFalse(self.api.attach_port_to_node.called)
self.assertFalse(self.api.update_node.called)
self.assertFalse(self.api.node_action.called)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
@ -347,7 +354,7 @@ class TestProvisionNode(Base):
wait=3600)
self.api.update_node.assert_any_call(self.node, CLEAN_UP)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.api.release_node.assert_called_once_with(self.node)
self.api.delete_port.assert_called_once_with(
self.api.create_port.return_value.id)
@ -395,7 +402,7 @@ class TestProvisionNode(Base):
'image', [{'network': 'network'}],
wait=3600)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.api.release_node.assert_called_once_with(self.node)
self.api.delete_port.assert_called_once_with(
self.api.create_port.return_value.id)
@ -412,7 +419,7 @@ class TestProvisionNode(Base):
'image', [{'network': 'network'}],
wait=3600)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.api.release_node.assert_called_once_with(self.node)
self.api.delete_port.assert_called_once_with(
self.api.create_port.return_value.id)
@ -420,7 +427,7 @@ class TestProvisionNode(Base):
self.node, self.api.create_port.return_value.id)
def test_wait_failure(self):
self.api.wait_for_node_state.side_effect = RuntimeError('boom')
self.wait_mock.side_effect = RuntimeError('boom')
self.assertRaisesRegex(RuntimeError, 'boom',
self.pr.provision_node, self.node,
'image', [{'network': 'network'}], wait=3600)
@ -579,6 +586,13 @@ class TestProvisionNode(Base):
class TestUnprovisionNode(Base):
def setUp(self):
super(TestUnprovisionNode, self).setUp()
self.wait_fixture = self.useFixture(
fixtures.MockPatchObject(_provisioner.Provisioner,
'_wait_for_state', autospec=True))
self.wait_mock = self.wait_fixture.mock
def test_ok(self):
self.node.extra['metalsmith_created_ports'] = ['port1']
result = self.pr.unprovision_node(self.node)
@ -589,7 +603,7 @@ class TestUnprovisionNode(Base):
'port1')
self.api.node_action.assert_called_once_with(self.node, 'deleted')
self.api.release_node.assert_called_once_with(self.node)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.api.update_node.assert_called_once_with(self.node, CLEAN_UP)
def test_with_attached(self):
@ -602,7 +616,7 @@ class TestUnprovisionNode(Base):
self.api.detach_port_from_node.assert_has_calls(calls, any_order=True)
self.api.node_action.assert_called_once_with(self.node, 'deleted')
self.api.release_node.assert_called_once_with(self.node)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.api.update_node.assert_called_once_with(self.node, CLEAN_UP)
def test_with_wait(self):
@ -615,9 +629,10 @@ class TestUnprovisionNode(Base):
'port1')
self.api.node_action.assert_called_once_with(self.node, 'deleted')
self.api.release_node.assert_called_once_with(self.node)
self.api.wait_for_node_state.assert_called_once_with(self.node,
'available',
timeout=3600)
self.wait_mock.assert_called_once_with(self.pr,
[self.node],
'available',
timeout=3600)
def test_dry_run(self):
self.pr._dry_run = True
@ -628,7 +643,7 @@ class TestUnprovisionNode(Base):
self.assertFalse(self.api.release_node.called)
self.assertFalse(self.api.delete_port.called)
self.assertFalse(self.api.detach_port_from_node.called)
self.assertFalse(self.api.wait_for_node_state.called)
self.assertFalse(self.wait_mock.called)
self.assertFalse(self.api.update_node.called)
@ -656,3 +671,94 @@ class TestShowInstance(Base):
self.assertIs(result[0].node, self.node)
self.assertIs(result[0].uuid, self.node.uuid)
self.api.cache_node_list_for_lookup.assert_called_once_with()
@mock.patch('time.sleep', autospec=True)
class TestWaitForState(Base):
def test_invalid_timeout(self, mock_sleep):
for invalid in (0, -42):
self.assertRaisesRegex(ValueError,
'timeout argument must be a positive',
self.pr.wait_for_provisioning,
['uuid1'], timeout=invalid)
def test_invalid_delay(self, mock_sleep):
self.assertRaisesRegex(ValueError,
'delay argument must be a non-negative',
self.pr.wait_for_provisioning,
['uuid1'], delay=-42)
def test_success_one_node(self, mock_sleep):
nodes = [
mock.Mock(spec=_os_api.NODE_FIELDS, provision_state=state)
for state in ('deploying', 'deploy wait', 'deploying', 'active')
]
self.api.get_node.side_effect = nodes
result = self.pr.wait_for_provisioning(['uuid1'])
self.assertEqual(nodes[-1:], result)
mock_sleep.assert_called_with(15)
self.assertEqual(3, mock_sleep.call_count)
def test_success_several_nodes(self, mock_sleep):
nodes = [
mock.Mock(spec=_os_api.NODE_FIELDS, provision_state=state)
for state in ('deploying', 'deploy wait', # iteration 1
'deploying', 'active', # iteration 2
'active') # iteration 3
]
self.api.get_node.side_effect = nodes
result = self.pr.wait_for_provisioning(['uuid1', 'uuid2'])
self.assertEqual(nodes[-2:], result)
mock_sleep.assert_called_with(15)
self.assertEqual(2, mock_sleep.call_count)
def test_one_node_failed(self, mock_sleep):
nodes = [
mock.Mock(spec=_os_api.NODE_FIELDS, provision_state=state)
for state in ('deploying', 'deploy wait', # iteration 1
'deploying', 'deploy failed', # iteration 2
'active') # iteration 3
]
self.api.get_node.side_effect = nodes
exc = self.assertRaises(exceptions.DeploymentFailure,
self.pr.wait_for_provisioning,
['uuid1', 'uuid2'])
# The exception contains the failed node
self.assertEqual(exc.nodes, [nodes[-2]])
mock_sleep.assert_called_with(15)
self.assertEqual(2, mock_sleep.call_count)
def test_timeout(self, mock_sleep):
def _fake_get(*args, **kwargs):
while True:
yield mock.Mock(spec=_os_api.NODE_FIELDS,
provision_state='deploying')
self.api.get_node.side_effect = _fake_get()
exc = self.assertRaises(exceptions.DeploymentFailure,
self.pr.wait_for_provisioning,
['uuid1', 'uuid2'],
timeout=0.001)
self.assertEqual(2, len(exc.nodes))
mock_sleep.assert_called_with(15)
def test_custom_delay(self, mock_sleep):
nodes = [
mock.Mock(spec=_os_api.NODE_FIELDS, provision_state=state)
for state in ('deploying', 'deploy wait', 'deploying', 'active')
]
self.api.get_node.side_effect = nodes
result = self.pr.wait_for_provisioning(['uuid1'], delay=1)
self.assertEqual(nodes[-1:], result)
mock_sleep.assert_called_with(1)
self.assertEqual(3, mock_sleep.call_count)