Rework exception handling on deploy failures in conductor

Currently on unexpected python error we only log the error message, e.g.
"NoneType object is not iterable" or just KeyError missing key. This is
obviously not helping debugging too much.

This change splits error handling into separate handling of IronicException
and other exceptions.

For IronicException: only log the error message, remove redundant "Error: "
from the last_error message.

For other exceptions: log traceback, mention that the exception was not
expected.

Finally, move logging to the top of the error handling helper to make sure
it never gets lost.

Change-Id: Idc2339c3bdad8092907d9651d40f241a2ae50dbe
Related-Bug: #1741223
This commit is contained in:
Dmitry Tantsur 2018-01-04 11:10:18 +01:00
parent bf8cb05aa4
commit 2b5849b49e
2 changed files with 73 additions and 11 deletions

View File

@ -2851,11 +2851,11 @@ def do_node_deploy(task, conductor_id, configdrive=None):
"""Prepare the environment and deploy a node."""
node = task.node
def handle_failure(e, task, logmsg, errmsg):
def handle_failure(e, task, logmsg, errmsg, traceback=False):
args = {'node': task.node.uuid, 'err': e}
LOG.error(logmsg, args, exc_info=traceback)
# NOTE(deva): there is no need to clear conductor_affinity
task.process_event('fail')
args = {'node': task.node.uuid, 'err': e}
LOG.error(logmsg, args)
node.last_error = errmsg % e
try:
@ -2873,22 +2873,37 @@ def do_node_deploy(task, conductor_id, configdrive=None):
try:
task.driver.deploy.prepare(task)
except Exception as e:
except exception.IronicException as e:
with excutils.save_and_reraise_exception():
handle_failure(
e, task,
('Error while preparing to deploy to node %(node)s: '
'%(err)s'),
_("Failed to prepare to deploy. Error: %s"))
try:
new_state = task.driver.deploy.deploy(task)
_("Failed to prepare to deploy: %s"))
except Exception as e:
with excutils.save_and_reraise_exception():
handle_failure(
e, task,
('Unexpected error while preparing to deploy to node '
'%(node)s'),
_("Failed to prepare to deploy. Exception: %s"),
traceback=True)
try:
new_state = task.driver.deploy.deploy(task)
except exception.IronicException as e:
with excutils.save_and_reraise_exception():
handle_failure(
e, task,
'Error in deploy of node %(node)s: %(err)s',
_("Failed to deploy. Error: %s"))
_("Failed to deploy: %s"))
except Exception as e:
with excutils.save_and_reraise_exception():
handle_failure(
e, task,
'Unexpected error while deploying node %(node)s',
_("Failed to deploy. Exception: %s"),
traceback=True)
# Update conductor_affinity to reference this conductor's ID
# since there may be local persistent state

View File

@ -1431,7 +1431,7 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin,
def test__do_node_deploy_driver_raises_prepare_error(self, mock_prepare,
mock_deploy):
self._start_service()
# test when driver.deploy.prepare raises an exception
# test when driver.deploy.prepare raises an ironic error
mock_prepare.side_effect = exception.InstanceDeployFailure('test')
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.DEPLOYING,
@ -1451,10 +1451,35 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin,
self.assertTrue(mock_prepare.called)
self.assertFalse(mock_deploy.called)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare')
def test__do_node_deploy_unexpected_prepare_error(self, mock_prepare,
mock_deploy):
self._start_service()
# test when driver.deploy.prepare raises an exception
mock_prepare.side_effect = RuntimeError('test')
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.DEPLOYING,
target_provision_state=states.ACTIVE)
task = task_manager.TaskManager(self.context, node.uuid)
self.assertRaises(RuntimeError,
manager.do_node_deploy, task,
self.service.conductor.id)
node.refresh()
self.assertEqual(states.DEPLOYFAIL, node.provision_state)
# NOTE(deva): failing a deploy does not clear the target state
# any longer. Instead, it is cleared when the instance
# is deleted.
self.assertEqual(states.ACTIVE, node.target_provision_state)
self.assertIsNotNone(node.last_error)
self.assertTrue(mock_prepare.called)
self.assertFalse(mock_deploy.called)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
def test__do_node_deploy_driver_raises_error(self, mock_deploy):
self._start_service()
# test when driver.deploy.deploy raises an exception
# test when driver.deploy.deploy raises an ironic error
mock_deploy.side_effect = exception.InstanceDeployFailure('test')
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.DEPLOYING,
@ -1473,6 +1498,28 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin,
self.assertIsNotNone(node.last_error)
mock_deploy.assert_called_once_with(mock.ANY)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
def test__do_node_deploy_driver_unexpected_exception(self, mock_deploy):
self._start_service()
# test when driver.deploy.deploy raises an exception
mock_deploy.side_effect = RuntimeError('test')
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.DEPLOYING,
target_provision_state=states.ACTIVE)
task = task_manager.TaskManager(self.context, node.uuid)
self.assertRaises(RuntimeError,
manager.do_node_deploy, task,
self.service.conductor.id)
node.refresh()
self.assertEqual(states.DEPLOYFAIL, node.provision_state)
# NOTE(deva): failing a deploy does not clear the target state
# any longer. Instead, it is cleared when the instance
# is deleted.
self.assertEqual(states.ACTIVE, node.target_provision_state)
self.assertIsNotNone(node.last_error)
mock_deploy.assert_called_once_with(mock.ANY)
@mock.patch.object(manager, '_store_configdrive')
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
def test__do_node_deploy_ok(self, mock_deploy, mock_store):