diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index fae7d56ff4..c6221a1e60 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -50,6 +50,7 @@ import eventlet from futurist import periodics from futurist import waiters from ironic_lib import metrics_utils +from oslo_db import exception as db_exception from oslo_log import log import oslo_messaging as messaging from oslo_utils import excutils @@ -2810,6 +2811,7 @@ def _store_configdrive(node, configdrive): i_info = node.instance_info i_info['configdrive'] = configdrive node.instance_info = i_info + node.save() @METRICS.timer('do_node_deploy') @@ -2829,7 +2831,7 @@ def do_node_deploy(task, conductor_id, configdrive=None): try: if configdrive: _store_configdrive(node, configdrive) - except exception.SwiftOperationError as e: + except (exception.SwiftOperationError, exception.ConfigInvalid) as e: with excutils.save_and_reraise_exception(): handle_failure( e, task, @@ -2837,6 +2839,25 @@ def do_node_deploy(task, conductor_id, configdrive=None): '%(node)s to Swift'), _('Failed to upload the configdrive to Swift. ' 'Error: %s')) + except db_exception.DBDataError as e: + with excutils.save_and_reraise_exception(): + # NOTE(hshiina): This error happens when the configdrive is + # too large. Remove the configdrive from the + # object to update DB successfully in handling + # the failure. + node.obj_reset_changes() + handle_failure( + e, task, + ('Error while storing the configdrive for %(node)s into ' + 'the database: %(err)s'), + _("Failed to store the configdrive in the database. : %s")) + except Exception as e: + with excutils.save_and_reraise_exception(): + handle_failure( + e, task, + ('Unexpected error while preparing the configdrive for ' + 'node %(node)s'), + _("Failed to prepare the configdrive. Exception: %s")) try: task.driver.deploy.prepare(task) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index b743d42c69..619451efdb 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -24,6 +24,7 @@ import datetime import eventlet import mock from oslo_config import cfg +from oslo_db import exception as db_exception import oslo_messaging as messaging from oslo_utils import uuidutils from oslo_versionedobjects import base as ovo_base @@ -48,7 +49,6 @@ from ironic.drivers.modules.network import flat as n_flat from ironic import objects from ironic.objects import base as obj_base from ironic.objects import fields as obj_fields -from ironic.tests import base as tests_base from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -1539,6 +1539,59 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin, self.assertIsNotNone(node.last_error) self.assertFalse(mock_deploy.called) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') + def test__do_node_deploy_configdrive_db_error(self, mock_deploy): + self._start_service() + node = obj_utils.create_test_node(self.context, driver='fake', + provision_state=states.DEPLOYING, + target_provision_state=states.ACTIVE) + task = task_manager.TaskManager(self.context, node.uuid) + task.node.save() + expected_instance_info = dict(node.instance_info) + with mock.patch.object(dbapi.IMPL, 'update_node') as mock_db: + db_node = self.dbapi.get_node_by_uuid(node.uuid) + mock_db.side_effect = [db_exception.DBDataError('DB error'), + db_node, db_node] + self.assertRaises(db_exception.DBDataError, + manager.do_node_deploy, task, + self.service.conductor.id, + configdrive=b'fake config drive') + expected_instance_info.update(configdrive=b'fake config drive') + expected_calls = [ + mock.call(node.uuid, + {'version': mock.ANY, + 'instance_info': expected_instance_info}), + mock.call(node.uuid, + {'version': mock.ANY, + 'provision_state': states.DEPLOYFAIL, + 'target_provision_state': states.ACTIVE}), + mock.call(node.uuid, + {'version': mock.ANY, + 'last_error': mock.ANY})] + self.assertEqual(expected_calls, mock_db.mock_calls) + self.assertFalse(mock_deploy.called) + + @mock.patch.object(manager, '_store_configdrive') + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') + def test__do_node_deploy_configdrive_unexpected_error(self, mock_deploy, + mock_store): + self._start_service() + node = obj_utils.create_test_node(self.context, driver='fake', + provision_state=states.DEPLOYING, + target_provision_state=states.ACTIVE) + task = task_manager.TaskManager(self.context, node.uuid) + + mock_store.side_effect = RuntimeError('unexpected') + self.assertRaises(RuntimeError, + manager.do_node_deploy, task, + self.service.conductor.id, + configdrive=b'fake config drive') + node.refresh() + self.assertEqual(states.DEPLOYFAIL, node.provision_state) + self.assertEqual(states.ACTIVE, node.target_provision_state) + self.assertIsNotNone(node.last_error) + self.assertFalse(mock_deploy.called) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') def test__do_node_deploy_ok_2(self, mock_deploy): # NOTE(rloo): a different way of testing for the same thing as in @@ -5284,16 +5337,17 @@ class ManagerSyncLocalStateTestCase(mgr_utils.CommonMixIn, db_base.DbTestCase): @mock.patch.object(swift, 'SwiftAPI') -class StoreConfigDriveTestCase(tests_base.TestCase): +class StoreConfigDriveTestCase(db_base.DbTestCase): def setUp(self): super(StoreConfigDriveTestCase, self).setUp() - self.node = obj_utils.get_test_node(self.context, driver='fake', - instance_info=None) + self.node = obj_utils.create_test_node(self.context, driver='fake', + instance_info=None) def test_store_configdrive(self, mock_swift): manager._store_configdrive(self.node, 'foo') expected_instance_info = {'configdrive': 'foo'} + self.node.refresh() self.assertEqual(expected_instance_info, self.node.instance_info) self.assertFalse(mock_swift.called) @@ -5321,6 +5375,7 @@ class StoreConfigDriveTestCase(tests_base.TestCase): object_headers=expected_obj_header) mock_swift.return_value.get_temp_url.assert_called_once_with( container_name, expected_obj_name, timeout) + self.node.refresh() self.assertEqual(expected_instance_info, self.node.instance_info) diff --git a/releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml b/releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml new file mode 100644 index 0000000000..5fafb4be52 --- /dev/null +++ b/releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Fixes a bug to get a node stuck in ``deploying`` state when the size of + the configdrive exceeds the limitation of the database. In MySQL, the + limitation is about 64KiB. With this fix, the provision state gets + ``deploy failed`` in this case. See `bug 1745630 + `_ for details.