Collect ramdisk logs also during cleaning

Conflicts:
	ironic/conductor/cleaning.py
	ironic/drivers/modules/agent_base.py
	ironic/tests/unit/conductor/test_cleaning.py
	ironic/tests/unit/drivers/modules/test_agent_base_vendor.py

Change-Id: Ia60996b4198e6fcfba6094af26498869589e175e
(cherry picked from commit d31e71a736)
This commit is contained in:
Dmitry Tantsur 2020-05-14 14:52:16 +02:00
parent a7b6b6c08f
commit e277c4d3ae
7 changed files with 100 additions and 9 deletions

View File

@ -75,6 +75,7 @@ from ironic.conductor import task_manager
from ironic.conductor import utils
from ironic.conf import CONF
from ironic.drivers import base as drivers_base
from ironic.drivers import utils as driver_utils
from ironic import objects
from ironic.objects import base as objects_base
from ironic.objects import fields
@ -1458,6 +1459,7 @@ class ConductorManager(base_manager.BaseConductorManager):
{'node': node.uuid, 'exc': e,
'step': node.clean_step})
LOG.exception(msg)
driver_utils.collect_ramdisk_logs(task.node, label='cleaning')
utils.cleaning_error_handler(task, msg)
return
@ -1482,6 +1484,9 @@ class ConductorManager(base_manager.BaseConductorManager):
LOG.info('Node %(node)s finished clean step %(step)s',
{'node': node.uuid, 'step': step})
if CONF.agent.deploy_logs_collect == 'always':
driver_utils.collect_ramdisk_logs(node, label='cleaning')
# Clear clean_step
node.clean_step = None
driver_internal_info = node.driver_internal_info

View File

@ -585,6 +585,7 @@ class AgentDeployMixin(HeartbeatMixin):
'err': command.get('command_error'),
'step': node.clean_step})
LOG.error(msg)
driver_utils.collect_ramdisk_logs(task.node, label='cleaning')
return manager_utils.cleaning_error_handler(task, msg)
elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH':
# Cache the new clean steps (and 'hardware_manager_version')
@ -649,6 +650,8 @@ class AgentDeployMixin(HeartbeatMixin):
'cls': e.__class__.__name__,
'step': node.clean_step})
LOG.exception(msg)
driver_utils.collect_ramdisk_logs(task.node,
label='cleaning')
return manager_utils.cleaning_error_handler(task, msg)
if task.node.clean_step.get('reboot_requested'):
@ -665,6 +668,7 @@ class AgentDeployMixin(HeartbeatMixin):
'err': command.get('command_status'),
'step': node.clean_step})
LOG.error(msg)
driver_utils.collect_ramdisk_logs(task.node, label='cleaning')
return manager_utils.cleaning_error_handler(task, msg)
@METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy')

View File

@ -324,7 +324,7 @@ def store_ramdisk_logs(node, logs, label=None):
f.name, object_headers=object_headers)
def collect_ramdisk_logs(node):
def collect_ramdisk_logs(node, label=None):
"""Collect and store the system logs from the IPA ramdisk.
Collect and store the system logs from the IPA ramdisk. This method
@ -332,8 +332,11 @@ def collect_ramdisk_logs(node):
according to the configured storage backend.
:param node: A node object.
:param label: A string to label the log file such as a clean step name.
"""
if CONF.agent.deploy_logs_collect == 'never':
return
client = agent_client.AgentClient()
try:
result = client.collect_system_logs(node)
@ -351,7 +354,8 @@ def collect_ramdisk_logs(node):
return
try:
store_ramdisk_logs(node, result['command_result']['system_logs'])
store_ramdisk_logs(node, result['command_result']['system_logs'],
label=label)
except exception.SwiftOperationError as e:
LOG.error('Failed to store the logs from the node %(node)s '
'deployment in Swift. Error: %(error)s',

View File

@ -4275,12 +4275,14 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
def test__do_next_clean_step_manual_last_step_noop(self):
self._do_next_clean_step_last_step_noop(manual=True)
@mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step',
autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True)
def _do_next_clean_step_all(self, mock_deploy_execute,
mock_power_execute, manual=False):
mock_power_execute, mock_collect_logs,
manual=False):
# Run all steps from start to finish (all synchronous)
tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE
@ -4322,6 +4324,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
mock_deploy_execute.assert_has_calls(
[mock.call(mock.ANY, mock.ANY, self.clean_steps[0]),
mock.call(mock.ANY, mock.ANY, self.clean_steps[2])])
self.assertFalse(mock_collect_logs.called)
def test_do_next_clean_step_automated_all(self):
self._do_next_clean_step_all()
@ -4329,11 +4332,64 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
def test_do_next_clean_step_manual_all(self):
self._do_next_clean_step_all(manual=True)
@mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step',
autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True)
def test_do_next_clean_step_collect_logs(self, mock_deploy_execute,
mock_power_execute,
mock_collect_logs):
CONF.set_override('deploy_logs_collect', 'always', group='agent')
# Run all steps from start to finish (all synchronous)
tgt_prov_state = states.MANAGEABLE
self._start_service()
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.CLEANING,
target_provision_state=tgt_prov_state,
last_error=None,
driver_internal_info={'clean_steps': self.clean_steps,
'clean_step_index': None},
clean_step={})
def fake_deploy(conductor_obj, task, step):
driver_internal_info = task.node.driver_internal_info
driver_internal_info['goober'] = 'test'
task.node.driver_internal_info = driver_internal_info
task.node.save()
mock_deploy_execute.side_effect = fake_deploy
mock_power_execute.return_value = None
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
self.service._do_next_clean_step(task, 0)
self._stop_service()
node.refresh()
# Cleaning should be complete
self.assertEqual(tgt_prov_state, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state)
self.assertEqual({}, node.clean_step)
self.assertNotIn('clean_step_index', node.driver_internal_info)
self.assertEqual('test', node.driver_internal_info['goober'])
self.assertIsNone(node.driver_internal_info['clean_steps'])
mock_power_execute.assert_called_once_with(mock.ANY, mock.ANY,
self.clean_steps[1])
mock_deploy_execute.assert_has_calls(
[mock.call(mock.ANY, mock.ANY, self.clean_steps[0]),
mock.call(mock.ANY, mock.ANY, self.clean_steps[2])])
mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning')
@mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True)
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
def _do_next_clean_step_execute_fail(self, tear_mock, mock_execute,
manual=False):
mock_collect_logs, manual=False):
# When a clean step fails, go to CLEANFAIL
tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE
@ -4365,6 +4421,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
self.assertTrue(node.maintenance)
mock_execute.assert_called_once_with(
mock.ANY, mock.ANY, self.clean_steps[0])
mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning')
def test__do_next_clean_step_automated_execute_fail(self):
self._do_next_clean_step_execute_fail()

View File

@ -1626,6 +1626,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
hook_mock.assert_called_once_with(task, command_status)
notify_mock.assert_called_once_with(task)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
autospec=True)
@mock.patch.object(agent_base_vendor,
@ -1635,7 +1636,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
autospec=True)
def test_continue_cleaning_with_hook_fails(
self, status_mock, error_handler_mock, get_hook_mock,
notify_mock):
notify_mock, collect_logs_mock):
self.node.clean_step = {
'priority': 10,
'interface': 'raid',
@ -1658,6 +1659,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
hook_mock.assert_called_once_with(task, command_status)
error_handler_mock.assert_called_once_with(task, mock.ANY)
self.assertFalse(notify_mock.called)
collect_logs_mock.assert_called_once_with(task.node,
label='cleaning')
@mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
autospec=True)
@ -1704,10 +1707,12 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
self.deploy.continue_cleaning(task)
self.assertFalse(notify_mock.called)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_continue_cleaning_fail(self, status_mock, error_mock):
def test_continue_cleaning_fail(self, status_mock, error_mock,
collect_logs_mock):
# Test that a failure puts the node in CLEANFAIL
status_mock.return_value = [{
'command_status': 'FAILED',
@ -1718,6 +1723,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
shared=False) as task:
self.deploy.continue_cleaning(task)
error_mock.assert_called_once_with(task, mock.ANY)
collect_logs_mock.assert_called_once_with(task.node,
label='cleaning')
@mock.patch.object(conductor_steps, 'set_node_cleaning_steps',
autospec=True)

View File

@ -252,7 +252,16 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase):
logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node)
mock_store.assert_called_once_with(self.node, logs)
mock_store.assert_called_once_with(self.node, logs, label=None)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@mock.patch.object(agent_client.AgentClient,
'collect_system_logs', autospec=True)
def test_collect_ramdisk_logs_with_label(self, mock_collect, mock_store):
logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node, label='logs')
mock_store.assert_called_once_with(self.node, logs, label='logs')
@mock.patch.object(driver_utils.LOG, 'error', autospec=True)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@ -286,7 +295,7 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase):
logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node)
mock_store.assert_called_once_with(self.node, logs)
mock_store.assert_called_once_with(self.node, logs, label=None)
@mock.patch.object(driver_utils.LOG, 'exception', autospec=True)
def test_collect_ramdisk_logs_storage_fail_fs(self, mock_log):

View File

@ -0,0 +1,5 @@
---
other:
- |
Ramdisk logs are now collected during cleaning the same way as during
deployment.