Collect deployment logs from IPA

This patch adds the code to collect the deployment logs from the IPA
ramdisk. The logs can be collect for every deployment, upon a failure or
never. By default, logs are collected upon a failure.

After collection, logs can be storaged either in the local filesystem
(default) or in Swift.

If an error occurs when the logs are being collected, storaged or if the
ramdisk does not support the collect_system_logs command Ironic will log
an error message, but the deployment will proceed.

Documentation on how to enable and other configuration will be done on a
subsequent patch.

Partial-Bug: #1587143
Change-Id: I6da1110daa94ea25670f71f9862e51cc9bbc6f93
This commit is contained in:
Lucas Alvares Gomes 2016-06-29 16:47:16 +01:00
parent bc106b56bb
commit cd7507f04b
13 changed files with 422 additions and 30 deletions

View File

@ -417,6 +417,33 @@
# (string value)
#agent_api_version = v1
# Whether Ironic should collect the deployment logs on
# deployment failure (on_failure), always or never. (string
# value)
# Allowed values: always, on_failure, never
#deploy_logs_collect = on_failure
# The name of the storage backend where the logs will be
# stored. (string value)
# Allowed values: local, swift
#deploy_logs_storage_backend = local
# The path to the directory where the logs should be stored,
# used when the deploy_logs_storage_backend is configured to
# "local". (string value)
#deploy_logs_local_path = /var/log/ironic/deploy
# The name of the Swift container to store the logs, used when
# the deploy_logs_storage_backend is configured to "swift".
# (string value)
#deploy_logs_swift_container = ironic_deploy_logs_container
# Number of days before a log object is marked as expired in
# Swift. If None, the logs will be kept forever or until
# manually deleted. Used when the deploy_logs_storage_backend
# is configured to "swift". (integer value)
#deploy_logs_swift_days_to_expire = 30
[amt]
@ -1929,10 +1956,6 @@
# value)
#max_polling_attempts = 12
# Period (in seconds) for periodic tasks to be executed.
# (integer value)
#periodic_check_interval = 300
# Whether to enable the periodic tasks for OneView driver be
# aware when OneView hardware resources are taken and released
# by Ironic or OneView users and proactively manage nodes in
@ -1940,6 +1963,10 @@
# hardware resources allocation in OneView. (boolean value)
#enable_periodic_tasks = true
# Period (in seconds) for periodic tasks to be executed when
# enable_periodic_tasks=True. (integer value)
#periodic_check_interval = 300
[oslo_concurrency]

View File

@ -59,7 +59,34 @@ opts = [
cfg.StrOpt('agent_api_version',
default='v1',
help=_('API version to use for communicating with the ramdisk '
'agent.'))
'agent.')),
cfg.StrOpt('deploy_logs_collect',
choices=['always', 'on_failure', 'never'],
default='on_failure',
help=_('Whether Ironic should collect the deployment logs on '
'deployment failure (on_failure), always or never.')),
cfg.StrOpt('deploy_logs_storage_backend',
choices=['local', 'swift'],
default='local',
help=_('The name of the storage backend where the logs '
'will be stored.')),
cfg.StrOpt('deploy_logs_local_path',
default='/var/log/ironic/deploy',
help=_('The path to the directory where the logs should be '
'stored, used when the deploy_logs_storage_backend '
'is configured to "local".')),
cfg.StrOpt('deploy_logs_swift_container',
default='ironic_deploy_logs_container',
help=_('The name of the Swift container to store the logs, '
'used when the deploy_logs_storage_backend is '
'configured to "swift".')),
cfg.IntOpt('deploy_logs_swift_days_to_expire',
default=30,
help=_('Number of days before a log object is marked as '
'expired in Swift. If None, the logs will be kept '
'forever or until manually deleted. Used when the '
'deploy_logs_storage_backend is configured to '
'"swift".')),
]

View File

@ -41,6 +41,7 @@ from ironic.conf import CONF
from ironic.drivers import base
from ironic.drivers.modules import agent_client
from ironic.drivers.modules import deploy_utils
from ironic.drivers import utils as driver_utils
from ironic import objects
LOG = log.getLogger(__name__)
@ -533,6 +534,10 @@ class AgentDeployMixin(object):
return task.driver.power.get_power_state(task)
node = task.node
if CONF.agent.deploy_logs_collect == 'always':
driver_utils.collect_ramdisk_logs(node)
# Whether ironic should power off the node via out-of-band or
# in-band methods
oob_power_off = strutils.bool_from_string(

View File

@ -229,3 +229,10 @@ class AgentClient(object):
method='standby.sync',
params={},
wait=True)
def collect_system_logs(self, node):
"""Collect and package diagnostic and support data from the ramdisk."""
return self._command(node=node,
method='log.collect_system_logs',
params={},
wait=True)

View File

@ -474,6 +474,10 @@ def set_failed_state(task, msg):
:param msg: the message to set in last_error of the node.
"""
node = task.node
if CONF.agent.deploy_logs_collect in ('on_failure', 'always'):
driver_utils.collect_ramdisk_logs(node)
try:
task.process_event('fail')
except exception.InvalidState:

View File

@ -12,18 +12,29 @@
# License for the specific language governing permissions and limitations
# under the License.
import base64
import os
import tempfile
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import timeutils
import six
from ironic.common import exception
from ironic.common.i18n import _
from ironic.common.i18n import _LE
from ironic.common.i18n import _LW
from ironic.common import swift
from ironic.conductor import utils
from ironic.drivers import base
from ironic.drivers.modules import agent_client
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
class MixinVendorInterface(base.VendorInterface):
"""Wrapper around multiple VendorInterfaces."""
@ -251,3 +262,101 @@ def normalize_mac(mac):
:return: Normalized MAC address string.
"""
return mac.replace('-', '').replace(':', '').lower()
def get_ramdisk_logs_file_name(node):
"""Construct the log file name.
:param node: A node object.
:returns: The log file name.
"""
timestamp = timeutils.utcnow().strftime('%Y-%m-%d-%H:%M:%S')
file_name_fields = [node.uuid]
if node.instance_uuid:
file_name_fields.append(node.instance_uuid)
file_name_fields.append(timestamp)
return '_'.join(file_name_fields) + '.tar.gz'
def store_ramdisk_logs(node, logs):
"""Store the ramdisk logs.
This method stores the ramdisk logs according to the configured
storage backend.
:param node: A node object.
:param logs: A gzipped and base64 encoded string containing the
logs archive.
:raises: OSError if the directory to save the logs cannot be created.
:raises: IOError when the logs can't be saved to the local file system.
:raises: SwiftOperationError, if any operation with Swift fails.
"""
logs_file_name = get_ramdisk_logs_file_name(node)
data = base64.b64decode(logs)
if CONF.agent.deploy_logs_storage_backend == 'local':
if not os.path.exists(CONF.agent.deploy_logs_local_path):
os.makedirs(CONF.agent.deploy_logs_local_path)
log_path = os.path.join(CONF.agent.deploy_logs_local_path,
logs_file_name)
with open(log_path, 'wb') as f:
f.write(data)
elif CONF.agent.deploy_logs_storage_backend == 'swift':
with tempfile.NamedTemporaryFile(dir=CONF.tempdir) as f:
f.write(data)
f.flush()
# convert days to seconds
timeout = CONF.agent.deploy_logs_swift_days_to_expire * 86400
object_headers = {'X-Delete-After': timeout}
swift_api = swift.SwiftAPI()
swift_api.create_object(
CONF.agent.deploy_logs_swift_container, logs_file_name,
f.name, object_headers=object_headers)
def collect_ramdisk_logs(node):
"""Collect and store the system logs from the IPA ramdisk.
Collect and store the system logs from the IPA ramdisk. This method
makes a call to the IPA ramdisk to collect the logs and store it
according to the configured storage backend.
:param node: A node object.
"""
client = agent_client.AgentClient()
try:
result = client.collect_system_logs(node)
except exception.IronicException as e:
LOG.error(_LE('Failed to invoke collect_system_logs agent command '
'for node %(node)s. Error: %(error)s'),
{'node': node.uuid, 'error': e})
return
error = result.get('faultstring')
if error is not None:
LOG.error(_LE('Failed to collect logs from the node %(node)s '
'deployment. Error: %(error)s'),
{'node': node.uuid, 'error': error})
return
try:
store_ramdisk_logs(node, result['command_result']['system_logs'])
except exception.SwiftOperationError as e:
LOG.error(_LE('Failed to store the logs from the node %(node)s '
'deployment in Swift. Error: %(error)s'),
{'node': node.uuid, 'error': e})
except EnvironmentError as e:
LOG.exception(_LE('Failed to store the logs from the node %(node)s '
'deployment due a file-system related error. '
'Error: %(error)s'),
{'node': node.uuid, 'error': e})
except Exception as e:
LOG.exception(_LE('Unknown error when storing logs from the node '
'%(node)s deployment. Error: %(error)s'),
{'node': node.uuid, 'error': e})

View File

@ -28,6 +28,7 @@ from ironic.drivers.modules import agent_client
from ironic.drivers.modules.oneview import power
from ironic.drivers.modules.oneview import vendor
from ironic.drivers.modules import pxe
from ironic.drivers import utils as driver_utils
from ironic.tests.unit.conductor import mgr_utils
from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.db import utils as db_utils
@ -152,6 +153,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(power.OneViewPower, 'get_power_state',
@ -160,7 +162,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
spec=types.FunctionType)
def test_reboot_and_finish_deploy_power_action_fails(
self, power_off_mock, get_power_state_mock,
node_power_action_mock):
node_power_action_mock, collect_ramdisk_logs_mock):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
@ -179,6 +181,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
mock.call(task, states.POWER_OFF)])
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
collect_ramdisk_logs_mock.assert_called_once_with(task.node)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(power.OneViewPower, 'get_power_state',

View File

@ -31,6 +31,7 @@ from ironic.drivers.modules import agent_client
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules import fake
from ironic.drivers.modules import pxe
from ironic.drivers import utils as driver_utils
from ironic.tests.unit.conductor import mgr_utils
from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.db import utils as db_utils
@ -900,6 +901,7 @@ class TestAgentVendor(db_base.DbTestCase):
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
self.assertFalse(uuid_mock.called)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(agent.AgentVendorInterface, '_get_uuid_from_result',
autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@ -912,12 +914,10 @@ class TestAgentVendor(db_base.DbTestCase):
@mock.patch('ironic.drivers.modules.agent.AgentVendorInterface'
'.check_deploy_success', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True)
def test_reboot_to_instance_boot_error(self, clean_pxe_mock,
check_deploy_mock,
prepare_mock, power_off_mock,
get_power_state_mock,
node_power_action_mock,
uuid_mock):
def test_reboot_to_instance_boot_error(
self, clean_pxe_mock, check_deploy_mock, prepare_mock,
power_off_mock, get_power_state_mock, node_power_action_mock,
uuid_mock, collect_ramdisk_logs_mock):
check_deploy_mock.return_value = "Error"
uuid_mock.return_value = None
self.node.provision_state = states.DEPLOYWAIT
@ -936,6 +936,7 @@ class TestAgentVendor(db_base.DbTestCase):
check_deploy_mock.assert_called_once_with(mock.ANY, task.node)
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
collect_ramdisk_logs_mock.assert_called_once_with(task.node)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'configure_local_boot', autospec=True)

View File

@ -32,6 +32,7 @@ from ironic.drivers.modules import agent_client
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules import fake
from ironic.drivers.modules import pxe
from ironic.drivers import utils as driver_utils
from ironic import objects
from ironic.tests.unit.conductor import mgr_utils
from ironic.tests.unit.db import base as db_base
@ -582,15 +583,17 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
mock_touch.assert_called_once_with(mock.ANY)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state',
spec=types.FunctionType)
@mock.patch.object(agent_client.AgentClient, 'power_off',
spec=types.FunctionType)
def test_reboot_and_finish_deploy(self, power_off_mock,
get_power_state_mock,
node_power_action_mock):
def test_reboot_and_finish_deploy(
self, power_off_mock, get_power_state_mock,
node_power_action_mock, mock_collect):
cfg.CONF.set_override('deploy_logs_collect', 'always', 'agent')
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
@ -605,7 +608,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
task, states.POWER_ON)
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
mock_collect.assert_called_once_with(task.node)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state',
@ -619,7 +624,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
def test_reboot_and_finish_deploy_soft_poweroff_doesnt_complete(
self, configure_tenant_net_mock, remove_provisioning_net_mock,
power_off_mock, get_power_state_mock,
node_power_action_mock):
node_power_action_mock, mock_collect):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
@ -637,7 +642,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
configure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
self.assertFalse(mock_collect.called)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'power_off',
spec=types.FunctionType)
@ -647,7 +654,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
'configure_tenant_networks', spec_set=True, autospec=True)
def test_reboot_and_finish_deploy_soft_poweroff_fails(
self, configure_tenant_net_mock, remove_provisioning_net_mock,
power_off_mock, node_power_action_mock):
power_off_mock, node_power_action_mock, mock_collect):
power_off_mock.side_effect = RuntimeError("boom")
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
@ -664,7 +671,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
configure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
self.assertFalse(mock_collect.called)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state',
@ -677,7 +686,8 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
'configure_tenant_networks', spec_set=True, autospec=True)
def test_reboot_and_finish_deploy_get_power_state_fails(
self, configure_tenant_net_mock, remove_provisioning_net_mock,
power_off_mock, get_power_state_mock, node_power_action_mock):
power_off_mock, get_power_state_mock, node_power_action_mock,
mock_collect):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
@ -695,7 +705,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
configure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
self.assertFalse(mock_collect.called)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state',
@ -704,7 +716,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
spec=types.FunctionType)
def test_reboot_and_finish_deploy_power_action_fails(
self, power_off_mock, get_power_state_mock,
node_power_action_mock):
node_power_action_mock, mock_collect):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
@ -721,12 +733,14 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
mock.call(task, states.POWER_OFF)])
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
mock_collect.assert_called_once_with(task.node)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'sync',
spec=types.FunctionType)
def test_reboot_and_finish_deploy_power_action_oob_power_off(
self, sync_mock, node_power_action_mock):
self, sync_mock, node_power_action_mock, mock_collect):
# Enable force power off
driver_info = self.node.driver_info
driver_info['deploy_forces_oob_reboot'] = True
@ -746,13 +760,15 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
])
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
self.assertFalse(mock_collect.called)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(agent_base_vendor.LOG, 'warning', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'sync',
spec=types.FunctionType)
def test_reboot_and_finish_deploy_power_action_oob_power_off_failed(
self, sync_mock, node_power_action_mock, log_mock):
self, sync_mock, node_power_action_mock, log_mock, mock_collect):
# Enable force power off
driver_info = self.node.driver_info
driver_info['deploy_forces_oob_reboot'] = True
@ -779,6 +795,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
'Failed to flush the file system prior to hard rebooting the '
'node %(node)s. Error: %(error)s',
{'node': task.node.uuid, 'error': log_error})
self.assertFalse(mock_collect.called)
@mock.patch.object(agent_client.AgentClient, 'install_bootloader',
autospec=True)
@ -841,10 +858,12 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
try_set_boot_device_mock.assert_called_once_with(
task, boot_devices.DISK)
@mock.patch.object(agent_client.AgentClient, 'collect_system_logs',
autospec=True)
@mock.patch.object(agent_client.AgentClient, 'install_bootloader',
autospec=True)
def test_configure_local_boot_boot_loader_install_fail(
self, install_bootloader_mock):
self, install_bootloader_mock, collect_logs_mock):
install_bootloader_mock.return_value = {
'command_status': 'FAILED', 'command_error': 'boom'}
self.node.provision_state = states.DEPLOYING
@ -859,14 +878,18 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
install_bootloader_mock.assert_called_once_with(
mock.ANY, task.node, root_uuid='some-root-uuid',
efi_system_part_uuid=None)
collect_logs_mock.assert_called_once_with(mock.ANY, task.node)
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
@mock.patch.object(agent_client.AgentClient, 'collect_system_logs',
autospec=True)
@mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'install_bootloader',
autospec=True)
def test_configure_local_boot_set_boot_device_fail(
self, install_bootloader_mock, try_set_boot_device_mock):
self, install_bootloader_mock, try_set_boot_device_mock,
collect_logs_mock):
install_bootloader_mock.return_value = {
'command_status': 'SUCCESS', 'command_error': None}
try_set_boot_device_mock.side_effect = RuntimeError('error')
@ -884,6 +907,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
efi_system_part_uuid=None)
try_set_boot_device_mock.assert_called_once_with(
task, boot_devices.DISK)
collect_logs_mock.assert_called_once_with(mock.ANY, task.node)
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
self.assertEqual(states.ACTIVE, task.node.target_provision_state)

View File

@ -39,6 +39,7 @@ from ironic.drivers.modules import agent_client
from ironic.drivers.modules import deploy_utils as utils
from ironic.drivers.modules import image_cache
from ironic.drivers.modules import pxe
from ironic.drivers import utils as driver_utils
from ironic.tests import base as tests_base
from ironic.tests.unit.conductor import mgr_utils
from ironic.tests.unit.db import base as db_base
@ -1275,7 +1276,8 @@ class OtherFunctionTestCase(db_base.DbTestCase):
else:
self.assertFalse(mock_log.called)
def test_set_failed_state(self):
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
def test_set_failed_state(self, mock_collect):
exc_state = exception.InvalidState('invalid state')
exc_param = exception.InvalidParameterValue('invalid parameter')
mock_call = mock.call(mock.ANY)
@ -1290,8 +1292,10 @@ class OtherFunctionTestCase(db_base.DbTestCase):
self._test_set_failed_state(event_value=iter([exc_state] * len(calls)),
power_value=iter([exc_param] * len(calls)),
log_calls=calls)
self.assertEqual(4, mock_collect.call_count)
def test_set_failed_state_no_poweroff(self):
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
def test_set_failed_state_no_poweroff(self, mock_collect):
cfg.CONF.set_override('power_off_after_deploy_failure', False,
'deploy')
exc_state = exception.InvalidState('invalid state')
@ -1308,6 +1312,21 @@ class OtherFunctionTestCase(db_base.DbTestCase):
self._test_set_failed_state(event_value=iter([exc_state] * len(calls)),
power_value=iter([exc_param] * len(calls)),
log_calls=calls, poweroff=False)
self.assertEqual(4, mock_collect.call_count)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
def test_set_failed_state_collect_deploy_logs(self, mock_collect):
for opt in ('always', 'on_failure'):
cfg.CONF.set_override('deploy_logs_collect', opt, 'agent')
self._test_set_failed_state()
mock_collect.assert_called_once_with(mock.ANY)
mock_collect.reset_mock()
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
def test_set_failed_state_collect_deploy_logs_never(self, mock_collect):
cfg.CONF.set_override('deploy_logs_collect', 'never', 'agent')
self._test_set_failed_state()
self.assertFalse(mock_collect.called)
def test_get_boot_option(self):
self.node.instance_info = {'capabilities': '{"boot_option": "local"}'}

View File

@ -37,6 +37,7 @@ from ironic.drivers.modules import agent_client
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules import iscsi_deploy
from ironic.drivers.modules import pxe
from ironic.drivers import utils as driver_utils
from ironic.tests.unit.conductor import mgr_utils
from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.db import utils as db_utils
@ -157,12 +158,14 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase):
mock_unlink.assert_called_once_with('/path/uuid/disk')
mock_rmtree.assert_called_once_with('/path/uuid')
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(iscsi_deploy, '_save_disk_layout', autospec=True)
@mock.patch.object(iscsi_deploy, 'InstanceImageCache', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(deploy_utils, 'deploy_partition_image', autospec=True)
def test_continue_deploy_fail(self, deploy_mock, power_mock,
mock_image_cache, mock_disk_layout):
def test_continue_deploy_fail(
self, deploy_mock, power_mock, mock_image_cache, mock_disk_layout,
mock_collect_logs):
kwargs = {'address': '123456', 'iqn': 'aaa-bbb'}
deploy_mock.side_effect = exception.InstanceDeployFailure(
"test deploy error")
@ -184,13 +187,16 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase):
mock_image_cache.assert_called_once_with()
mock_image_cache.return_value.clean_up.assert_called_once_with()
self.assertFalse(mock_disk_layout.called)
mock_collect_logs.assert_called_once_with(task.node)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(iscsi_deploy, '_save_disk_layout', autospec=True)
@mock.patch.object(iscsi_deploy, 'InstanceImageCache', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(deploy_utils, 'deploy_partition_image', autospec=True)
def test_continue_deploy_fail_no_root_uuid_or_disk_id(
self, deploy_mock, power_mock, mock_image_cache, mock_disk_layout):
self, deploy_mock, power_mock, mock_image_cache, mock_disk_layout,
mock_collect_logs):
kwargs = {'address': '123456', 'iqn': 'aaa-bbb'}
deploy_mock.return_value = {}
self.node.provision_state = states.DEPLOYWAIT
@ -211,13 +217,16 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase):
mock_image_cache.assert_called_once_with()
mock_image_cache.return_value.clean_up.assert_called_once_with()
self.assertFalse(mock_disk_layout.called)
mock_collect_logs.assert_called_once_with(task.node)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(iscsi_deploy, '_save_disk_layout', autospec=True)
@mock.patch.object(iscsi_deploy, 'InstanceImageCache', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(deploy_utils, 'deploy_partition_image', autospec=True)
def test_continue_deploy_fail_empty_root_uuid(
self, deploy_mock, power_mock, mock_image_cache, mock_disk_layout):
self, deploy_mock, power_mock, mock_image_cache,
mock_disk_layout, mock_collect_logs):
kwargs = {'address': '123456', 'iqn': 'aaa-bbb'}
deploy_mock.return_value = {'root uuid': ''}
self.node.provision_state = states.DEPLOYWAIT
@ -238,6 +247,7 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase):
mock_image_cache.assert_called_once_with()
mock_image_cache.return_value.clean_up.assert_called_once_with()
self.assertFalse(mock_disk_layout.called)
mock_collect_logs.assert_called_once_with(task.node)
@mock.patch.object(iscsi_deploy, '_save_disk_layout', autospec=True)
@mock.patch.object(iscsi_deploy, 'LOG', autospec=True)
@ -424,7 +434,9 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase):
agent_client_mock.start_iscsi_target.assert_called_once_with(
task.node, expected_iqn, 3260, wipe_disk_metadata=False)
def test_do_agent_iscsi_deploy_start_iscsi_failure(self):
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
def test_do_agent_iscsi_deploy_start_iscsi_failure(
self, mock_collect_logs):
agent_client_mock = mock.MagicMock(spec_set=agent_client.AgentClient)
agent_client_mock.start_iscsi_target.return_value = {
'command_status': 'FAILED', 'command_error': 'booom'}
@ -444,6 +456,7 @@ class IscsiDeployMethodsTestCase(db_base.DbTestCase):
self.assertEqual(states.DEPLOYFAIL, self.node.provision_state)
self.assertEqual(states.ACTIVE, self.node.target_provision_state)
self.assertIsNotNone(self.node.last_error)
mock_collect_logs.assert_called_once_with(task.node)
@mock.patch('ironic.drivers.modules.deploy_utils.get_ironic_api_url')
def test_validate_good_api_url(self, mock_get_url):

View File

@ -13,14 +13,22 @@
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import os
import mock
from oslo_config import cfg
from oslo_utils import timeutils
from ironic.common import driver_factory
from ironic.common import exception
from ironic.common import swift
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import agent_client
from ironic.drivers.modules import fake
from ironic.drivers import utils as driver_utils
from ironic.tests import base as tests_base
from ironic.tests.unit.conductor import mgr_utils
from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.objects import utils as obj_utils
@ -237,3 +245,136 @@ class UtilsTestCase(db_base.DbTestCase):
mac_raw = u"0A:1B-2C-3D:4F"
mac_clean = driver_utils.normalize_mac(mac_raw)
self.assertEqual("0a1b2c3d4f", mac_clean)
class UtilsRamdiskLogsTestCase(tests_base.TestCase):
def setUp(self):
super(UtilsRamdiskLogsTestCase, self).setUp()
self.node = obj_utils.get_test_node(self.context)
@mock.patch.object(timeutils, 'utcnow', autospec=True)
def test_get_ramdisk_logs_file_name(self, mock_utcnow):
mock_utcnow.return_value = datetime.datetime(2000, 1, 1, 0, 0)
name = driver_utils.get_ramdisk_logs_file_name(self.node)
expected_name = ('1be26c0b-03f2-4d2e-ae87-c02d7f33c123_'
'2000-01-01-00:00:00.tar.gz')
self.assertEqual(expected_name, name)
# with instance_info
instance_uuid = '7a5641ba-d264-424a-a9d7-e2a293ca482b'
node2 = obj_utils.get_test_node(
self.context, instance_uuid=instance_uuid)
name = driver_utils.get_ramdisk_logs_file_name(node2)
expected_name = ('1be26c0b-03f2-4d2e-ae87-c02d7f33c123_' +
instance_uuid + '_2000-01-01-00:00:00.tar.gz')
self.assertEqual(expected_name, name)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@mock.patch.object(agent_client.AgentClient,
'collect_system_logs', autospec=True)
def test_collect_ramdisk_logs(self, mock_collect, mock_store):
logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node)
mock_store.assert_called_once_with(self.node, logs)
@mock.patch.object(driver_utils.LOG, 'error', autospec=True)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@mock.patch.object(agent_client.AgentClient,
'collect_system_logs', autospec=True)
def test_collect_ramdisk_logs_IPA_command_fail(
self, mock_collect, mock_store, mock_log):
error_str = 'MR. KRABS! I WANNA GO TO BED!'
mock_collect.return_value = {'faultstring': error_str}
driver_utils.collect_ramdisk_logs(self.node)
# assert store was never invoked
self.assertFalse(mock_store.called)
mock_log.assert_called_once_with(
mock.ANY, {'node': self.node.uuid, 'error': error_str})
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@mock.patch.object(agent_client.AgentClient,
'collect_system_logs', autospec=True)
def test_collect_ramdisk_logs_storage_command_fail(
self, mock_collect, mock_store):
mock_collect.side_effect = exception.IronicException('boom')
self.assertIsNone(driver_utils.collect_ramdisk_logs(self.node))
self.assertFalse(mock_store.called)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@mock.patch.object(agent_client.AgentClient,
'collect_system_logs', autospec=True)
def _collect_ramdisk_logs_storage_fail(
self, expected_exception, mock_collect, mock_store):
mock_store.side_effect = expected_exception
logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node)
mock_store.assert_called_once_with(self.node, logs)
@mock.patch.object(driver_utils.LOG, 'exception', autospec=True)
def test_collect_ramdisk_logs_storage_fail_fs(self, mock_log):
error = IOError('boom')
self._collect_ramdisk_logs_storage_fail(error)
mock_log.assert_called_once_with(
mock.ANY, {'node': self.node.uuid, 'error': error})
self.assertIn('file-system', mock_log.call_args[0][0])
@mock.patch.object(driver_utils.LOG, 'error', autospec=True)
def test_collect_ramdisk_logs_storage_fail_swift(self, mock_log):
error = exception.SwiftOperationError('boom')
self._collect_ramdisk_logs_storage_fail(error)
mock_log.assert_called_once_with(
mock.ANY, {'node': self.node.uuid, 'error': error})
self.assertIn('Swift', mock_log.call_args[0][0])
@mock.patch.object(driver_utils.LOG, 'exception', autospec=True)
def test_collect_ramdisk_logs_storage_fail_unkown(self, mock_log):
error = Exception('boom')
self._collect_ramdisk_logs_storage_fail(error)
mock_log.assert_called_once_with(
mock.ANY, {'node': self.node.uuid, 'error': error})
self.assertIn('Unknown error', mock_log.call_args[0][0])
@mock.patch.object(swift, 'SwiftAPI', autospec=True)
@mock.patch.object(driver_utils,
'get_ramdisk_logs_file_name', autospec=True)
def test_store_ramdisk_logs_swift(self, mock_logs_name, mock_swift):
container_name = 'ironic_test_container'
file_name = 'ironic_test_file.tar.gz'
b64str = 'ZW5jb2RlZHN0cmluZw==\n'
cfg.CONF.set_override('deploy_logs_storage_backend', 'swift', 'agent')
cfg.CONF.set_override(
'deploy_logs_swift_container', container_name, 'agent')
cfg.CONF.set_override('deploy_logs_swift_days_to_expire', 1, 'agent')
mock_logs_name.return_value = file_name
driver_utils.store_ramdisk_logs(self.node, b64str)
mock_swift.return_value.create_object.assert_called_once_with(
container_name, file_name, mock.ANY,
object_headers={'X-Delete-After': 86400})
mock_logs_name.assert_called_once_with(self.node)
@mock.patch.object(os, 'makedirs', autospec=True)
@mock.patch.object(driver_utils,
'get_ramdisk_logs_file_name', autospec=True)
def test_store_ramdisk_logs_local(self, mock_logs_name, mock_makedirs):
file_name = 'ironic_test_file.tar.gz'
b64str = 'ZW5jb2RlZHN0cmluZw==\n'
log_path = '/foo/bar'
cfg.CONF.set_override('deploy_logs_local_path', log_path, 'agent')
mock_logs_name.return_value = file_name
with mock.patch.object(driver_utils, 'open', new=mock.mock_open(),
create=True) as mock_open:
driver_utils.store_ramdisk_logs(self.node, b64str)
expected_path = os.path.join(log_path, file_name)
mock_open.assert_called_once_with(expected_path, 'wb')
mock_makedirs.assert_called_once_with(log_path)
mock_logs_name.assert_called_once_with(self.node)

View File

@ -0,0 +1,12 @@
---
features:
- Adds support for collecting deployment logs from the IPA
ramdisk. Five new configuration options were added, [agent]/group
deploy_logs_collect, [agent]deploy_logs_storage_backend,
[agent]/deploy_logs_local_path, [agent]/deploy_logs_swift_container and
[agent]/deploy_logs_swift_days_to_expire.
upgrades:
- Collecting logs on deploy failure is enabled by default and the logs
will be saved to the local disk. Operators upgrading may want to disable
this feature, enable some form of rotation for the logs or change the
configuration to store the logs in Swift to avoid disk space problems.