From 4fb1b813f4fcf8cfbb0422bdd5120ac2ccfad911 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Tue, 13 Feb 2024 12:00:11 -0800 Subject: [PATCH] Special case lenovo UEFI boot setup Special cases boot/uefi record setup to focus on UEFI nvram updates instead of attempting nvram updates *and* setting the boot device to disk. Closes-Bug: 2053064 Change-Id: Ic6584479a47146577052d17fa3f697eef64ac73c --- ironic/drivers/modules/agent_base.py | 21 +++++++++++++-- ironic/drivers/modules/pxe_base.py | 13 +++++++++ .../unit/drivers/modules/test_agent_base.py | 27 +++++++++++++++++++ ironic/tests/unit/drivers/modules/test_pxe.py | 19 +++++++++++++ ...to-disk-calls-lenovo-39763bfc98f602d8.yaml | 13 +++++++++ 5 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/limit-boot-to-disk-calls-lenovo-39763bfc98f602d8.yaml diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 201d0a7a7e..5fb8b0c1b6 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -1450,11 +1450,28 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): try: persistent = True + # NOTE(TheJulia): We *really* only should be doing this in bios + # boot mode. In UEFI this might just get disregarded, or cause + # issues/failures. if node.driver_info.get('force_persistent_boot_device', 'Default') == 'Never': persistent = False - deploy_utils.try_set_boot_device(task, boot_devices.DISK, - persistent=persistent) + + vendor = task.node.properties.get('vendor', None) + if not (vendor and vendor.lower() == 'lenovo' + and target_boot_mode == 'uefi'): + # Lenovo hardware is modeled on a "just update" + # UEFI nvram model of use, and if multiple actions + # get requested, you can end up in cases where NVRAM + # changes are deleted as the host "restores" to the + # backup. For more information see + # https://bugs.launchpad.net/ironic/+bug/2053064 + # NOTE(TheJulia): We likely just need to do this with + # all hosts in uefi mode, but libvirt VMs don't handle + # nvram only changes *and* this pattern is known to generally + # work for Ironic operators. + deploy_utils.try_set_boot_device(task, boot_devices.DISK, + persistent=persistent) except Exception as e: msg = (_("Failed to change the boot device to %(boot_dev)s " "when deploying node %(node)s: %(error)s") % diff --git a/ironic/drivers/modules/pxe_base.py b/ironic/drivers/modules/pxe_base.py index 380403d9a3..b01cd92291 100644 --- a/ironic/drivers/modules/pxe_base.py +++ b/ironic/drivers/modules/pxe_base.py @@ -335,6 +335,19 @@ class PXEBaseMixin(object): self._node_set_boot_device_for_network_boot(task, persistent=True) else: + vendor = task.node.properties.get('vendor', None) + boot_mode = boot_mode_utils.get_boot_mode(task.node) + if (task.node.provision_state == states.DEPLOYING + and vendor and vendor.lower() == 'lenovo' + and boot_mode == 'uefi' + and boot_device == boot_devices.DISK): + # Lenovo hardware is modeled on a "just update" + # UEFI nvram model of use, and if multiple actions + # get requested, you can end up in cases where NVRAM + # changes are deleted as the host "restores" to the + # backup. For more information see + # https://bugs.launchpad.net/ironic/+bug/2053064 + return manager_utils.node_set_boot_device(task, boot_device, persistent=True) diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py index 6d285cb0cf..b139b1b575 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base.py @@ -1014,6 +1014,33 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): target_boot_mode='whatever', software_raid=False ) + @mock.patch.object(agent_client.AgentClient, 'install_bootloader', + autospec=True) + @mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True) + @mock.patch.object(boot_mode_utils, 'get_boot_mode', autospec=True, + return_value='uefi') + def test_configure_local_boot_lenovo(self, boot_mode_mock, + try_set_boot_device_mock, + install_bootloader_mock): + install_bootloader_mock.return_value = { + 'command_status': 'SUCCESS', 'command_error': None} + props = self.node.properties + props['vendor'] = 'Lenovo' + props['capabilities'] = 'boot_mode:uefi' + self.node.properties = props + self.node.save() + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + task.node.driver_internal_info['is_whole_disk_image'] = False + self.deploy.configure_local_boot(task, root_uuid='some-root-uuid') + try_set_boot_device_mock.assert_not_called() + boot_mode_mock.assert_called_once_with(task.node) + install_bootloader_mock.assert_called_once_with( + mock.ANY, task.node, root_uuid='some-root-uuid', + efi_system_part_uuid=None, prep_boot_part_uuid=None, + target_boot_mode='uefi', software_raid=False + ) + @mock.patch.object(agent_client.AgentClient, 'install_bootloader', autospec=True) @mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True) diff --git a/ironic/tests/unit/drivers/modules/test_pxe.py b/ironic/tests/unit/drivers/modules/test_pxe.py index cc48587bce..64ac79b5b1 100644 --- a/ironic/tests/unit/drivers/modules/test_pxe.py +++ b/ironic/tests/unit/drivers/modules/test_pxe.py @@ -471,6 +471,25 @@ class PXEBootTestCase(db_base.DbTestCase): persistent=True) secure_boot_mock.assert_called_once_with(task) + @mock.patch.object(boot_mode_utils, 'configure_secure_boot_if_needed', + autospec=True) + @mock.patch.object(manager_utils, 'node_set_boot_device', autospec=True) + @mock.patch.object(pxe_utils, 'clean_up_pxe_config', autospec=True) + def test_prepare_instance_lenovo(self, clean_up_pxe_config_mock, + set_boot_device_mock, secure_boot_mock): + props = self.node.properties + props['vendor'] = 'Lenovo' + props['capabilities'] = 'boot_mode:uefi' + self.node.properties = props + self.node.provision_state = states.DEPLOYING + self.node.save() + with task_manager.acquire(self.context, self.node.uuid) as task: + task.driver.boot.prepare_instance(task) + clean_up_pxe_config_mock.assert_called_once_with( + task, ipxe_enabled=False) + set_boot_device_mock.assert_not_called() + secure_boot_mock.assert_called_once_with(task) + @mock.patch.object(manager_utils, 'node_set_boot_device', autospec=True) @mock.patch.object(pxe_utils, 'clean_up_pxe_config', autospec=True) def test_prepare_instance_active(self, clean_up_pxe_config_mock, diff --git a/releasenotes/notes/limit-boot-to-disk-calls-lenovo-39763bfc98f602d8.yaml b/releasenotes/notes/limit-boot-to-disk-calls-lenovo-39763bfc98f602d8.yaml new file mode 100644 index 0000000000..51ad41dec0 --- /dev/null +++ b/releasenotes/notes/limit-boot-to-disk-calls-lenovo-39763bfc98f602d8.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + Fixes issues with Lenovo hardware where the system firmware may display + a blue "Boot Option Restoration" screen after the agent writes an image + to the host in UEFI boot mode, requiring manual intervention before the + deployed node boots. This issue is rooted in multiple changes being made + to the underlying NVRAM configuration of the node. Lenovo engineers + have suggested to *only* change the UEFI NVRAM and not perform + any further changes via the BMC to configure the next boot. Ironic now + does such on Lenovo hardware. More information and background on this + issue can be discovered in + `bug 2053064 `_.