diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index f8107b555..a7b8ba3b8 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -203,6 +203,36 @@ def _get_component_devices(raid_device): return component_devices +def _get_actual_component_devices(raid_device): + """Get the component devices of a Software RAID device. + + Examine an md device and return its constituent devices. + + :param raid_device: A Software RAID block device name. + :returns: A list of the component devices. + """ + if not raid_device: + return [] + + try: + out, _ = utils.execute('mdadm', '--detail', raid_device, + use_standard_locale=True) + except processutils.ProcessExecutionError as e: + msg = ('Could not get component devices of %(dev)s: %(err)s' % + {'dev': raid_device, 'err': e}) + LOG.warning(msg) + return [] + + component_devices = [] + lines = out.splitlines() + # the first line contains the md device itself + for line in lines[1:]: + device = re.findall(r'/dev/\w+', line) + component_devices += device + + return component_devices + + def _calc_memory(sys_dict): physical = 0 for sys_child in sys_dict['children']: @@ -1866,6 +1896,20 @@ class GenericHardwareManager(HardwareManager): md_device, ' '.join(component_devices), e) raise errors.SoftwareRAIDError(msg) + # check for missing devices and re-add them + actual_components = _get_actual_component_devices(md_device) + missing = list(set(component_devices) - set(actual_components)) + for dev in missing: + try: + LOG.warning('Found %s to be missing from %s ' + '... 
re-adding!', dev, md_device) + utils.execute('mdadm', '--add', md_device, dev, + attempts=3, delay_on_retry=True) + except processutils.ProcessExecutionError as e: + msg = "Failed re-add {} to {}: {}".format( + dev, md_device, e) + raise errors.SoftwareRAIDError(msg) + LOG.info("Successfully created Software RAID") return raid_config diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index b9348912c..afa1b96fd 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -2960,11 +2960,13 @@ class TestGenericHardwareManager(base.IronicAgentTest): mocked_create.assert_called_once_with(self.hardware, self.node, [], raid_config) + @mock.patch.object(hardware, '_get_actual_component_devices', + autospec=True) @mock.patch.object(disk_utils, 'list_partitions', autospec=True) @mock.patch.object(utils, 'execute', autospec=True) @mock.patch.object(os.path, 'isdir', autospec=True, return_value=False) def test_create_configuration(self, mocked_os_path_isdir, mocked_execute, - mock_list_parts): + mock_list_parts, mocked_actual_comp): node = self.node raid_config = { @@ -3003,6 +3005,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): None, None # mdadms ] + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sda2', '/dev/sdb2'), + ] + result = self.hardware.create_configuration(node, []) mocked_os_path_isdir.assert_has_calls([ mock.call('/sys/firmware/efi') @@ -3037,12 +3044,14 @@ class TestGenericHardwareManager(base.IronicAgentTest): mock.call(x) for x in ['/dev/sda', '/dev/sdb'] ]) + @mock.patch.object(hardware, '_get_actual_component_devices', + autospec=True) @mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios') @mock.patch.object(disk_utils, 'list_partitions', autospec=True, return_value=[]) @mock.patch.object(utils, 'execute', autospec=True) def test_create_configuration_raid_5(self, mocked_execute, - 
mock_list_parts): + mock_list_parts, mocked_actual_comp): node = self.node raid_config = { "logical_disks": [ @@ -3082,6 +3091,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): None, None # mdadms ] + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1', '/dev/sdc1'), + ('/dev/sda2', '/dev/sdb2', '/dev/sdc2'), + ] + result = self.hardware.create_configuration(node, []) mocked_execute.assert_has_calls([ @@ -3120,12 +3134,14 @@ class TestGenericHardwareManager(base.IronicAgentTest): '/dev/sda2', '/dev/sdb2', '/dev/sdc2')]) self.assertEqual(raid_config, result) + @mock.patch.object(hardware, '_get_actual_component_devices', + autospec=True) @mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios') @mock.patch.object(disk_utils, 'list_partitions', autospec=True, return_value=[]) @mock.patch.object(utils, 'execute', autospec=True) def test_create_configuration_raid_6(self, mocked_execute, - mock_list_parts): + mock_list_parts, mocked_actual_comp): node = self.node raid_config = { "logical_disks": [ @@ -3170,6 +3186,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): None, None # mdadms ] + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1', '/dev/sdc1', '/dev/sdd1'), + ('/dev/sda2', '/dev/sdb2', '/dev/sdc2', '/dev/sdd2'), + ] + result = self.hardware.create_configuration(node, []) mocked_execute.assert_has_calls([ @@ -3217,12 +3238,15 @@ class TestGenericHardwareManager(base.IronicAgentTest): '/dev/sda2', '/dev/sdb2', '/dev/sdc2', '/dev/sdd2')]) self.assertEqual(raid_config, result) + @mock.patch.object(hardware, '_get_actual_component_devices', + autospec=True) @mock.patch.object(disk_utils, 'list_partitions', autospec=True, return_value=[]) @mock.patch.object(utils, 'execute', autospec=True) @mock.patch.object(os.path, 'isdir', autospec=True, return_value=True) def test_create_configuration_efi(self, mocked_os_path_isdir, - mocked_execute, mock_list_parts): + mocked_execute, mock_list_parts, + mocked_actual_comp): 
node = self.node raid_config = { @@ -3255,6 +3279,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): None, None # mdadms ] + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sda2', '/dev/sdb2'), + ] + result = self.hardware.create_configuration(node, []) mocked_os_path_isdir.assert_has_calls([ mock.call('/sys/firmware/efi') @@ -3282,12 +3311,15 @@ class TestGenericHardwareManager(base.IronicAgentTest): '/dev/sda2', '/dev/sdb2')]) self.assertEqual(raid_config, result) + @mock.patch.object(hardware, '_get_actual_component_devices', + autospec=True) @mock.patch.object(disk_utils, 'list_partitions', autospec=True, return_value=[]) @mock.patch.object(utils, 'execute', autospec=True) @mock.patch.object(os.path, 'isdir', autospec=True, return_value=False) def test_create_configuration_force_gpt_with_disk_label( - self, mocked_os_path_isdir, mocked_execute, mock_list_part): + self, mocked_os_path_isdir, mocked_execute, mock_list_part, + mocked_actual_comp): node = self.node raid_config = { @@ -3326,6 +3358,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): None, None # mdadms ] + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sda2', '/dev/sdb2'), + ] + result = self.hardware.create_configuration(node, []) mocked_os_path_isdir.assert_has_calls([ mock.call('/sys/firmware/efi') @@ -3353,12 +3390,14 @@ class TestGenericHardwareManager(base.IronicAgentTest): '/dev/sda2', '/dev/sdb2')]) self.assertEqual(raid_config, result) + @mock.patch.object(hardware, '_get_actual_component_devices', + autospec=True) @mock.patch.object(disk_utils, 'list_partitions', autospec=True, return_value=[]) @mock.patch.object(utils, 'execute', autospec=True) @mock.patch.object(os.path, 'isdir', autospec=True, return_value=False) def test_create_configuration_no_max(self, _mocked_isdir, mocked_execute, - mock_list_parts): + mock_list_parts, mocked_actual_comp): node = self.node raid_config = { "logical_disks": [ @@ -3381,6 
+3420,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): self.hardware.list_block_devices = mock.Mock() self.hardware.list_block_devices.return_value = [device1, device2] + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sda2', '/dev/sdb2'), + ] + mocked_execute.side_effect = [ None, # mklabel sda ('42', None), # sgdisk -F sda @@ -3390,7 +3434,7 @@ class TestGenericHardwareManager(base.IronicAgentTest): None, None, # parted + partx sdb None, None, # parted + partx sda None, None, # parted + partx sdb - None, None # mdadms + None, None, # mdadms ] result = self.hardware.create_configuration(node, []) @@ -3420,13 +3464,16 @@ class TestGenericHardwareManager(base.IronicAgentTest): '/dev/sda2', '/dev/sdb2')]) self.assertEqual(raid_config, result) + @mock.patch.object(hardware, '_get_actual_component_devices', + autospec=True) @mock.patch.object(disk_utils, 'list_partitions', autospec=True, return_value=[]) @mock.patch.object(utils, 'execute', autospec=True) @mock.patch.object(os.path, 'isdir', autospec=True, return_value=False) def test_create_configuration_max_is_first_logical(self, _mocked_isdir, mocked_execute, - mock_list_parts): + mock_list_parts, + mocked_actual_comp): node = self.node raid_config = { "logical_disks": [ @@ -3461,6 +3508,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): None, None # mdadms ] + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sda2', '/dev/sdb2'), + ] + result = self.hardware.create_configuration(node, []) mocked_execute.assert_has_calls([ @@ -3488,12 +3540,15 @@ class TestGenericHardwareManager(base.IronicAgentTest): '/dev/sda2', '/dev/sdb2')]) self.assertEqual(raid_config, result) + @mock.patch.object(hardware, '_get_actual_component_devices', + autospec=True) @mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios') @mock.patch.object(disk_utils, 'list_partitions', autospec=True, return_value=[]) @mock.patch.object(utils, 'execute', 
autospec=True) def test_create_configuration_with_hints(self, mocked_execute, - mock_list_parts): + mock_list_parts, + mocked_actual_comp): node = self.node raid_config = { "logical_disks": [ @@ -3538,6 +3593,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): None, None # mdadms ] + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sda2', '/dev/sdb2'), + ] + result = self.hardware.create_configuration(node, []) mocked_execute.assert_has_calls([ @@ -3818,9 +3878,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): self.hardware.list_block_devices.side_effect = [ [device1, device2, device3], [device1, device2, device3]] + # pre-creation validation fails as insufficient number of devices found error_regex = ("Software RAID configuration is not possible for " "RAID level 6 with only 3 block devices found.") + # Execute is actually called for listing_block_devices self.assertFalse(mocked_execute.called) self.assertRaisesRegex(errors.SoftwareRAIDError, error_regex, @@ -3832,12 +3894,15 @@ class TestGenericHardwareManager(base.IronicAgentTest): result = self.hardware.create_configuration(self.node, []) self.assertEqual(result, {}) + @mock.patch.object(hardware, '_get_actual_component_devices', + autospec=True) @mock.patch.object(disk_utils, 'list_partitions', autospec=True, return_value=[]) @mock.patch.object(utils, 'execute', autospec=True) @mock.patch.object(os.path, 'isdir', autospec=True, return_value=True) def test_create_configuration_with_nvme(self, mocked_os_path_isdir, - mocked_execute, mock_list_parts): + mocked_execute, mock_list_parts, + mocked_actual_comp): raid_config = { "logical_disks": [ { @@ -3870,6 +3935,11 @@ class TestGenericHardwareManager(base.IronicAgentTest): None, None # mdadms ] + mocked_actual_comp.side_effect = [ + ('/dev/nvme0n1p1', '/dev/nvme1n1p1'), + ('/dev/nvme0n1p2', '/dev/nvme1n1p2'), + ] + result = self.hardware.create_configuration(self.node, []) mocked_execute.assert_has_calls([ @@ -3965,6 
+4035,20 @@ class TestGenericHardwareManager(base.IronicAgentTest): self.hardware.create_configuration, self.node, []) + @mock.patch.object(utils, 'execute', autospec=True) + def test__get_actual_component_devices(self, mocked_execute): + mocked_execute.side_effect = [(MDADM_DETAIL_OUTPUT, '')] + component_devices = hardware._get_actual_component_devices( + '/dev/md0') + self.assertEqual(['/dev/vde1', '/dev/vdf1'], component_devices) + + @mock.patch.object(utils, 'execute', autospec=True) + def test__get_actual_component_devices_broken_raid0(self, mocked_execute): + mocked_execute.side_effect = [(MDADM_DETAIL_OUTPUT_BROKEN_RAID0, '')] + component_devices = hardware._get_actual_component_devices( + '/dev/md126') + self.assertEqual(['/dev/sda2'], component_devices) + @mock.patch.object(utils, 'execute', autospec=True) def test__get_md_uuid(self, mocked_execute): mocked_execute.side_effect = [(MDADM_DETAIL_OUTPUT, '')] diff --git a/releasenotes/notes/readd_missing_devs-2ed85805388b6e42.yaml b/releasenotes/notes/readd_missing_devs-2ed85805388b6e42.yaml new file mode 100644 index 000000000..11ff54ab5 --- /dev/null +++ b/releasenotes/notes/readd_missing_devs-2ed85805388b6e42.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Upon the creation of Software RAID devices, component devices are + sometimes kicked out immediately (for no apparent reason). This + fix re-adds devices in such cases in order to prevent the component + from being missing the next time the device is assembled, which, for + instance, may prevent the UEFI ESPs from being installed properly.