From bf673c276155222e35ddd2f5949d564f229255f8 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Mon, 22 Jan 2024 18:20:14 +0100 Subject: [PATCH] Account for nodes with the same BMC hostname in inspection lookup Currently, the code expects a given hostname to be used by one one node. This is not necessarily the case for Redfish where several Systems can co-exist under the same BMC. Use MAC addresses to distinguish them. Add more inline comments to explain the process. Change-Id: Ifc5a18bffc7cbcdd8bbbd660aba61fa11403e7e8 --- ironic/drivers/modules/inspect_utils.py | 49 ++++++++++++++----- .../drivers/modules/test_inspect_utils.py | 30 ++++++++++++ .../lookup-many-bmcs-b019f3599c8e8da7.yaml | 6 +++ 3 files changed, 73 insertions(+), 12 deletions(-) create mode 100644 releasenotes/notes/lookup-many-bmcs-b019f3599c8e8da7.yaml diff --git a/ironic/drivers/modules/inspect_utils.py b/ironic/drivers/modules/inspect_utils.py index adea73c1c0..7cb7b4095c 100644 --- a/ironic/drivers/modules/inspect_utils.py +++ b/ironic/drivers/modules/inspect_utils.py @@ -269,7 +269,17 @@ def lookup_node(context, mac_addresses, bmc_addresses, node_uuid=None): 'state': node.provision_state}) raise exception.NotFound() + # NOTE(dtantsur): in theory, if the node is found at this point, we could + # short-circuit the lookup process and return it without considering BMC + # addresses. However, I've seen cases where users ended up enrolling nodes + # with BMC addresses from different nodes. Continuing to process BMC + # addresses allows us to catch these situations that otherwise can lead + # to updating wrong nodes. + if bmc_addresses: + # NOTE(dtantsur): the same BMC hostname can be used by several nodes, + # e.g. in case of Redfish. Find all suitable nodes first. + nodes_by_bmc = set() for candidate in objects.Node.list( context, filters={'provision_state': states.INSPECTWAIT}, @@ -278,26 +288,41 @@ def lookup_node(context, mac_addresses, bmc_addresses, node_uuid=None): for addr in candidate.driver_internal_info.get( LOOKUP_CACHE_FIELD) or (): if addr in bmc_addresses: - break - else: - continue + nodes_by_bmc.add(candidate.uuid) - if node and candidate.uuid != node.uuid: - LOG.error('Conflict on inspection lookup: nodes %(node1)s ' - 'and %(node2)s both satisfy MAC addresses ' - '(%(macs)s) and BMC address(s) (%(bmc)s). The cause ' - 'may be ports attached to a wrong node.', - {'node1': node.uuid, - 'node2': candidate.uuid, + # NOTE(dtantsur): if none of the nodes found by the BMC match the one + # found by the MACs, something is definitely wrong. + if node and nodes_by_bmc and node.uuid not in nodes_by_bmc: + LOG.error('Conflict on inspection lookup: nodes %(node1)s ' + 'and %(node2)s both satisfy MAC addresses ' + '(%(macs)s) and BMC address(s) (%(bmc)s). The cause ' + 'may be ports attached to a wrong node.', + {'node1': ', '.join(nodes_by_bmc), + 'node2': node.uuid, + 'macs': ', '.join(mac_addresses), + 'bmc': ', '.join(bmc_addresses)}) + raise exception.NotFound() + + # NOTE(dtantsur): at this point, if the node was found by the MAC + # addresses, it also matches the BMC address. We only need to handle + # the case when the node was not found by the MAC addresses. + if not node and nodes_by_bmc: + if len(nodes_by_bmc) > 1: + LOG.error('Several nodes %(nodes)s satisfy BMC address(s) ' + '(%(bmc)s), but none of them satisfy MAC addresses ' + '(%(macs)s). Ports must be created for a successful ' + 'inspection in this case.', + {'nodes': ', '.join(nodes_by_bmc), 'macs': ', '.join(mac_addresses), 'bmc': ', '.join(bmc_addresses)}) raise exception.NotFound() + node_uuid = nodes_by_bmc.pop() try: # Fetch the complete object now. - node = objects.Node.get_by_uuid(context, candidate.uuid) + node = objects.Node.get_by_uuid(context, node_uuid) except exception.NotFound: - pass # Deleted in-between? + raise # Deleted in-between? if not node: LOG.error('No nodes satisfy MAC addresses (%(macs)s) and BMC ' diff --git a/ironic/tests/unit/drivers/modules/test_inspect_utils.py b/ironic/tests/unit/drivers/modules/test_inspect_utils.py index 52cfd9bad8..bc9738eae7 100644 --- a/ironic/tests/unit/drivers/modules/test_inspect_utils.py +++ b/ironic/tests/unit/drivers/modules/test_inspect_utils.py @@ -381,6 +381,36 @@ class LookupNodeTestCase(db_base.DbTestCase): self.assertRaises(exception.NotFound, utils.lookup_node, self.context, self.macs, [self.bmc2], None) + def test_duplicate_bmc(self): + # This can happen with Redfish. There is no way to resolve the conflict + # other than by using MACs. + obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]}, + provision_state=states.INSPECTWAIT) + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, [], [self.bmc], None) + + def test_duplicate_bmc_and_unknown_mac(self): + obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]}, + provision_state=states.INSPECTWAIT) + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, [self.unknown_mac], [self.bmc], None) + + def test_duplicate_bmc_resolved_by_macs(self): + obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]}, + provision_state=states.INSPECTWAIT) + result = utils.lookup_node( + self.context, [self.macs[0]], [self.bmc], None) + self.assertEqual(self.node.uuid, result.uuid) + def test_by_uuid(self): result = utils.lookup_node(self.context, [], [], self.node.uuid) self.assertEqual(self.node.uuid, result.uuid) diff --git a/releasenotes/notes/lookup-many-bmcs-b019f3599c8e8da7.yaml b/releasenotes/notes/lookup-many-bmcs-b019f3599c8e8da7.yaml new file mode 100644 index 0000000000..34809af6c2 --- /dev/null +++ b/releasenotes/notes/lookup-many-bmcs-b019f3599c8e8da7.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixes the inspection lookup to consider all nodes with the same BMC + hostname, as can happen with Redfish. In this case, the nodes are + distinguished by MAC addresses.