From ba0dc574bc47abae71e5a48b8868e8a8fdccea50 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Tue, 14 Jul 2020 12:45:03 -0700 Subject: [PATCH] Follow-up on blocking port deletions A recent comment on https://review.opendev.org/#/c/665835 pointed out that we should likely make some changes and a fix a missing check for the introspection_vif_port_id which was likely introduced after this functionality was originally written. Also adds some documentation on the subject since we lack docs even pointing out how to delete a port. :\ Change-Id: I0ba8a3741eefa80eb56e25a1b339f8433b3fc0dc --- doc/source/admin/troubleshooting.rst | 77 +++++++++++++++++++++ doc/source/install/enrollment.rst | 7 ++ ironic/conductor/manager.py | 2 +- ironic/conductor/utils.py | 3 + ironic/tests/unit/conductor/test_manager.py | 6 +- ironic/tests/unit/conductor/test_utils.py | 6 ++ 6 files changed, 96 insertions(+), 5 deletions(-) diff --git a/doc/source/admin/troubleshooting.rst b/doc/source/admin/troubleshooting.rst index 1ac680e1f8..0c29343c89 100644 --- a/doc/source/admin/troubleshooting.rst +++ b/doc/source/admin/troubleshooting.rst @@ -559,3 +559,80 @@ waiting for an event that is never happening. In these cases, it might be helpful to connect to the IPA and inspect its logs, see the trouble shooting guide of the :ironic-python-agent-doc:`ironic-python-agent (IPA) <>` on how to do this. + +Deployments fail with "failed to update MAC address" +==================================================== + +The design of the integration with the Networking service (neutron) is such +that once virtual ports have been created in the API, their MAC address must +be updated in order for the DHCP server to be able to appropriately reply. + +This can sometimes result in errors being raised indicating that the MAC +address is already in use. This is because at some point in the past, a +virtual interface was orphaned either by accident or by some unexpected +glitch, and a previous entry is still present in Neutron. + +This error looks something like this when reported in the ironic-conductor +log output.: + + Failed to update MAC address on Neutron port 305beda7-0dd0-4fec-b4d2-78b7aa4e8e6a.: MacAddressInUseClient: Unable to complete operation for network 1e252627-6223-4076-a2b9-6f56493c9bac. The mac address 52:54:00:7c:c4:56 is in use. + +Because we have no idea about this entry, we fail the deployment process +as we can't make a number of assumptions in order to attempt to automatically +resolve the conflict. + +How did I get here? +------------------- + +Originally this was a fairly easy issue to encounter. The retry logic path +which resulted between the Orchestration (heat) and Compute (nova) services, +could sometimes result in additional un-necessary ports being created. + +Bugs of this class have been largely resolved since the Rocky development +cycle. Since then, the way this can become encountered is due to Networking +(neutron) VIF attachments not being removed or deleted prior to deleting a +port in the Bare Metal service. + +Ultimately, the key of this is that the port is being deleted. Under most +operating circumstances, there really is no need to delete the port, and +VIF attachments are stored on the port object, so deleting the port +*CAN* result in the VIF not being cleaned up from Neutron. + +Under normal circumstances, when deleting ports, a node should be in a +stable state, and the node should not be provisioned. If the +``openstack baremetal port delete`` command fails, this may indicate that +a known VIF is still attached. Generally if they are transitory from cleaning, +provisioning, rescuing, or even inspection, getting the node to the +``available`` state wil unblock your delete operation, that is unless there is +a tenant VIF attahment. In that case, the vif will need to be removed from +with-in the Bare Metal service using the +``openstack baremetal node vif detach`` command. + +A port can also be checked to see if there is a VIF attachment by consulting +the port's ``internal_info`` field. + +.. warning:: + The ``maintenance`` flag can be used to force the node's port to be + deleted, however this will disable any check that would normally block + the user from issuing a delete and accidently orphaning the VIF attachment + record. + +How do I resolve this? +---------------------- + +Generally, you need to identify the port with the offending MAC address. +Example: + + openstack port list --mac-address 52:54:00:7c:c4:56 + +From the command's output, you should be able to identify the ``id`` field. +Using that, you can delete the port. Example: + + openstack port delete + +.. warning:: + Before deleting a port, you should always verify that it is no longer in + use or no longer seems applicable/operable. If multiple deployments of + the Bare Metal service with a single Neutron, the possibility that a + inventory typo, or possibly even a duplicate MAC address exists, which + could also produce the same basic error message. diff --git a/doc/source/install/enrollment.rst b/doc/source/install/enrollment.rst index 4d6b0a4b23..1e0f9957e7 100644 --- a/doc/source/install/enrollment.rst +++ b/doc/source/install/enrollment.rst @@ -250,6 +250,13 @@ and may be combined if desired. $ openstack baremetal port create $MAC_ADDRESS --node $NODE_UUID + .. note:: + When it is time to remove the node from the Bare Metal service, the + command used to remove the port is ``openstack baremetal port delete + ``. When doing so, it is important to ensure that the + baremetal node is not in ``maintenance`` as guarding logic to prevent + orphaning Neutron Virtual Interfaces (VIFs) will be overriden. + .. _enrollment-scheduling: Adding scheduling information diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 54173e57b9..436b85b8b2 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -2058,7 +2058,7 @@ class ConductorManager(base_manager.BaseConductorManager): with task_manager.acquire(context, port.node_id, purpose='port deletion') as task: vif, vif_use = utils.get_attached_vif(port) - if vif: + if vif and not task.node.maintenance: msg = _("Cannot delete the port %(port)s as it is bound " "to VIF %(vif)s for %(use)s use.") raise exception.InvalidState( diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index fac2f7f3eb..8313b11d86 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -1189,4 +1189,7 @@ def get_attached_vif(port): rescue_vif = port.internal_info.get('rescuing_vif_port_id') if rescue_vif: return (rescue_vif, 'rescuing') + inspection_vif = port.internal_info.get('inspection_vif_port_id') + if inspection_vif: + return (inspection_vif, 'inspecting') return (None, None) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 91067d7705..f921562608 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -6465,10 +6465,8 @@ class DestroyPortTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.context, node_id=node.id, internal_info={'tenant_vif_port_id': 'fake-id'}) - exc = self.assertRaises(messaging.rpc.ExpectedException, - self.service.destroy_port, - self.context, port) - self.assertEqual(exception.InvalidState, exc.exc_info[0]) + self.service.destroy_port(self.context, port) + self.assertRaises(exception.PortNotFound, port.refresh) def test_destroy_port_node_active_and_maintenance_no_vif(self): instance_uuid = uuidutils.generate_uuid() diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index bb8027046c..abc216879f 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -2100,3 +2100,9 @@ class GetAttachedVifTestCase(db_base.DbTestCase): vif, use = conductor_utils.get_attached_vif(self.port) self.assertEqual('1', vif) self.assertEqual('rescuing', use) + + def test_get_attached_vif_inspecting(self): + self.port.internal_info = {'inspection_vif_port_id': '1'} + vif, use = conductor_utils.get_attached_vif(self.port) + self.assertEqual('1', vif) + self.assertEqual('inspecting', use)