Don't try to lock for vif detach

Historically, we did not have a prohibition upon removing
a VIF entry stored in the extra field, however the VIF
attachment/detachment feature resulted in a task being
created which by default attempts to pull a reservation
lock unless explicitly shared.

This is problematic as part of the process of undeploying
a node as exclusive locks are generated.

Presently, if any of those locked tasks run long, such as
a new image being required or for some crazy reason,
the BMC power request hangs for a few minutes, the VIF
record may be orphaned and never removed, as the
expectation is that nova deletes the VIF record from ironic.

This allows the VIF record to be removed when a node is
no longer in active use and possibly subject to a lock being
held for a long period of time, such as when setting up
for CLEANING.

Additionally, this patch moves the actual VIF record
deletion until after the detachment action in the
event that it fails. This allows for the state in
ironic to be consistent instead of the record
being removed before the detachment occurs.

Change-Id: Ib7544e43a2b26441d4f562b584bbc7fee6a11fea
Closes-Bug: #1743652
This commit is contained in:
Julia Kreger 2018-01-16 09:21:37 -08:00
parent 12d3157a96
commit 4f79cb3932
6 changed files with 45 additions and 11 deletions

View File

@ -2943,7 +2943,12 @@ class ConductorManager(base_manager.BaseConductorManager):
"""
LOG.debug("RPC vif_detach called for the node %(node_id)s with "
"vif_id %(vif_id)s", {'node_id': node_id, 'vif_id': vif_id})
# NOTE(TheJulia): This task is explicitly called without a lock as
# long lived locks occur during node tear-down as a node goes into
# cleaning. The lack of a lock allows nova to remove the VIF record.
# See: https://bugs.launchpad.net/ironic/+bug/1743652
with task_manager.acquire(context, node_id,
shared=True,
purpose='detach vif') as task:
task.driver.network.validate(task)
task.driver.network.vif_detach(task, vif_id)

View File

@ -1031,6 +1031,10 @@ class NetworkInterface(BaseInterface):
def vif_detach(self, task, vif_id):
"""Detach a virtual network interface from a node
Warning: This method is called by the conductor as a shared
task, to ensure that a vif can be detached during a long-lived lock.
Such as those that occur when a node is being unprovisioned.
:param task: A TaskManager instance.
:param vif_id: A VIF ID to detach
:raises: NetworkError, VifNotAttached

View File

@ -581,8 +581,8 @@ class NeutronVIFPortIDMixin(VIFPortIDMixin):
# attached, and fail if not.
port_like_obj = self._get_port_like_obj_by_vif_id(task, vif_id)
self._clear_vif_from_port_like_obj(port_like_obj)
# NOTE(vsaienko) allow to unplug VIFs from ACTIVE instance.
if task.node.provision_state == states.ACTIVE:
neutron.unbind_neutron_port(vif_id, context=task.context)
self._clear_vif_from_port_like_obj(port_like_obj)

View File

@ -35,6 +35,7 @@ from ironic.common import boot_devices
from ironic.common import driver_factory
from ironic.common import exception
from ironic.common import images
from ironic.common import neutron
from ironic.common import states
from ironic.common import swift
from ironic.conductor import manager
@ -4344,17 +4345,17 @@ class VifTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
mock_detach.assert_called_once_with(mock.ANY, "interface")
mock_valid.assert_called_once_with(mock.ANY, mock.ANY)
@mock.patch.object(n_flat.FlatNetwork, 'vif_detach', autpspec=True)
def test_vif_detach_node_locked(self, mock_detach, mock_valid):
@mock.patch.object(neutron, 'unbind_neutron_port', autpspec=True)
def test_vif_detach_node_is_locked(self, mock_detach, mock_valid):
node = obj_utils.create_test_node(self.context, driver='fake',
reservation='fake-reserv')
exc = self.assertRaises(messaging.rpc.ExpectedException,
self.service.vif_detach,
self.context, node.uuid, "interface")
# Compare true exception hidden by @messaging.expected_exceptions
self.assertEqual(exception.NodeLocked, exc.exc_info[0])
obj_utils.create_test_port(self.context,
node_id=node.id,
internal_info={
'tenant_vif_port_id': 'fake-id'})
self.service.vif_detach(self.context, node.uuid, 'fake-id')
self.assertFalse(mock_detach.called)
self.assertFalse(mock_valid.called)
self.assertTrue(mock_valid.called)
@mock.patch.object(n_flat.FlatNetwork, 'vif_detach', autpspec=True)
def test_vif_detach_raises_network_error(self, mock_detach,

View File

@ -941,12 +941,28 @@ class TestNeutronVifPortIDMixin(db_base.DbTestCase):
self.node.save()
mock_get.return_value = self.port
mock_unp.side_effect = exception.NetworkError
with task_manager.acquire(self.context, self.node.id) as task:
with task_manager.acquire(self.context, self.node.id,
shared=True) as task:
self.assertRaises(exception.NetworkError,
self.interface.vif_detach, task, 'fake_vif_id')
mock_unp.assert_called_once_with('fake_vif_id',
context=task.context)
mock_get.assert_called_once_with(task, 'fake_vif_id')
mock_clear.assert_not_called()
@mock.patch.object(common.VIFPortIDMixin, '_clear_vif_from_port_like_obj')
@mock.patch.object(neutron_common, 'unbind_neutron_port', autospec=True)
@mock.patch.object(common.VIFPortIDMixin, '_get_port_like_obj_by_vif_id')
def test_vif_detach_deleting_node_success(self, mock_get, mock_unp,
mock_clear):
self.node.provision_state = states.DELETING
self.node.save()
mock_get.return_value = self.port
with task_manager.acquire(self.context, self.node.id,
shared=True) as task:
self.interface.vif_detach(task, 'fake_vif_id')
self.assertFalse(mock_unp.called)
mock_get.assert_called_once_with(task, 'fake_vif_id')
mock_clear.assert_called_once_with(self.port)
@mock.patch.object(common_utils, 'warn_about_deprecated_extra_vif_port_id',

View File

@ -0,0 +1,8 @@
---
fixes:
- |
Addresses a condition where the Compute Service may have been unable to
remove VIF attachment records while a baremetal node is being unprovisiond. This
condition resulted in VIF records being orphaned, blocking future
deployments without manual intervention.
See `bug 1743652 <https://bugs.launchpad.net/ironic/+bug/1743652>`_ for more details.