Always cleanup stale devices on DHCP setup failure

If the DHCP port setup process fails in the DHCP agent device
manager, it will throw a conflict exception, which will bubble
all of the way up to the main DHCP agent. The issue is that, during
a 'restart' call, the config files are wiped out while maintaining
the VIF before calling setup. This means that, if setup fails, there
is no reference to the interface name anymore so a subsequent destroy
will not first unplug the VIF before destroying the namespace.
This leaves a bunch of orphaned tap ports behind in the OVS case
that don't have an accessible namespace.

This patch addresses the issue by cleaning up all ports inside of
a namespace on a 'setup' failure before reraising the exception.
This ensures that the namespace is clear if destroy is called in the
future without another successful setup.

Closes-Bug: #1625325
Change-Id: I0211422de51ce6acc6eb593eb890b606101cb9f0
This commit is contained in:
Kevin Benton 2016-09-19 16:02:45 -07:00
parent 9b58b55c1d
commit 70907b3e55
2 changed files with 33 additions and 10 deletions

View File

@ -1291,14 +1291,22 @@ class DeviceManager(object):
network.ports.append(port)
def _cleanup_stale_devices(self, network, dhcp_port):
"""Unplug any devices found in the namespace except for dhcp_port."""
LOG.debug("Cleaning stale devices for network %s", network.id)
dev_name = self.driver.get_device_name(dhcp_port)
skip_dev_name = (self.driver.get_device_name(dhcp_port)
if dhcp_port else None)
ns_ip = ip_lib.IPWrapper(namespace=network.namespace)
if not ns_ip.netns.exists(network.namespace):
return
for d in ns_ip.get_devices(exclude_loopback=True):
# delete all devices except current active DHCP port device
if d.name != dev_name:
if d.name != skip_dev_name:
LOG.debug("Found stale device %s, deleting", d.name)
self.unplug(d.name, network)
try:
self.unplug(d.name, network)
except Exception:
LOG.exception(_LE("Exception during stale "
"dhcp device cleanup"))
def plug(self, network, port, interface_name):
"""Plug device settings for the network's DHCP on this host."""
@ -1311,7 +1319,13 @@ class DeviceManager(object):
def setup(self, network):
"""Create and initialize a device for network's DHCP on this host."""
port = self.setup_dhcp_port(network)
try:
port = self.setup_dhcp_port(network)
except Exception:
with excutils.save_and_reraise_exception():
# clear everything out so we don't leave dangling interfaces
# if setup never succeeds in the future.
self._cleanup_stale_devices(network, dhcp_port=None)
self._update_dhcp_port(network, port)
interface_name = self.get_interface_name(network, port)
@ -1355,12 +1369,7 @@ class DeviceManager(object):
namespace=network.namespace)
self._set_default_route(network, interface_name)
try:
self._cleanup_stale_devices(network, port)
except Exception:
# catch everything as we don't want to fail because of
# cleanup step
LOG.error(_LE("Exception during stale dhcp device cleanup"))
self._cleanup_stale_devices(network, port)
return interface_name

View File

@ -1441,6 +1441,20 @@ class TestDeviceManager(base.BaseTestCase):
expected = [mock.call.add_rule('POSTROUTING', rule)]
self.mangle_inst.assert_has_calls(expected)
def test_setup_dhcp_port_doesnt_orphan_devices(self):
with mock.patch.object(dhcp.ip_lib, 'IPDevice') as mock_IPDevice:
plugin = mock.Mock()
device = mock.Mock()
mock_IPDevice.return_value = device
device.route.get_gateway.return_value = None
net = copy.deepcopy(fake_network)
plugin.create_dhcp_port.side_effect = exceptions.Conflict()
dh = dhcp.DeviceManager(cfg.CONF, plugin)
clean = mock.patch.object(dh, '_cleanup_stale_devices').start()
with testtools.ExpectedException(exceptions.Conflict):
dh.setup(net)
clean.assert_called_once_with(net, dhcp_port=None)
def test_setup_create_dhcp_port(self):
with mock.patch.object(dhcp.ip_lib, 'IPDevice') as mock_IPDevice:
plugin = mock.Mock()