From 9ac4df32e2e2d3111ca0d4e57ba8b22bcc1974c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Dulko?= Date: Tue, 7 Nov 2017 12:03:27 +0100 Subject: [PATCH] Fix kubelet retries issues When an error happens during kubelet's ADD request kubelet will trigger a retry. If aforementioned failure happened after the host-side veth interface was moved into the host netns, all further kubelet retries of the request will fail with conflict, as it will be impossible to move another iface of that name into the host netns. This commit aims to fix the issue by checking for existence of conflicting interface in the host netns and removing it if needed. Change-Id: I1596585c8b076d09a7f8c854bb524c2374d804e8 Closes-Bug: 1730644 --- kuryr_kubernetes/cni/binding/bridge.py | 15 ++++++++++ .../tests/unit/cni/test_binding.py | 30 ++++++++++++++----- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/kuryr_kubernetes/cni/binding/bridge.py b/kuryr_kubernetes/cni/binding/bridge.py index 17f20e85f..f2d33eb2a 100644 --- a/kuryr_kubernetes/cni/binding/bridge.py +++ b/kuryr_kubernetes/cni/binding/bridge.py @@ -15,14 +15,29 @@ import os +from oslo_log import log + from kuryr_kubernetes.cni.binding import base as b_base from kuryr_kubernetes import linux_net_utils as net_utils +LOG = log.getLogger(__name__) + class BaseBridgeDriver(object): def connect(self, vif, ifname, netns): host_ifname = vif.vif_name + with b_base.get_ipdb() as h_ipdb: + if host_ifname in h_ipdb.interfaces: + # NOTE(dulek): This most likely means that we already run + # connect for this iface and there's a leftover + # host-side vif. Let's remove it, its peer should + # get deleted automatically by the kernel. + LOG.debug('Found leftover host vif %s. Removing it before' + 'connecting.', host_ifname) + with h_ipdb.interfaces[host_ifname] as h_iface: + h_iface.remove() + with b_base.get_ipdb(netns) as c_ipdb: with c_ipdb.create(ifname=ifname, peer=host_ifname, kind='veth') as c_iface: diff --git a/kuryr_kubernetes/tests/unit/cni/test_binding.py b/kuryr_kubernetes/tests/unit/cni/test_binding.py index 43125fa3e..e58a3feae 100644 --- a/kuryr_kubernetes/tests/unit/cni/test_binding.py +++ b/kuryr_kubernetes/tests/unit/cni/test_binding.py @@ -16,12 +16,15 @@ import mock import uuid from os_vif import objects as osv_objects +from oslo_config import cfg from kuryr_kubernetes.cni.binding import base from kuryr_kubernetes import objects from kuryr_kubernetes.tests import base as test_base from kuryr_kubernetes.tests import fake +CONF = cfg.CONF + class TestDriverMixin(test_base.TestCase): def setUp(self): @@ -33,36 +36,40 @@ class TestDriverMixin(test_base.TestCase): # Mock IPDB context managers self.ipdbs = {} - self.m_bridge_iface = mock.Mock(__exit__=mock.Mock()) + self.m_bridge_iface = mock.Mock(__exit__=mock.Mock(return_value=None)) self.m_c_iface = mock.Mock() self.m_h_iface = mock.Mock() self.h_ipdb, self.h_ipdb_exit = self._mock_ipdb_context_manager(None) self.c_ipdb, self.c_ipdb_exit = self._mock_ipdb_context_manager( self.netns) self.m_create = mock.Mock() + self.h_ipdb.create = mock.Mock( + return_value=mock.Mock( + __enter__=mock.Mock(return_value=self.m_create), + __exit__=mock.Mock(return_value=None))) self.c_ipdb.create = mock.Mock( return_value=mock.Mock( __enter__=mock.Mock(return_value=self.m_create), - __exit__=mock.Mock())) + __exit__=mock.Mock(return_value=None))) def _mock_ipdb_context_manager(self, netns): mock_ipdb = mock.Mock( interfaces={ 'bridge': mock.Mock( __enter__=mock.Mock(return_value=self.m_bridge_iface), - __exit__=mock.Mock(), + __exit__=mock.Mock(return_value=None), ), 'c_interface': mock.Mock( __enter__=mock.Mock(return_value=self.m_c_iface), - __exit__=mock.Mock(), + __exit__=mock.Mock(return_value=None), ), 'h_interface': mock.Mock( __enter__=mock.Mock(return_value=self.m_h_iface), - __exit__=mock.Mock(), + __exit__=mock.Mock(return_value=None), ), } ) - mock_exit = mock.Mock() + mock_exit = mock.Mock(return_value=None) self.ipdbs[netns] = mock.Mock( __enter__=mock.Mock(return_value=mock_ipdb), __exit__=mock_exit) @@ -95,7 +102,7 @@ class TestOpenVSwitchDriver(TestDriverMixin, test_base.TestCase): @mock.patch('kuryr_kubernetes.linux_net_utils.create_ovs_vif_port') def test_connect(self, mock_create_ovs): self._test_connect() - self.assertEqual(1, self.h_ipdb_exit.call_count) + self.assertEqual(2, self.h_ipdb_exit.call_count) self.assertEqual(2, self.c_ipdb_exit.call_count) self.c_ipdb.create.assert_called_once_with( ifname=self.ifname, peer='h_interface', kind='veth') @@ -126,7 +133,9 @@ class TestBridgeDriver(TestDriverMixin, test_base.TestCase): def test_connect(self): self._test_connect() - self.assertEqual(2, self.h_ipdb_exit.call_count) + self.m_h_iface.remove.assert_called_once_with() + + self.assertEqual(3, self.h_ipdb_exit.call_count) self.assertEqual(2, self.c_ipdb_exit.call_count) self.c_ipdb.create.assert_called_once_with( ifname=self.ifname, peer='h_interface', kind='veth') @@ -148,6 +157,9 @@ class TestNestedVlanDriver(TestDriverMixin, test_base.TestCase): def setUp(self): super(TestNestedVlanDriver, self).setUp() self.vif = fake._fake_vif(objects.vif.VIFVlanNested) + self.vif.vlan_id = 7 + CONF.set_override('link_iface', 'bridge', group='binding') + self.addCleanup(CONF.clear_override, 'link_iface', group='binding') def test_connect(self): self._test_connect() @@ -168,6 +180,8 @@ class TestNestedMacvlanDriver(TestDriverMixin, test_base.TestCase): def setUp(self): super(TestNestedMacvlanDriver, self).setUp() self.vif = fake._fake_vif(objects.vif.VIFMacvlanNested) + CONF.set_override('link_iface', 'bridge', group='binding') + self.addCleanup(CONF.clear_override, 'link_iface', group='binding') def test_connect(self): self._test_connect()