From 72399374b222bd4a9d192081bfab2344af27fdd3 Mon Sep 17 00:00:00 2001 From: Swaminathan Vasudevan Date: Thu, 28 Jun 2018 11:56:39 -0700 Subject: [PATCH] neutron-lbaas haproxy agent prevent vif unplug when failover occurs When lbaas fails over after an agent is unresponsive, the dead agent on coming up should not unplug the vif port, if the lbaas is active on other agent and when failover is configured. This patch fixes the problem. Story: #2003672 Change-Id: I76c38b20eb72c1dba0a0a2a140bbe77053aa3ed0 --- neutron_lbaas/agent/agent_manager.py | 14 ++++++++------ .../drivers/haproxy/namespace_driver.py | 14 ++++++++++---- .../tests/unit/agent/test_agent_manager.py | 18 ++++++++++-------- .../drivers/haproxy/test_namespace_driver.py | 2 +- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/neutron_lbaas/agent/agent_manager.py b/neutron_lbaas/agent/agent_manager.py index b9ffbe39f..108353694 100644 --- a/neutron_lbaas/agent/agent_manager.py +++ b/neutron_lbaas/agent/agent_manager.py @@ -159,7 +159,7 @@ class LbaasAgentManager(periodic_task.PeriodicTasks): ready_instances = set(self.plugin_rpc.get_ready_devices()) for deleted_id in known_instances - ready_instances: - self._destroy_loadbalancer(deleted_id) + self._destroy_loadbalancer(deleted_id, resync=True) for loadbalancer_id in ready_instances: self._reload_loadbalancer(loadbalancer_id) @@ -168,7 +168,7 @@ class LbaasAgentManager(periodic_task.PeriodicTasks): LOG.exception('Unable to retrieve ready devices') self.needs_resync = True - self.remove_orphans() + self.remove_orphans(resync=True) def _get_driver(self, loadbalancer_id): if loadbalancer_id not in self.instance_mapping: @@ -198,10 +198,11 @@ class LbaasAgentManager(periodic_task.PeriodicTasks): loadbalancer_id) self.needs_resync = True - def _destroy_loadbalancer(self, lb_id): + def _destroy_loadbalancer(self, lb_id, resync=False): driver = self._get_driver(lb_id) try: - driver.undeploy_instance(lb_id, delete_namespace=True) + driver.undeploy_instance(lb_id, delete_namespace=True, + resync=resync) del self.instance_mapping[lb_id] self.plugin_rpc.loadbalancer_destroyed(lb_id) except Exception: @@ -209,12 +210,13 @@ class LbaasAgentManager(periodic_task.PeriodicTasks): lb_id) self.needs_resync = True - def remove_orphans(self): + def remove_orphans(self, resync=False): for driver_name in self.device_drivers: lb_ids = [lb_id for lb_id in self.instance_mapping if self.instance_mapping[lb_id] == driver_name] try: - self.device_drivers[driver_name].remove_orphans(lb_ids) + self.device_drivers[driver_name].remove_orphans(lb_ids, + resync=resync) except NotImplementedError: pass # Not all drivers will support this diff --git a/neutron_lbaas/drivers/haproxy/namespace_driver.py b/neutron_lbaas/drivers/haproxy/namespace_driver.py index 5fa71200f..87826b704 100644 --- a/neutron_lbaas/drivers/haproxy/namespace_driver.py +++ b/neutron_lbaas/drivers/haproxy/namespace_driver.py @@ -156,8 +156,13 @@ class HaproxyNSDriver(agent_device_driver.AgentDeviceDriver): pids_path=pid_path, pid_file=pid_data) pm.disable() - # unplug the ports - if loadbalancer_id in self.deployed_loadbalancers: + # Before unplugging the port check if the LBaas + # is being active and see if it is a resync + # or failover is configured + resync = kwargs.get('resync', False) + failover_state = cfg.CONF.allow_automatic_lbaas_agent_failover + if (loadbalancer_id in self.deployed_loadbalancers and + not (resync and failover_state)): self._unplug(namespace, self.deployed_loadbalancers[loadbalancer_id].vip_port) @@ -181,7 +186,7 @@ class HaproxyNSDriver(agent_device_driver.AgentDeviceDriver): if loadbalancer_id in self.deployed_loadbalancers: del self.deployed_loadbalancers[loadbalancer_id] - def remove_orphans(self, known_loadbalancer_ids): + def remove_orphans(self, known_loadbalancer_ids, resync=False): if not os.path.exists(self.state_path): return @@ -189,7 +194,8 @@ class HaproxyNSDriver(agent_device_driver.AgentDeviceDriver): if lb_id not in known_loadbalancer_ids) for lb_id in orphans: if self.exists(lb_id): - self.undeploy_instance(lb_id, cleanup_namespace=True) + self.undeploy_instance(lb_id, cleanup_namespace=True, + resync=resync) def get_stats(self, loadbalancer_id): socket_path = self._get_state_file_path(loadbalancer_id, diff --git a/neutron_lbaas/tests/unit/agent/test_agent_manager.py b/neutron_lbaas/tests/unit/agent/test_agent_manager.py index 496f42ba4..0d2f89cbf 100644 --- a/neutron_lbaas/tests/unit/agent/test_agent_manager.py +++ b/neutron_lbaas/tests/unit/agent/test_agent_manager.py @@ -98,8 +98,9 @@ class TestManager(base.BaseTestCase): reload.assert_has_calls([mock.call(i) for i in reloaded], any_order=True) - destroy.assert_has_calls([mock.call(i) for i in destroyed], - any_order=True) + destroy.assert_has_calls( + [mock.call(i, resync=True) for i in destroyed], + any_order=True) self.assertFalse(self.mgr.needs_resync) def test_sync_state_all_known(self): @@ -180,7 +181,7 @@ class TestManager(base.BaseTestCase): self.mgr._destroy_loadbalancer(lb_id) self.driver_mock.undeploy_instance.assert_called_once_with( - lb_id, delete_namespace=True) + lb_id, delete_namespace=True, resync=False) self.assertNotIn(lb_id, self.mgr.instance_mapping) self.rpc_mock.loadbalancer_destroyed.assert_called_once_with(lb_id) self.assertFalse(self.mgr.needs_resync) @@ -193,7 +194,7 @@ class TestManager(base.BaseTestCase): self.mgr._destroy_loadbalancer(lb_id) self.driver_mock.undeploy_instance.assert_called_once_with( - lb_id, delete_namespace=True) + lb_id, delete_namespace=True, resync=False) self.assertIn(lb_id, self.mgr.instance_mapping) self.assertFalse(self.rpc_mock.loadbalancer_destroyed.called) self.assertTrue(self.log.exception.called) @@ -204,15 +205,16 @@ class TestManager(base.BaseTestCase): self.mgr._get_driver, 'unknown') def test_remove_orphans(self): - self.mgr.remove_orphans() - self.driver_mock.remove_orphans.assert_called_once_with(['1', '2']) + self.mgr.remove_orphans(resync=False) + self.driver_mock.remove_orphans.assert_called_once_with( + ['1', '2'], resync=False) def test_agent_disabled(self): payload = {'admin_state_up': False} self.mgr.agent_updated(mock.Mock(), payload) self.driver_mock.undeploy_instance.assert_has_calls( - [mock.call('1', delete_namespace=True), - mock.call('2', delete_namespace=True)], + [mock.call('1', delete_namespace=True, resync=False), + mock.call('2', delete_namespace=True, resync=False)], any_order=True ) diff --git a/neutron_lbaas/tests/unit/drivers/haproxy/test_namespace_driver.py b/neutron_lbaas/tests/unit/drivers/haproxy/test_namespace_driver.py index ba69178d7..67f94bebd 100644 --- a/neutron_lbaas/tests/unit/drivers/haproxy/test_namespace_driver.py +++ b/neutron_lbaas/tests/unit/drivers/haproxy/test_namespace_driver.py @@ -142,7 +142,7 @@ class TestHaproxyNSDriver(base.BaseTestCase): list_dir.assert_called_once_with(self.driver.state_path) self.driver.exists.assert_called_once_with('lb2') self.driver.undeploy_instance.assert_called_once_with( - 'lb2', cleanup_namespace=True) + 'lb2', cleanup_namespace=True, resync=False) def test_get_stats(self): # Shamelessly stolen from v1 namespace driver tests.