DVR: Clean stale snat-ns by checking its existence when agent restarts

At present there is no clear way to distinguish when the snat_namespace
object is initialized and when the actual namespace is created.
There is no way to check if the namespace already existed. The
code was only checking at the snat_namespace object instead of its
existence.

This patch addresses the issue by adding in an exists method to the
namespace object to identify the existence of the namespace in the
given agent.

This would allow us to check for the existence of the namespace,
and also allow us to identify the stale snat namespace and
delete the namespace when the gateway is cleared as the agent restarts.

This also applies for conditions when the router is manually moved
from one agent to another agent while the agent is dead. When the
agent wakes up it would clean up the stale snat namespace.

Closes-Bug: #1557909

(cherry picked from commit acd04d668b)

Conflicts:
	neutron/agent/l3/agent.py
	neutron/agent/l3/dvr_edge_ha_router.py
	neutron/agent/l3/dvr_edge_router.py
	neutron/tests/functional/agent/l3/test_dvr_router.py

Change-Id: Icb00297208813436c2a9e9a003275462293ad643
This commit is contained in:
Swaminathan Vasudevan 2016-06-07 13:31:56 -07:00
parent 395f34d47d
commit c334eb215a
8 changed files with 117 additions and 18 deletions

View File

@ -573,8 +573,12 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
# need to keep fip namespaces as well
ext_net_id = (r['external_gateway_info'] or {}).get(
'network_id')
is_snat_agent = (self.conf.agent_mode ==
l3_constants.L3_AGENT_MODE_DVR_SNAT)
if ext_net_id:
ns_manager.keep_ext_net(ext_net_id)
elif is_snat_agent:
ns_manager.ensure_snat_cleanup(r['id'])
update = queue.RouterUpdate(r['id'],
queue.PRIORITY_SYNC_ROUTERS_TASK,
router=r,

View File

@ -28,7 +28,8 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter):
def __init__(self, agent, host, *args, **kwargs):
super(DvrEdgeRouter, self).__init__(agent, host, *args, **kwargs)
self.snat_namespace = None
self.snat_namespace = dvr_snat_ns.SnatNamespace(
self.router_id, self.agent_conf, self.driver, self.use_ipv6)
self.snat_iptables_manager = None
def external_gateway_added(self, ex_gw_port, interface_name):
@ -41,19 +42,27 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter):
# the same routes to the snat namespace after the gateway port
# is added, we need to call routes_updated here.
self.routes_updated([], self.router['routes'])
elif self.snat_namespace.exists():
# This is the case where the snat was moved manually or
# rescheduled to a different agent when the agent was dead.
LOG.debug("SNAT was moved or rescheduled to a different host "
"and does not match with the current host. This is "
"a stale namespace %s and will be cleared from the "
"current dvr_snat host.", self.snat_namespace.name)
self.external_gateway_removed(ex_gw_port, interface_name)
def external_gateway_updated(self, ex_gw_port, interface_name):
if not self._is_this_snat_host():
# no centralized SNAT gateway for this node/agent
LOG.debug("not hosting snat for router: %s", self.router['id'])
if self.snat_namespace:
if self.snat_namespace.exists():
LOG.debug("SNAT was rescheduled to host %s. Clearing snat "
"namespace.", self.router.get('gw_port_host'))
return self.external_gateway_removed(
ex_gw_port, interface_name)
return
if not self.snat_namespace:
if not self.snat_namespace.exists():
# SNAT might be rescheduled to this agent; need to process like
# newly created gateway
return self.external_gateway_added(ex_gw_port, interface_name)
@ -66,7 +75,7 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter):
def external_gateway_removed(self, ex_gw_port, interface_name):
super(DvrEdgeRouter, self).external_gateway_removed(ex_gw_port,
interface_name)
if not self._is_this_snat_host() and not self.snat_namespace:
if not self._is_this_snat_host() and not self.snat_namespace.exists():
# no centralized SNAT gateway for this node/agent
LOG.debug("not hosting snat for router: %s", self.router['id'])
return
@ -75,8 +84,9 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter):
bridge=self.agent_conf.external_network_bridge,
namespace=self.snat_namespace.name,
prefix=router.EXTERNAL_DEV_PREFIX)
self.snat_namespace.delete()
self.snat_namespace = None
if self.snat_namespace.exists():
self.snat_namespace.delete()
def internal_network_added(self, port):
super(DvrEdgeRouter, self).internal_network_added(port)
@ -147,10 +157,6 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter):
# TODO(mlavalle): in the near future, this method should contain the
# code in the L3 agent that creates a gateway for a dvr. The first step
# is to move the creation of the snat namespace here
self.snat_namespace = dvr_snat_ns.SnatNamespace(self.router['id'],
self.agent_conf,
self.driver,
self.use_ipv6)
self.snat_namespace.create()
return self.snat_namespace
@ -185,12 +191,10 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter):
def update_routing_table(self, operation, route):
if self.get_ex_gw_port() and self._is_this_snat_host():
ns_name = dvr_snat_ns.SnatNamespace.get_snat_ns_name(
self.router['id'])
ns_name = self.snat_namespace.name
# NOTE: For now let us apply the static routes both in SNAT
# namespace and Router Namespace, to reduce the complexity.
ip_wrapper = ip_lib.IPWrapper(namespace=ns_name)
if ip_wrapper.netns.exists(ns_name):
if self.snat_namespace.exists():
super(DvrEdgeRouter, self)._update_routing_table(
operation, route, namespace=ns_name)
else:
@ -200,5 +204,5 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter):
def delete(self, agent):
super(DvrEdgeRouter, self).delete(agent)
if self.snat_namespace:
if self.snat_namespace.exists():
self.snat_namespace.delete()

View File

@ -25,6 +25,7 @@ class DvrRouterBase(router.RouterInfo):
self.agent = agent
self.host = host
self.snat_ports = None
def process(self, agent):
super(DvrRouterBase, self).process(agent)

View File

@ -132,6 +132,10 @@ class NamespaceManager(object):
ns_prefix, ns_id = self.get_prefix_and_id(ns)
self._cleanup(ns_prefix, ns_id)
def ensure_snat_cleanup(self, router_id):
prefix = dvr_snat_ns.SNAT_NS_PREFIX
self._cleanup(prefix, router_id)
def _cleanup(self, ns_prefix, ns_id):
ns_class = self.ns_prefix_to_class_map[ns_prefix]
ns = ns_class(ns_id, self.agent_conf, self.driver, use_ipv6=False)

View File

@ -83,6 +83,9 @@ class Namespace(object):
msg = _LE('Failed trying to delete namespace: %s')
LOG.exception(msg, self.name)
def exists(self):
return self.ip_wrapper_root.netns.exists(self.name)
class RouterNamespace(Namespace):

View File

@ -1199,6 +1199,23 @@ class TestDvrRouter(L3AgentTestFramework):
self._delete_router(self.agent, router.router_id)
self._assert_fip_namespace_deleted(ext_gateway_port)
def test_dvr_unused_snat_ns_deleted_when_agent_restarts_after_move(self):
"""Test to validate the stale snat namespace delete with snat move.
This test validates the stale snat namespace cleanup when
the agent restarts after the gateway port has been moved
from the agent.
"""
self.agent.conf.agent_mode = 'dvr_snat'
router_info = self.generate_dvr_router_info()
router1 = self.manage_router(self.agent, router_info)
self._assert_snat_namespace_exists(router1)
restarted_agent = neutron_l3_agent.L3NATAgentWithStateReport(
self.agent.host, self.agent.conf)
router1.router['gw_port_host'] = "my-new-host"
restarted_router = self.manage_router(restarted_agent, router1.router)
self._assert_snat_namespace_does_not_exist(restarted_router)
def test_dvr_update_floatingip_statuses(self):
self.agent.conf.agent_mode = 'dvr'
self._test_update_floatingip_statuses(self.generate_dvr_router_info())

View File

@ -617,6 +617,68 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
def test_external_gateway_updated_dual_stack(self):
self._test_external_gateway_updated(dual_stack=True)
def test_dvr_edge_router_init_for_snat_namespace_object(self):
router = {'id': _uuid()}
ri = dvr_router.DvrEdgeRouter(mock.Mock(),
HOSTNAME,
router['id'],
router,
**self.ri_kwargs)
# Make sure that ri.snat_namespace object is created when the
# router is initialized
self.assertIsNotNone(ri.snat_namespace)
def test_ext_gw_updated_calling_snat_ns_delete_if_gw_port_host_none(
self):
"""Test to check the impact of snat_namespace object.
This function specifically checks the impact of the snat
namespace object value on external_gateway_removed for deleting
snat_namespace when the gw_port_host mismatches or none.
"""
router = l3_test_common.prepare_router_data(num_internal_ports=2)
ri = dvr_router.DvrEdgeRouter(mock.Mock(),
HOSTNAME,
router['id'],
router,
**self.ri_kwargs)
with mock.patch.object(dvr_snat_ns.SnatNamespace,
'delete') as snat_ns_delete:
interface_name, ex_gw_port = l3_test_common.prepare_ext_gw_test(
self, ri)
router['gw_port_host'] = ''
ri.external_gateway_updated(ex_gw_port, interface_name)
if router['gw_port_host'] != ri.host:
self.assertEqual(1, snat_ns_delete.call_count)
@mock.patch.object(namespaces.Namespace, 'delete')
def test_snat_ns_delete_not_called_when_snat_namespace_does_not_exist(
self, mock_ns_del):
"""Test to check the impact of snat_namespace object.
This function specifically checks the impact of the snat
namespace object initialization without the actual creation
of snat_namespace. When deletes are issued to the snat
namespace based on the snat namespace object existence, it
should be checking for the valid namespace existence before
it tries to delete.
"""
router = l3_test_common.prepare_router_data(num_internal_ports=2)
ri = dvr_router.DvrEdgeRouter(mock.Mock(),
HOSTNAME,
router['id'],
router,
**self.ri_kwargs)
# Make sure we set a return value to emulate the non existence
# of the namespace.
self.mock_ip.netns.exists.return_value = False
self.assertIsNotNone(ri.snat_namespace)
interface_name, ex_gw_port = l3_test_common.prepare_ext_gw_test(self,
ri)
ri._external_gateway_removed = mock.Mock()
ri.external_gateway_removed(ex_gw_port, interface_name)
self.assertFalse(mock_ns_del.called)
def _test_ext_gw_updated_dvr_edge_router(self, host_match,
snat_hosted_before=True):
"""
@ -635,8 +697,6 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
if snat_hosted_before:
ri._create_snat_namespace()
snat_ns_name = ri.snat_namespace.name
else:
self.assertIsNone(ri.snat_namespace)
interface_name, ex_gw_port = l3_test_common.prepare_ext_gw_test(self,
ri)
@ -656,7 +716,6 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
bridge=self.conf.external_network_bridge,
namespace=snat_ns_name,
prefix=l3_agent.EXTERNAL_DEV_PREFIX)
self.assertIsNone(ri.snat_namespace)
else:
if not snat_hosted_before:
self.assertIsNotNone(ri.snat_namespace)

View File

@ -92,6 +92,13 @@ class TestNamespaceManager(NamespaceManagerTestCaseFramework):
retrieved_ns_names = self.ns_manager.list_all()
self.assertFalse(retrieved_ns_names)
def test_ensure_snat_cleanup(self):
router_id = _uuid()
with mock.patch.object(self.ns_manager, '_cleanup') as mock_cleanup:
self.ns_manager.ensure_snat_cleanup(router_id)
mock_cleanup.assert_called_once_with(dvr_snat_ns.SNAT_NS_PREFIX,
router_id)
def test_ensure_router_cleanup(self):
router_id = _uuid()
ns_names = [namespaces.NS_PREFIX + _uuid() for _ in range(5)]