From b411b5ff59ef182b1e392d0a41dfa02d5e0ecc6b Mon Sep 17 00:00:00 2001 From: LIU Yulong Date: Fri, 28 Sep 2018 18:33:28 +0800 Subject: [PATCH] Fix dvr ha router gateway goes wrong host During L3 agent restart, the dvr ha router gateway port binding host may change because the multiple ha router scheduled hosts. After this patch, we return the 'master' ha binding host directly during the gateway port create. And do not let the original 'master' (current is backup) host override the gateway port binding host. Closes-Bug: #1793529 Change-Id: Icb2112c7f0bd42c4f4b1cf32d6b83b6d97f85ef7 (cherry picked from commit 1973a037c29b0fc8bf6347771ed930726d1648f5) --- neutron/db/l3_dvr_db.py | 36 ++++++++++++---------- neutron/db/l3_hamode_db.py | 35 +++++++++++++++++++++ neutron/tests/unit/db/test_l3_dvr_db.py | 34 ++++++++++++++++++++ neutron/tests/unit/db/test_l3_hamode_db.py | 36 ++++++++++++++++++++++ 4 files changed, 124 insertions(+), 17 deletions(-) diff --git a/neutron/db/l3_dvr_db.py b/neutron/db/l3_dvr_db.py index 0a1bfd783e0..76f651a3e0f 100644 --- a/neutron/db/l3_dvr_db.py +++ b/neutron/db/l3_dvr_db.py @@ -610,29 +610,31 @@ class _DVRAgentInterfaceMixin(object): LOG.debug("Return the SNAT ports: %s", interfaces) return interfaces + def _get_gateway_port_host(self, context, router, gw_ports): + binding_objs = rb_obj.RouterL3AgentBinding.get_objects( + context, router_id=[router['id']]) + if not binding_objs: + return + + gw_port_id = router['gw_port_id'] + # Collect gw ports only if available + if gw_port_id and gw_ports.get(gw_port_id): + l3_agent = ag_obj.Agent.get_object(context, + id=binding_objs[0].l3_agent_id) + return l3_agent.host + def _build_routers_list(self, context, routers, gw_ports): # Perform a single query up front for all routers routers = super(_DVRAgentInterfaceMixin, self)._build_routers_list( context, routers, gw_ports) if not routers: return [] - router_ids = [r['id'] for r in routers] - binding_objs = rb_obj.RouterL3AgentBinding.get_objects( - context, router_id=router_ids) - bindings = dict((b.router_id, b) for b in binding_objs) - for rtr in routers: - gw_port_id = rtr['gw_port_id'] - # Collect gw ports only if available - if gw_port_id and gw_ports.get(gw_port_id): - binding = bindings.get(rtr['id']) - if not binding: - rtr['gw_port_host'] = None - LOG.debug('No snat is bound to router %s', rtr['id']) - continue - - l3_agent = ag_obj.Agent.get_object(context, - id=binding.l3_agent_id) - rtr['gw_port_host'] = l3_agent.host + for router in routers: + gw_port_host = self._get_gateway_port_host( + context, router, gw_ports) + LOG.debug("Set router %s gateway port host: %s", + router['id'], gw_port_host) + router['gw_port_host'] = gw_port_host return routers diff --git a/neutron/db/l3_hamode_db.py b/neutron/db/l3_hamode_db.py index 2bec742ce50..c3050754557 100644 --- a/neutron/db/l3_hamode_db.py +++ b/neutron/db/l3_hamode_db.py @@ -718,6 +718,41 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin, # Take concurrently deleted interfaces in to account pass + def _get_gateway_port_host(self, context, router, gw_ports): + if not router.get('ha'): + return super(L3_HA_NAT_db_mixin, self)._get_gateway_port_host( + context, router, gw_ports) + + gw_port_id = router['gw_port_id'] + gateway_port = gw_ports.get(gw_port_id) + if not gw_port_id or not gateway_port: + return + gateway_port_status = gateway_port['status'] + gateway_port_binding_host = gateway_port[portbindings.HOST_ID] + + admin_ctx = context.elevated() + router_id = router['id'] + ha_bindings = self.get_l3_bindings_hosting_router_with_ha_states( + admin_ctx, router_id) + LOG.debug("HA router %(router_id)s gateway port %(gw_port_id)s " + "binding host: %(host)s, status: %(status)s", + {"router_id": router_id, + "gw_port_id": gateway_port['id'], + "host": gateway_port_binding_host, + "status": gateway_port_status}) + for ha_binding_agent, ha_binding_state in ha_bindings: + if ha_binding_state != n_const.HA_ROUTER_STATE_ACTIVE: + continue + # For create router gateway, the gateway port may not be ACTIVE + # yet, so we return 'master' host directly. + if gateway_port_status != constants.PORT_STATUS_ACTIVE: + return ha_binding_agent.host + # Do not let the original 'master' (current is backup) host, + # override the gateway port binding host. + if (gateway_port_status == constants.PORT_STATUS_ACTIVE and + ha_binding_agent.host == gateway_port_binding_host): + return ha_binding_agent.host + def is_ha_router(router): """Return True if router to be handled is ha.""" diff --git a/neutron/tests/unit/db/test_l3_dvr_db.py b/neutron/tests/unit/db/test_l3_dvr_db.py index 4706a2f8ac7..f9aa26fb2a8 100644 --- a/neutron/tests/unit/db/test_l3_dvr_db.py +++ b/neutron/tests/unit/db/test_l3_dvr_db.py @@ -33,6 +33,7 @@ from neutron.db import l3_dvrscheduler_db from neutron.db.models import l3 as l3_models from neutron.db import models_v2 from neutron.objects import agent as agent_obj +from neutron.objects import l3agent as rb_obj from neutron.objects import router as router_obj from neutron.tests.unit.db import test_db_base_plugin_v2 @@ -1006,3 +1007,36 @@ class L3DvrTestCase(test_db_base_plugin_v2.NeutronDbPluginV2TestCase): self.assertEqual(port_dict['device_id'], port_info['device_id']) self.assertEqual(port_dict['device_owner'], port_info['device_owner']) + + def test__get_sync_routers_check_gw_port_host(self): + router_dict = {'name': 'test_router', 'admin_state_up': True, + 'distributed': True} + router = self._create_router(router_dict) + with self.network() as public,\ + self.subnet() as subnet: + ext_net_1_id = public['network']['id'] + self.core_plugin.update_network( + self.ctx, ext_net_1_id, + {'network': {'router:external': True}}) + self.mixin.update_router( + self.ctx, router['id'], + {'router': {'external_gateway_info': + {'network_id': ext_net_1_id}}}) + self.mixin.add_router_interface(self.ctx, router['id'], + {'subnet_id': subnet['subnet']['id']}) + routers = self.mixin._get_sync_routers(self.ctx, + router_ids=[router['id']]) + self.assertIsNone(routers[0]['gw_port_host']) + + agent = mock.Mock() + agent.host = "fake-host" + bind = mock.Mock() + bind.l3_agent_id = "fake-id" + with mock.patch.object( + rb_obj.RouterL3AgentBinding, 'get_objects', + return_value=[bind]), mock.patch.object( + agent_obj.Agent, 'get_object', + return_value=agent): + routers = self.mixin._get_sync_routers( + self.ctx, router_ids=[router['id']]) + self.assertEqual("fake-host", routers[0]['gw_port_host']) diff --git a/neutron/tests/unit/db/test_l3_hamode_db.py b/neutron/tests/unit/db/test_l3_hamode_db.py index 4c13db369ba..c5e98f090a2 100644 --- a/neutron/tests/unit/db/test_l3_hamode_db.py +++ b/neutron/tests/unit/db/test_l3_hamode_db.py @@ -1325,6 +1325,42 @@ class L3HAModeDbTestCase(L3HATestFramework): self.assertEqual(constants.DEVICE_OWNER_ROUTER_INTF, routerport.port.device_owner) + def test__get_sync_routers_with_state_change_and_check_gw_port_host(self): + ext_net = self._create_network(self.core_plugin, self.admin_ctx, + external=True) + network_id = self._create_network(self.core_plugin, self.admin_ctx) + subnet = self._create_subnet(self.core_plugin, self.admin_ctx, + network_id) + interface_info = {'subnet_id': subnet['id']} + + router = self._create_router() + self.plugin._update_router_gw_info(self.admin_ctx, router['id'], + {'network_id': ext_net}) + self.plugin.add_router_interface(self.admin_ctx, + router['id'], + interface_info) + + self.plugin.update_routers_states( + self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_ACTIVE}, + self.agent1['host']) + self.plugin.update_routers_states( + self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_STANDBY}, + self.agent2['host']) + + routers = self.plugin._get_sync_routers(self.admin_ctx, + router_ids=[router['id']]) + self.assertEqual(self.agent1['host'], routers[0]['gw_port_host']) + + self.plugin.update_routers_states( + self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_STANDBY}, + self.agent1['host']) + self.plugin.update_routers_states( + self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_ACTIVE}, + self.agent2['host']) + routers = self.plugin._get_sync_routers(self.admin_ctx, + router_ids=[router['id']]) + self.assertEqual(self.agent2['host'], routers[0]['gw_port_host']) + class L3HAUserTestCase(L3HATestFramework):