Fix dvr ha router gateway goes wrong host

During L3 agent restart, the dvr ha router gateway port
binding host may change because the multiple ha router
scheduled hosts.

After this patch, we return the 'master' ha binding host
directly during the gateway port create. And do not let
the original 'master' (current is backup) host override
the gateway port binding host.

Closes-Bug: #1793529
Change-Id: Icb2112c7f0bd42c4f4b1cf32d6b83b6d97f85ef7
This commit is contained in:
LIU Yulong 2018-09-28 18:33:28 +08:00 committed by LIU Yulong
parent f253d676b8
commit 1973a037c2
4 changed files with 124 additions and 17 deletions

View File

@ -606,29 +606,31 @@ class _DVRAgentInterfaceMixin(object):
LOG.debug("Return the SNAT ports: %s", interfaces)
return interfaces
def _get_gateway_port_host(self, context, router, gw_ports):
binding_objs = rb_obj.RouterL3AgentBinding.get_objects(
context, router_id=[router['id']])
if not binding_objs:
return
gw_port_id = router['gw_port_id']
# Collect gw ports only if available
if gw_port_id and gw_ports.get(gw_port_id):
l3_agent = ag_obj.Agent.get_object(context,
id=binding_objs[0].l3_agent_id)
return l3_agent.host
def _build_routers_list(self, context, routers, gw_ports):
# Perform a single query up front for all routers
routers = super(_DVRAgentInterfaceMixin, self)._build_routers_list(
context, routers, gw_ports)
if not routers:
return []
router_ids = [r['id'] for r in routers]
binding_objs = rb_obj.RouterL3AgentBinding.get_objects(
context, router_id=router_ids)
bindings = dict((b.router_id, b) for b in binding_objs)
for rtr in routers:
gw_port_id = rtr['gw_port_id']
# Collect gw ports only if available
if gw_port_id and gw_ports.get(gw_port_id):
binding = bindings.get(rtr['id'])
if not binding:
rtr['gw_port_host'] = None
LOG.debug('No snat is bound to router %s', rtr['id'])
continue
l3_agent = ag_obj.Agent.get_object(context,
id=binding.l3_agent_id)
rtr['gw_port_host'] = l3_agent.host
for router in routers:
gw_port_host = self._get_gateway_port_host(
context, router, gw_ports)
LOG.debug("Set router %s gateway port host: %s",
router['id'], gw_port_host)
router['gw_port_host'] = gw_port_host
return routers

View File

@ -715,6 +715,41 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
# Take concurrently deleted interfaces in to account
pass
def _get_gateway_port_host(self, context, router, gw_ports):
if not router.get('ha'):
return super(L3_HA_NAT_db_mixin, self)._get_gateway_port_host(
context, router, gw_ports)
gw_port_id = router['gw_port_id']
gateway_port = gw_ports.get(gw_port_id)
if not gw_port_id or not gateway_port:
return
gateway_port_status = gateway_port['status']
gateway_port_binding_host = gateway_port[portbindings.HOST_ID]
admin_ctx = context.elevated()
router_id = router['id']
ha_bindings = self.get_l3_bindings_hosting_router_with_ha_states(
admin_ctx, router_id)
LOG.debug("HA router %(router_id)s gateway port %(gw_port_id)s "
"binding host: %(host)s, status: %(status)s",
{"router_id": router_id,
"gw_port_id": gateway_port['id'],
"host": gateway_port_binding_host,
"status": gateway_port_status})
for ha_binding_agent, ha_binding_state in ha_bindings:
if ha_binding_state != n_const.HA_ROUTER_STATE_ACTIVE:
continue
# For create router gateway, the gateway port may not be ACTIVE
# yet, so we return 'master' host directly.
if gateway_port_status != constants.PORT_STATUS_ACTIVE:
return ha_binding_agent.host
# Do not let the original 'master' (current is backup) host,
# override the gateway port binding host.
if (gateway_port_status == constants.PORT_STATUS_ACTIVE and
ha_binding_agent.host == gateway_port_binding_host):
return ha_binding_agent.host
def is_ha_router(router):
"""Return True if router to be handled is ha."""

View File

@ -33,6 +33,7 @@ from neutron.db import l3_dvrscheduler_db
from neutron.db.models import l3 as l3_models
from neutron.db import models_v2
from neutron.objects import agent as agent_obj
from neutron.objects import l3agent as rb_obj
from neutron.objects import router as router_obj
from neutron.tests.unit.db import test_db_base_plugin_v2
@ -1007,3 +1008,36 @@ class L3DvrTestCase(test_db_base_plugin_v2.NeutronDbPluginV2TestCase):
self.assertEqual(port_dict['device_id'], port_info['device_id'])
self.assertEqual(port_dict['device_owner'],
port_info['device_owner'])
def test__get_sync_routers_check_gw_port_host(self):
router_dict = {'name': 'test_router', 'admin_state_up': True,
'distributed': True}
router = self._create_router(router_dict)
with self.network() as public,\
self.subnet() as subnet:
ext_net_1_id = public['network']['id']
self.core_plugin.update_network(
self.ctx, ext_net_1_id,
{'network': {'router:external': True}})
self.mixin.update_router(
self.ctx, router['id'],
{'router': {'external_gateway_info':
{'network_id': ext_net_1_id}}})
self.mixin.add_router_interface(self.ctx, router['id'],
{'subnet_id': subnet['subnet']['id']})
routers = self.mixin._get_sync_routers(self.ctx,
router_ids=[router['id']])
self.assertIsNone(routers[0]['gw_port_host'])
agent = mock.Mock()
agent.host = "fake-host"
bind = mock.Mock()
bind.l3_agent_id = "fake-id"
with mock.patch.object(
rb_obj.RouterL3AgentBinding, 'get_objects',
return_value=[bind]), mock.patch.object(
agent_obj.Agent, 'get_object',
return_value=agent):
routers = self.mixin._get_sync_routers(
self.ctx, router_ids=[router['id']])
self.assertEqual("fake-host", routers[0]['gw_port_host'])

View File

@ -1325,6 +1325,42 @@ class L3HAModeDbTestCase(L3HATestFramework):
self.assertEqual(constants.DEVICE_OWNER_ROUTER_INTF,
routerport.port.device_owner)
def test__get_sync_routers_with_state_change_and_check_gw_port_host(self):
ext_net = self._create_network(self.core_plugin, self.admin_ctx,
external=True)
network_id = self._create_network(self.core_plugin, self.admin_ctx)
subnet = self._create_subnet(self.core_plugin, self.admin_ctx,
network_id)
interface_info = {'subnet_id': subnet['id']}
router = self._create_router()
self.plugin._update_router_gw_info(self.admin_ctx, router['id'],
{'network_id': ext_net})
self.plugin.add_router_interface(self.admin_ctx,
router['id'],
interface_info)
self.plugin.update_routers_states(
self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_ACTIVE},
self.agent1['host'])
self.plugin.update_routers_states(
self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_STANDBY},
self.agent2['host'])
routers = self.plugin._get_sync_routers(self.admin_ctx,
router_ids=[router['id']])
self.assertEqual(self.agent1['host'], routers[0]['gw_port_host'])
self.plugin.update_routers_states(
self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_STANDBY},
self.agent1['host'])
self.plugin.update_routers_states(
self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_ACTIVE},
self.agent2['host'])
routers = self.plugin._get_sync_routers(self.admin_ctx,
router_ids=[router['id']])
self.assertEqual(self.agent2['host'], routers[0]['gw_port_host'])
class L3HAUserTestCase(L3HATestFramework):