Update port bindings for master router

An HA port needs to point to the correct host (where the master router
is running) in order for L2Population to work.

Hence, this patch introduces two fixes:
* When a port owned by an HA router is up we make sure it points to the
  right node where the master is running, or a random node if there is
  no master yet (This corner case is fixed by the 2nd bullet point).

* When a L3 agent reports it's hosting a master, we need to update the
  port binding to the host the master is now running on. This fixes
  both routers with no elected master (Yet) and failovers.

This patch also changes the L3 HA failover test to use l2pop.
Note that the test does not pass when using l2pop without this patch.

Closes-Bug: #1365476
Co-Authored-By: Assaf Muller <amuller@redhat.com>
Change-Id: I8475548947526d8ea736ed7aa754fd0ca475cae2
This commit is contained in:
Mike Kolesnik 2015-04-07 19:54:09 -04:00
parent c5359276f6
commit 91d3a0219a
4 changed files with 179 additions and 18 deletions

View File

@ -104,33 +104,70 @@ class L3RpcCallback(object):
router.get('gw_port_host'),
p, router['id'])
else:
self._ensure_host_set_on_port(context, host,
router.get('gw_port'),
router['id'])
self._ensure_host_set_on_port(
context, host,
router.get('gw_port'),
router['id'],
ha_router_port=router.get('ha'))
for interface in router.get(constants.INTERFACE_KEY, []):
self._ensure_host_set_on_port(context, host,
interface, router['id'])
self._ensure_host_set_on_port(
context,
host,
interface,
router['id'],
ha_router_port=router.get('ha'))
interface = router.get(constants.HA_INTERFACE_KEY)
if interface:
self._ensure_host_set_on_port(context, host, interface,
router['id'])
def _ensure_host_set_on_port(self, context, host, port, router_id=None):
def _ensure_host_set_on_port(self, context, host, port, router_id=None,
ha_router_port=False):
if (port and host is not None and
(port.get('device_owner') !=
constants.DEVICE_OWNER_DVR_INTERFACE and
port.get(portbindings.HOST_ID) != host or
port.get(portbindings.VIF_TYPE) ==
portbindings.VIF_TYPE_BINDING_FAILED)):
# All ports, including ports created for SNAT'ing for
# DVR are handled here
try:
self.plugin.update_port(context, port['id'],
{'port': {portbindings.HOST_ID: host}})
except exceptions.PortNotFound:
LOG.debug("Port %(port)s not found while updating "
"agent binding for router %(router)s.",
{"port": port['id'], "router": router_id})
# Ports owned by non-HA routers are bound again if they're
# already bound but the router moved to another host.
if not ha_router_port:
# All ports, including ports created for SNAT'ing for
# DVR are handled here
try:
self.plugin.update_port(
context,
port['id'],
{'port': {portbindings.HOST_ID: host}})
except exceptions.PortNotFound:
LOG.debug("Port %(port)s not found while updating "
"agent binding for router %(router)s.",
{"port": port['id'], "router": router_id})
# Ports owned by HA routers should only be bound once, if
# they are unbound. These ports are moved when an agent reports
# that one of its routers moved to the active state.
else:
if not port.get(portbindings.HOST_ID):
active_host = (
self.l3plugin.get_active_host_for_ha_router(
context, router_id))
if active_host:
host = active_host
# If there is currently no active router instance (For
# example it's a new router), the host that requested
# the routers (Essentially a random host) will do. The
# port binding will be corrected when an active is
# elected.
try:
self.plugin.update_port(
context,
port['id'],
{'port': {portbindings.HOST_ID: host}})
except exceptions.PortNotFound:
LOG.debug("Port %(port)s not found while updating "
"agent binding for router %(router)s.",
{"port": port['id'], "router": router_id})
elif (port and
port.get('device_owner') ==
constants.DEVICE_OWNER_DVR_INTERFACE):

View File

@ -64,6 +64,8 @@ HA_NETWORK_NAME = 'HA network tenant %s'
HA_SUBNET_NAME = 'HA subnet tenant %s'
HA_PORT_NAME = 'HA port tenant %s'
MINIMUM_AGENTS_FOR_HA = 2
HA_ROUTER_STATE_ACTIVE = 'active'
HA_ROUTER_STATE_STANDBY = 'standby'
IPv4 = 'IPv4'
IPv6 = 'IPv6'

View File

@ -29,6 +29,7 @@ from neutron.db import l3_dvr_db
from neutron.db import model_base
from neutron.db import models_v2
from neutron.extensions import l3_ext_ha_mode as l3_ha
from neutron.extensions import portbindings
from neutron.i18n import _LI
VR_ID_RANGE = set(range(1, 255))
@ -80,9 +81,11 @@ class L3HARouterAgentPortBinding(model_base.BASEV2):
ondelete='CASCADE'))
agent = orm.relationship(agents_db.Agent)
state = sa.Column(sa.Enum('active', 'standby', name='l3_ha_states'),
default='standby',
server_default='standby')
state = sa.Column(sa.Enum(constants.HA_ROUTER_STATE_ACTIVE,
constants.HA_ROUTER_STATE_STANDBY,
name='l3_ha_states'),
default=constants.HA_ROUTER_STATE_STANDBY,
server_default=constants.HA_ROUTER_STATE_STANDBY)
class L3HARouterNetwork(model_base.BASEV2):
@ -452,6 +455,20 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin):
bindings = self.get_ha_router_port_bindings(context, [router_id])
return [(binding.agent, binding.state) for binding in bindings]
def get_active_host_for_ha_router(self, context, router_id):
bindings = self.get_l3_bindings_hosting_router_with_ha_states(
context, router_id)
# TODO(amuller): In case we have two or more actives, this method
# needs to return the last agent to become active. This requires
# timestamps for state changes. Otherwise, if a host goes down
# and another takes over, we'll have two actives. In this case,
# if an interface is added to a router, its binding might be wrong
# and l2pop would not work correctly.
return next(
(agent.host for agent, state in bindings
if state == constants.HA_ROUTER_STATE_ACTIVE),
None)
def _process_sync_ha_data(self, context, routers, host):
routers_dict = dict((router['id'], router) for router in routers)
@ -503,3 +520,22 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin):
bindings = self.get_ha_router_port_bindings(
context, router_ids=states.keys(), host=host)
self._set_router_states(context, bindings, states)
self._update_router_port_bindings(context, states, host)
def _update_router_port_bindings(self, context, states, host):
admin_ctx = context.elevated()
device_filter = {'device_id': states.keys(),
'device_owner':
[constants.DEVICE_OWNER_ROUTER_INTF]}
ports = self._core_plugin.get_ports(admin_ctx, filters=device_filter)
active_ports = (port for port in ports
if states[port['device_id']] == constants.HA_ROUTER_STATE_ACTIVE)
for port in active_ports:
port[portbindings.HOST_ID] = host
try:
self._core_plugin.update_port(admin_ctx, port['id'],
{attributes.PORT: port})
except (orm.exc.StaleDataError, orm.exc.ObjectDeletedError):
# Take concurrently deleted interfaces in to account
pass

View File

@ -18,6 +18,7 @@ from oslo_config import cfg
from oslo_utils import timeutils
from oslo_utils import uuidutils
from neutron.api.rpc.handlers import l3_rpc
from neutron.api.v2 import attributes
from neutron.common import constants
from neutron import context
@ -27,6 +28,7 @@ from neutron.db import l3_agentschedulers_db
from neutron.db import l3_hamode_db
from neutron.extensions import l3
from neutron.extensions import l3_ext_ha_mode
from neutron.extensions import portbindings
from neutron import manager
from neutron.scheduler import l3_agent_scheduler
from neutron.tests.common import helpers
@ -417,6 +419,20 @@ class L3HATestCase(L3HATestFramework):
routers_after = self.plugin.get_routers(self.admin_ctx)
self.assertEqual(routers_before, routers_after)
def test_get_active_host_for_ha_router(self):
router = self._create_router()
self._bind_router(router['id'])
self.assertEqual(
None,
self.plugin.get_active_host_for_ha_router(
self.admin_ctx, router['id']))
self.plugin.update_routers_states(
self.admin_ctx, {router['id']: 'active'}, self.agent2['host'])
self.assertEqual(
self.agent2['host'],
self.plugin.get_active_host_for_ha_router(
self.admin_ctx, router['id']))
def test_update_routers_states(self):
router1 = self._create_router()
self._bind_router(router1['id'])
@ -518,6 +534,76 @@ class L3HAModeDbTestCase(L3HATestFramework):
self.admin_ctx, [router['id']])
self.assertEqual(2, len(bindings))
def test_update_router_port_bindings_no_ports(self):
self.plugin._update_router_port_bindings(
self.admin_ctx, {}, self.agent1['host'])
def _get_first_interface(self, router_id):
device_filter = {'device_id': [router_id],
'device_owner':
[constants.DEVICE_OWNER_ROUTER_INTF]}
return self.core_plugin.get_ports(
self.admin_ctx,
filters=device_filter)[0]
def test_update_router_port_bindings_updates_host(self):
network_id = self._create_network(self.core_plugin, self.admin_ctx)
subnet = self._create_subnet(self.core_plugin, self.admin_ctx,
network_id)
interface_info = {'subnet_id': subnet['id']}
router = self._create_router()
self._bind_router(router['id'])
self.plugin.add_router_interface(self.admin_ctx,
router['id'],
interface_info)
self.plugin._update_router_port_bindings(
self.admin_ctx, {router['id']: 'active'}, self.agent1['host'])
port = self._get_first_interface(router['id'])
self.assertEqual(self.agent1['host'], port[portbindings.HOST_ID])
self.plugin._update_router_port_bindings(
self.admin_ctx, {router['id']: 'active'}, self.agent2['host'])
port = self._get_first_interface(router['id'])
self.assertEqual(self.agent2['host'], port[portbindings.HOST_ID])
def test_ensure_host_set_on_ports_binds_correctly(self):
network_id = self._create_network(self.core_plugin, self.admin_ctx)
subnet = self._create_subnet(self.core_plugin, self.admin_ctx,
network_id)
interface_info = {'subnet_id': subnet['id']}
router = self._create_router()
self._bind_router(router['id'])
self.plugin.add_router_interface(self.admin_ctx,
router['id'],
interface_info)
port = self._get_first_interface(router['id'])
self.assertEqual('', port[portbindings.HOST_ID])
# Update the router object to include the first interface
router = (
self.plugin.list_active_sync_routers_on_active_l3_agent(
self.admin_ctx, self.agent1['host'], [router['id']]))[0]
# ensure_host_set_on_ports binds an unbound port
callback = l3_rpc.L3RpcCallback()
callback._l3plugin = self.plugin
callback._ensure_host_set_on_ports(
self.admin_ctx, self.agent1['host'], [router])
port = self._get_first_interface(router['id'])
self.assertEqual(self.agent1['host'], port[portbindings.HOST_ID])
# ensure_host_set_on_ports does not rebind a bound port
router = (
self.plugin.list_active_sync_routers_on_active_l3_agent(
self.admin_ctx, self.agent1['host'], [router['id']]))[0]
callback._ensure_host_set_on_ports(
self.admin_ctx, self.agent2['host'], [router])
port = self._get_first_interface(router['id'])
self.assertEqual(self.agent1['host'], port[portbindings.HOST_ID])
class L3HAUserTestCase(L3HATestFramework):