From 5c1afcaf2b9cb1bd09267a26dd4f5d7f7e99bf85 Mon Sep 17 00:00:00 2001 From: Slawek Kaplonski Date: Mon, 29 Jul 2019 18:15:33 +0200 Subject: [PATCH] [DVR] Add lock during creation of FIP agent gateway port In case when new external network is set as gateway network for dvr router, neutron tries to create floating IP agent gateway port. There should be always max 1 such port per network per L3 agent but sometimes when there are 2 requests to set external gateway for 2 different routers executed almost in same time it may happend that there will be 2 such ports created. That will cause error with configuration of one of routers on L3 agent and this will cause e.g. problems with access from VMs to metadata service. Such issues are visible in DVR CI jobs from time to time. Please check related bug for details. This patch adds lock mechanism during creation of such FIP gateway port. Such solution isn't fully solving exising race condition as if 2 requests will be processed by api workers running on 2 different nodes than this race can still happend. But this should mitigate the issue a bit and solve problem in U/S gates at least. For proper fix we should probably add some constraint on database level to prevent creation of 2 such ports for one network and one host but such solution will not be easy to backport to stable branches so I would prefer first to go with this easy workaround. Conflicts: neutron/db/l3_dvr_db.py Change-Id: Iabab7e4d36c7d6a876b2b74423efd7106a5f63f6 Related-Bug: #1830763 (cherry picked from commit 7b81c1bc67d2d85e03b4c96a8c1c558a2f909836) (cherry picked from commit f7532f0c927a551ea40e81a35818eb83faba4b5a) --- neutron/db/l3_dvr_db.py | 76 ++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 32 deletions(-) diff --git a/neutron/db/l3_dvr_db.py b/neutron/db/l3_dvr_db.py index d35f5da1681..fef38b64dc4 100644 --- a/neutron/db/l3_dvr_db.py +++ b/neutron/db/l3_dvr_db.py @@ -30,6 +30,7 @@ from neutron_lib.exceptions import l3 as l3_exc from neutron_lib.plugins import constants as plugin_constants from neutron_lib.plugins import directory from neutron_lib.plugins import utils as plugin_utils +from oslo_concurrency import lockutils from oslo_config import cfg from oslo_log import helpers as log_helper from oslo_log import log as logging @@ -982,13 +983,9 @@ class _DVRAgentInterfaceMixin(object): def create_fip_agent_gw_port_if_not_exists( self, context, network_id, host): - """Function to return the FIP Agent GW port. - - This function will create a FIP Agent GW port - if required. If the port already exists, it - will return the existing port and will not - create a new one. - """ + # TODO(slaweq): add proper constraint on database level to avoid + # creation of duplicated Floating IP gateway ports for same network and + # same L3 agent. When this will be done, we can get rid of this lock. try: l3_agent_db = self._get_agent_by_type_and_host( context, const.AGENT_TYPE_L3, host) @@ -1001,31 +998,46 @@ class _DVRAgentInterfaceMixin(object): l3_agent_mode = self._get_agent_mode(l3_agent_db) if l3_agent_mode == const.L3_AGENT_MODE_DVR_NO_EXTERNAL: return - if l3_agent_db: - LOG.debug("Agent ID exists: %s", l3_agent_db['id']) - f_port = self._get_agent_gw_ports_exist_for_network( - context, network_id, host, l3_agent_db['id']) - if not f_port: - LOG.info('Agent Gateway port does not exist,' - ' so create one: %s', f_port) - port_data = {'tenant_id': '', - 'network_id': network_id, - 'device_id': l3_agent_db['id'], - 'device_owner': const.DEVICE_OWNER_AGENT_GW, - portbindings.HOST_ID: host, - 'admin_state_up': True, - 'name': ''} - agent_port = plugin_utils.create_port( - self._core_plugin, context, {'port': port_data}) - if agent_port: - self._populate_mtu_and_subnets_for_ports(context, - [agent_port]) - return agent_port - msg = _("Unable to create the Agent Gateway Port") - raise n_exc.BadRequest(resource='router', msg=msg) - else: - self._populate_mtu_and_subnets_for_ports(context, [f_port]) - return f_port + if not l3_agent_db: + return + lock_name = 'fip-gw-lock-' + network_id + '-' + host + with lockutils.lock(lock_name, external=True): + return self._create_fip_agent_gw_port_if_not_exists( + context, network_id, host, l3_agent_db) + + def _create_fip_agent_gw_port_if_not_exists(self, context, network_id, + host, l3_agent_db): + """Function to return the FIP Agent GW port. + + This function will create a FIP Agent GW port + if required. If the port already exists, it + will return the existing port and will not + create a new one. + """ + LOG.debug("Agent ID exists: %s", l3_agent_db['id']) + f_port = self._get_agent_gw_ports_exist_for_network( + context, network_id, host, l3_agent_db['id']) + if not f_port: + LOG.info('Agent Gateway port does not exist,' + ' so create one: %s', f_port) + port_data = {'tenant_id': '', + 'network_id': network_id, + 'device_id': l3_agent_db['id'], + 'device_owner': const.DEVICE_OWNER_AGENT_GW, + portbindings.HOST_ID: host, + 'admin_state_up': True, + 'name': ''} + agent_port = plugin_utils.create_port( + self._core_plugin, context, {'port': port_data}) + if agent_port: + self._populate_mtu_and_subnets_for_ports(context, + [agent_port]) + return agent_port + msg = _("Unable to create the Agent Gateway Port") + raise n_exc.BadRequest(resource='router', msg=msg) + else: + self._populate_mtu_and_subnets_for_ports(context, [f_port]) + return f_port def _generate_arp_table_and_notify_agent( self, context, fixed_ip, mac_address, notifier):