From c74265efb9339bee6011851d8a160067854a2818 Mon Sep 17 00:00:00 2001
From: Oleg Bondarev
Date: Tue, 26 Jan 2016 18:01:50 +0300
Subject: [PATCH] DVR: avoid race on dvr serviceable port deletion

Please see race details in the bug report.

The fix is to check for dvr routers that need to be removed after the
actual port deletion. As a bonus we can get rid of the dvr logic in
ml2 plugin delete_port().

It's hard to come up with a test here as the race can only be
reproduced with multiple API workers.

Closes-Bug: #1538163
Change-Id: I7ab1b0c18e5daa6f35a37370b9be79e689e2ba50
---
 neutron/db/l3_agentschedulers_db.py           |  7 +--
 neutron/db/l3_dvrscheduler_db.py              | 57 +++++++++----------
 neutron/plugins/ml2/plugin.py                 | 11 +---
 neutron/tests/unit/plugins/ml2/test_plugin.py | 48 +++++++---------
 .../unit/scheduler/test_l3_agent_scheduler.py | 18 +++---
 5 files changed, 59 insertions(+), 82 deletions(-)
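Note for reviewers: the failure mode that the removed except_port logic
allows, distilled into a self-contained sketch (plain Python for
illustration only, not Neutron code; port names are made up). Two API
workers delete the last two DVR-serviced ports on one host; with the
old pre-delete check each worker excludes only its own port, still sees
the other (not yet deleted) port, and so neither removes the router
from the host's L3 agent:

    # Illustrative sketch only.
    ports_on_host = {'port-a', 'port-b'}   # made-up port ids, same host/subnet

    def old_pre_delete_check(port_id):
        # Ran BEFORE the port row was removed, with except_port=port_id:
        # each worker ignores only its own port.
        return bool(ports_on_host - {port_id})   # True: keep router on host

    def new_post_delete_check():
        # Runs AFTER the port row is removed; no except_port needed.
        return bool(ports_on_host)

    # The two workers interleave their pre-delete checks:
    keep_a = old_pre_delete_check('port-a')   # True, still sees port-b
    keep_b = old_pre_delete_check('port-b')   # True, still sees port-a
    ports_on_host.clear()                     # both deletes then commit
    assert keep_a and keep_b                  # router is never removed

    # With the check moved after deletion, whichever worker finishes
    # last sees an empty set and schedules the router removal:
    assert new_post_delete_check() is False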
diff --git a/neutron/db/l3_agentschedulers_db.py b/neutron/db/l3_agentschedulers_db.py
index 7b1d821d731..07d4c8ffd91 100644
--- a/neutron/db/l3_agentschedulers_db.py
+++ b/neutron/db/l3_agentschedulers_db.py
@@ -446,15 +446,12 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
                 if agentschedulers_db.AgentSchedulerDbMixin.is_eligible_agent(
                     active, l3_agent)]
 
-    def check_dvr_serviceable_ports_on_host(
-            self, context, host, subnet_ids, except_port=None):
+    def check_dvr_serviceable_ports_on_host(self, context, host, subnet_ids):
         """Check for existence of dvr serviceable ports on host
 
         :param context: request context
         :param host: host to look ports on
         :param subnet_ids: IDs of subnets to look ports on
-        :param except_port: ID of the port to ignore (used when checking if
-            DVR router should be removed from host before actual port remove)
         :return: return True if dvr serviceable port exists on host,
             otherwise return False
         """
@@ -472,8 +469,6 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
                 constants.DEVICE_OWNER_COMPUTE_PREFIX),
             models_v2.Port.device_owner.in_(
                 n_utils.get_other_dvr_serviced_device_owners()))
-        if except_port:
-            ports_query = ports_query.filter(models_v2.Port.id != except_port)
         ports_query = ports_query.filter(owner_filter)
         return ports_query.first() is not None
 
diff --git a/neutron/db/l3_dvrscheduler_db.py b/neutron/db/l3_dvrscheduler_db.py
index d8766b00f46..fd011233433 100644
--- a/neutron/db/l3_dvrscheduler_db.py
+++ b/neutron/db/l3_dvrscheduler_db.py
@@ -88,8 +88,8 @@ class L3_DVRsch_db_mixin(l3agent_sch_db.L3AgentSchedulerDbMixin):
         if not l3_agent_on_host:
             return
 
-        ips = port['fixed_ips']
-        router_ids = self.get_dvr_routers_by_portid(context, port['id'], ips)
+        subnet_ids = [ip['subnet_id'] for ip in port['fixed_ips']]
+        router_ids = self.get_dvr_routers_by_subnet_ids(context, subnet_ids)
         if router_ids:
             LOG.debug('DVR: Handle new service port, host %(host)s, '
                       'router ids %(router_ids)s',
@@ -97,21 +97,19 @@ class L3_DVRsch_db_mixin(l3agent_sch_db.L3AgentSchedulerDbMixin):
             self.l3_rpc_notifier.routers_updated_on_host(
                 context, router_ids, port_host)
 
-    def get_dvr_routers_by_portid(self, context, port_id, fixed_ips=None):
+    def get_dvr_routers_by_subnet_ids(self, context, subnet_ids):
         """Gets the dvr routers on vmport subnets."""
+        if not subnet_ids:
+            return set()
+
         router_ids = set()
-        if fixed_ips is None:
-            port_dict = self._core_plugin.get_port(context, port_id)
-            fixed_ips = port_dict['fixed_ips']
-        for fixedip in fixed_ips:
-            vm_subnet = fixedip['subnet_id']
-            filter_sub = {'fixed_ips': {'subnet_id': [vm_subnet]},
-                          'device_owner':
-                          [n_const.DEVICE_OWNER_DVR_INTERFACE]}
-            subnet_ports = self._core_plugin.get_ports(
-                context, filters=filter_sub)
-            for subnet_port in subnet_ports:
-                router_ids.add(subnet_port['device_id'])
+        filter_sub = {'fixed_ips': {'subnet_id': subnet_ids},
+                      'device_owner':
+                      [n_const.DEVICE_OWNER_DVR_INTERFACE]}
+        subnet_ports = self._core_plugin.get_ports(
+            context, filters=filter_sub)
+        for subnet_port in subnet_ports:
+            router_ids.add(subnet_port['device_id'])
         return router_ids
 
     def get_subnet_ids_on_router(self, context, router_id):
@@ -129,24 +127,24 @@ class L3_DVRsch_db_mixin(l3agent_sch_db.L3AgentSchedulerDbMixin):
                       'for router %s', router_id)
         return subnet_ids
 
-    def get_dvr_routers_to_remove(self, context, port_id, port_host=None):
-        """Returns info about which routers should be removed from
+    def get_dvr_routers_to_remove(self, context, deleted_port):
+        """Returns info about which routers should be removed
 
-        In case dvr serviceable port is about to be deleted we need to check
+        In case dvr serviceable port was deleted we need to check
         if any dvr routers should be removed from l3 agent on port's host
         """
+        if not n_utils.is_dvr_serviced(deleted_port['device_owner']):
+            return []
+
         admin_context = context.elevated()
-        router_ids = self.get_dvr_routers_by_portid(admin_context, port_id)
-        if not port_host:
-            port_host = ml2_db.get_port_binding_host(admin_context.session,
-                                                     port_id)
-            if not port_host:
-                LOG.debug('Host name not found for port %s', port_id)
-                return []
+        port_host = deleted_port[portbindings.HOST_ID]
+        subnet_ids = [ip['subnet_id'] for ip in deleted_port['fixed_ips']]
+        router_ids = self.get_dvr_routers_by_subnet_ids(admin_context,
+                                                        subnet_ids)
         if not router_ids:
             LOG.debug('No DVR routers for this DVR port %(port)s '
-                      'on host %(host)s', {'port': port_id,
+                      'on host %(host)s', {'port': deleted_port['id'],
                                            'host': port_host})
             return []
         agent = self._get_agent_by_type_and_host(
@@ -163,7 +161,7 @@ class L3_DVRsch_db_mixin(l3agent_sch_db.L3AgentSchedulerDbMixin):
             subnet_ids = self.get_subnet_ids_on_router(admin_context,
                                                        router_id)
             if self.check_dvr_serviceable_ports_on_host(
-                    admin_context, port_host, subnet_ids, except_port=port_id):
+                    admin_context, port_host, subnet_ids):
                 continue
             filter_rtr = {'device_id': [router_id],
                           'device_owner':
@@ -310,10 +308,10 @@ def _notify_l3_agent_new_port(resource, event, trigger, **kwargs):
 def _notify_port_delete(event, resource, trigger, **kwargs):
     context = kwargs['context']
     port = kwargs['port']
-    removed_routers = kwargs['removed_routers']
     l3plugin = manager.NeutronManager.get_service_plugins().get(
         service_constants.L3_ROUTER_NAT)
     l3plugin.delete_arp_entry_for_dvr_service_port(context, port)
+    removed_routers = l3plugin.get_dvr_routers_to_remove(context, port)
     for info in removed_routers:
         l3plugin.l3_rpc_notifier.router_removed_from_agent(
             context, info['router_id'], info['host'])
@@ -339,8 +337,7 @@ def _notify_l3_agent_port_update(resource, event, trigger, **kwargs):
         if is_port_no_longer_serviced or is_port_moved:
             removed_routers = l3plugin.get_dvr_routers_to_remove(
                 context,
-                original_port['id'],
-                port_host=original_port[portbindings.HOST_ID])
+                original_port)
             if removed_routers:
                 removed_router_args = {
                     'context': context,
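Since the AFTER_DELETE callback now runs once the port row is gone,
get_dvr_routers_to_remove() can no longer look anything up by port id;
it reads everything from the deleted-port dict in the event payload. A
sketch of the minimum shape that dict needs (all values are made up;
portbindings.HOST_ID resolves to the 'binding:host_id' key):

    # Hypothetical payload, for illustration only.
    deleted_port = {
        'id': 'port-1',
        'device_owner': 'compute:nova',        # a DVR-serviced owner
        'binding:host_id': 'compute-host-1',   # portbindings.HOST_ID
        'fixed_ips': [{'subnet_id': 'subnet-1',
                       'ip_address': '10.0.0.5'}],
    }
    # removed = l3plugin.get_dvr_routers_to_remove(context, deleted_port)
    # Each entry carries 'router_id', 'agent_id' and 'host', as the
    # scheduler tests below assert.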
diff --git a/neutron/plugins/ml2/plugin.py b/neutron/plugins/ml2/plugin.py
index 4f32e499988..68d2c4e278f 100644
--- a/neutron/plugins/ml2/plugin.py
+++ b/neutron/plugins/ml2/plugin.py
@@ -1353,12 +1353,9 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2,
     def delete_port(self, context, id, l3_port_check=True):
         self._pre_delete_port(context, id, l3_port_check)
         # TODO(armax): get rid of the l3 dependency in the with block
-        removed_routers = []
         router_ids = []
         l3plugin = manager.NeutronManager.get_service_plugins().get(
             service_constants.L3_ROUTER_NAT)
-        is_dvr_enabled = utils.is_extension_supported(
-            l3plugin, const.L3_DISTRIBUTED_EXT_ALIAS)
 
         session = context.session
         with session.begin(subtransactions=True):
@@ -1385,9 +1382,6 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2,
                                                    binding.host)
                 mech_context = driver_context.PortContext(
                     self, context, port, network, binding, levels)
-                if is_dvr_enabled and utils.is_dvr_serviced(device_owner):
-                    removed_routers = l3plugin.get_dvr_routers_to_remove(
-                        context, id)
                 self.mechanism_manager.delete_port_precommit(mech_context)
                 bound_mech_contexts.append(mech_context)
             if l3plugin:
@@ -1399,15 +1393,14 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2,
         super(Ml2Plugin, self).delete_port(context, id)
 
         self._post_delete_port(
-            context, port, router_ids, removed_routers, bound_mech_contexts)
+            context, port, router_ids, bound_mech_contexts)
 
     def _post_delete_port(
-        self, context, port, router_ids, removed_routers, bound_mech_contexts):
+        self, context, port, router_ids, bound_mech_contexts):
         kwargs = {
             'context': context,
             'port': port,
             'router_ids': router_ids,
-            'removed_routers': removed_routers
         }
         registry.notify(resources.PORT, events.AFTER_DELETE, self, **kwargs)
         try:
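The net effect on ml2 is pure decoupling: delete_port() no longer needs
the DVR extension check or a precomputed removed_routers list, and the
L3 plugin recomputes that itself when the AFTER_DELETE notification
arrives. A toy model of that flow (not Neutron's callback manager; all
names here are stand-ins):

    # Toy stand-in for the registry: ml2 publishes only generic data,
    # and the L3 listener derives DVR router removal on its own.
    subscribers = []

    def notify(event, **kwargs):
        for callback in subscribers:
            callback(event, **kwargs)

    def get_dvr_routers_to_remove_stub(port):
        # Stand-in for the real post-delete DB check.
        return (['router-1']
                if port['device_owner'].startswith('compute:') else [])

    def l3_listener(event, port=None, **kwargs):
        # Stand-in for _notify_port_delete(): recompute, don't receive.
        for router_id in get_dvr_routers_to_remove_stub(port):
            print('remove %s from host of %s' % (router_id, port['id']))

    subscribers.append(l3_listener)
    notify('AFTER_DELETE', context=None, router_ids=set(),
           port={'id': 'port-1', 'device_owner': 'compute:nova'})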
diff --git a/neutron/tests/unit/plugins/ml2/test_plugin.py b/neutron/tests/unit/plugins/ml2/test_plugin.py
index a9c40f8c6fa..5414b5a6b5a 100644
--- a/neutron/tests/unit/plugins/ml2/test_plugin.py
+++ b/neutron/tests/unit/plugins/ml2/test_plugin.py
@@ -27,7 +27,9 @@ from oslo_utils import uuidutils
 from sqlalchemy.orm import exc as sqla_exc
 
 from neutron._i18n import _
+from neutron.callbacks import events
 from neutron.callbacks import registry
+from neutron.callbacks import resources
 from neutron.common import constants
 from neutron.common import exceptions as exc
 from neutron.common import utils
@@ -892,47 +894,37 @@ class TestMl2DvrPortsV2(TestMl2PortsV2):
                         mock.PropertyMock(return_value=extensions))
         self.service_plugins = {'L3_ROUTER_NAT': self.l3plugin}
 
-    def _test_delete_dvr_serviced_port(self, device_owner, floating_ip=False):
+    def test_delete_port_notifies_l3_plugin(self, floating_ip=False):
         ns_to_delete = {'host': 'myhost', 'agent_id': 'vm_l3_agent',
                         'router_id': 'my_router'}
-        fip_set = set()
+        router_ids = set()
         if floating_ip:
-            fip_set.add(ns_to_delete['router_id'])
+            router_ids.add(ns_to_delete['router_id'])
 
         with mock.patch.object(manager.NeutronManager,
                                'get_service_plugins',
                                return_value=self.service_plugins),\
-                self.port(device_owner=device_owner) as port,\
+                self.port() as port,\
                 mock.patch.object(registry, 'notify') as notify,\
                 mock.patch.object(self.l3plugin,
                                   'disassociate_floatingips',
-                                  return_value=fip_set),\
-                mock.patch.object(
-                    self.l3plugin,
-                    'get_dvr_routers_to_remove',
-                    return_value=[ns_to_delete]) as get_dvr_routers_to_remove:
-
+                                  return_value=router_ids):
             port_id = port['port']['id']
             self.plugin.delete_port(self.context, port_id)
+            self.assertEqual(2, notify.call_count)
+            # needed for a full match in the assertion below
+            port['port']['extra_dhcp_opts'] = []
+            expected = [mock.call(resources.PORT, events.BEFORE_DELETE,
+                                  mock.ANY, context=self.context,
+                                  port_id=port['port']['id'], port_check=True),
+                        mock.call(resources.PORT, events.AFTER_DELETE,
+                                  mock.ANY, context=self.context,
+                                  port=port['port'],
+                                  router_ids=router_ids)]
+            notify.assert_has_calls(expected)
-
-            self.assertTrue(notify.call_count)
-            get_dvr_routers_to_remove.assert_called_once_with(
-                self.context, port['port']['id'])
-
-    def test_delete_last_vm_port(self):
-        self._test_delete_dvr_serviced_port(device_owner=DEVICE_OWNER_COMPUTE)
-
-    def test_delete_last_vm_port_with_floatingip(self):
-        self._test_delete_dvr_serviced_port(device_owner=DEVICE_OWNER_COMPUTE,
-                                            floating_ip=True)
-
-    def test_delete_lbaas_vip_port(self):
-        self._test_delete_dvr_serviced_port(
-            device_owner=constants.DEVICE_OWNER_LOADBALANCER)
-
-    def test_delete_lbaasv2_vip_port(self):
-        self._test_delete_dvr_serviced_port(
-            device_owner=constants.DEVICE_OWNER_LOADBALANCERV2)
+    def test_delete_port_with_floatingip_notifies_l3_plugin(self):
+        self.test_delete_port_notifies_l3_plugin(floating_ip=True)
 
     def test_concurrent_csnat_port_delete(self):
         plugin = manager.NeutronManager.get_service_plugins()[
diff --git a/neutron/tests/unit/scheduler/test_l3_agent_scheduler.py b/neutron/tests/unit/scheduler/test_l3_agent_scheduler.py
index f53a868f777..5ee2db47393 100644
--- a/neutron/tests/unit/scheduler/test_l3_agent_scheduler.py
+++ b/neutron/tests/unit/scheduler/test_l3_agent_scheduler.py
@@ -1090,12 +1090,11 @@ class L3DvrSchedulerTestCase(testlib_api.SqlTestCase):
         kwargs = {
             'context': self.adminContext,
             'port': mock.ANY,
-            'removed_routers': [
-                {'agent_id': 'foo_agent',
-                 'router_id': 'foo_id',
-                 'host': 'foo_host'},
-            ],
         }
+        removed_routers = [{'agent_id': 'foo_agent',
+                            'router_id': 'foo_id',
+                            'host': 'foo_host'}]
+        l3plugin.get_dvr_routers_to_remove.return_value = removed_routers
         l3_dvrscheduler_db._notify_port_delete(
             'port', 'after_delete', plugin, **kwargs)
         l3plugin.delete_arp_entry_for_dvr_service_port.\
@@ -1162,14 +1161,15 @@ class L3DvrSchedulerTestCase(testlib_api.SqlTestCase):
             self.adminContext, {'r1', 'r2'}, 'host1'))
         self.assertFalse(self.dut.l3_rpc_notifier.routers_updated.called)
 
-    def test_get_dvr_routers_by_portid(self):
+    def test_get_dvr_routers_by_subnet_ids(self):
+        subnet_id = '80947d4a-fbc8-484b-9f92-623a6bfcf3e0'
         dvr_port = {
             'id': 'dvr_port1',
             'device_id': 'r1',
             'device_owner': constants.DEVICE_OWNER_DVR_INTERFACE,
             'fixed_ips': [
                 {
-                    'subnet_id': '80947d4a-fbc8-484b-9f92-623a6bfcf3e0',
+                    'subnet_id': subnet_id,
                     'ip_address': '10.10.10.1'
                 }
             ]
         }
@@ -1184,8 +1184,8 @@ class L3DvrSchedulerTestCase(testlib_api.SqlTestCase):
                            return_value=dvr_port),\
                 mock.patch('neutron.db.db_base_plugin_v2.NeutronDbPluginV2'
                            '.get_ports', return_value=[dvr_port]):
-            router_id = self.dut.get_dvr_routers_by_portid(self.adminContext,
-                                                           dvr_port['id'])
+            router_id = self.dut.get_dvr_routers_by_subnet_ids(
+                self.adminContext, [subnet_id])
             self.assertEqual(router_id.pop(), r1['id'])
 
     def test_get_subnet_ids_on_router(self):
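For reference, the subnet-based lookup that
test_get_dvr_routers_by_subnet_ids exercises reduces to a single
get_ports filter. A self-contained sketch of the filter semantics (stub
data, illustration only; the DVR interface owner string is assumed to
be 'network:router_interface_distributed'):

    # Stub of _core_plugin.get_ports filtering.
    DVR_INTERFACE = 'network:router_interface_distributed'  # assumed value

    ports = [
        {'device_id': 'r1', 'device_owner': DVR_INTERFACE,
         'fixed_ips': [{'subnet_id': 'subnet-1'}]},
        {'device_id': 'r2', 'device_owner': DVR_INTERFACE,
         'fixed_ips': [{'subnet_id': 'subnet-2'}]},
    ]

    def get_ports_stub(filters):
        wanted = set(filters['fixed_ips']['subnet_id'])
        return [p for p in ports
                if p['device_owner'] in filters['device_owner'] and
                any(ip['subnet_id'] in wanted for ip in p['fixed_ips'])]

    # One query for all of the deleted port's subnets; the device_id of
    # each matching DVR interface port is a router on those subnets.
    filter_sub = {'fixed_ips': {'subnet_id': ['subnet-1']},
                  'device_owner': [DVR_INTERFACE]}
    router_ids = {p['device_id'] for p in get_ports_stub(filter_sub)}
    assert router_ids == {'r1'}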