From 7f7dee73248b561bdefb289573990e8455a666c4 Mon Sep 17 00:00:00 2001 From: Slawek Kaplonski Date: Wed, 10 Apr 2019 12:49:49 +0200 Subject: [PATCH] Choose random value for HA routers' vr_id HA routers use keepalived and need to have virtual_router_id configured. As routers which belong to the same tenant use the same HA network, those values have to be different for each router. Before this patch, this value was always taken as the first available value from the available_vr_ids range. In some (rare) cases, when more than one router is created in parallel for the same tenant, it may happen that those routers get the same vr_id chosen, so keepalived would treat them as a single application and only one router would be ACTIVE on one of the L3 agents. This patch changes this behaviour so that a random value from the available vr_ids is chosen instead of always taking the first value. That should mitigate this rare race condition so that it will be (almost) not noticeable for users. However, a proper fix should probably be done as an additional constraint in the database layer. But such a solution wouldn't be possible to backport to stable branches, so I decided to propose this easy patch first. 
Conflicts: neutron/db/l3_hamode_db.py Change-Id: Idb0ed744e54976dca23593fb2d7317bf77442e65 Related-Bug: #1823314 (cherry picked from commit a8d0f557d504957aeb91f451105cca9eee2d6adb) (cherry picked from commit ee2ed681c495c4fc5086d761853731b7dc2fd34f) (cherry picked from commit 72c9a7ef8416f894a85a36c6b5bbf995e48599d1) --- neutron/db/l3_hamode_db.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/neutron/db/l3_hamode_db.py b/neutron/db/l3_hamode_db.py index 82a91687863..340df86f0c4 100644 --- a/neutron/db/l3_hamode_db.py +++ b/neutron/db/l3_hamode_db.py @@ -14,6 +14,7 @@ # import functools +import random import netaddr from neutron_lib.api.definitions import port as port_def @@ -125,6 +126,16 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin, return allocated_vr_ids + def _get_vr_id(self, context, network_id): + allocated_vr_ids = self._get_allocated_vr_id(context, + network_id) + available_vr_ids = VR_ID_RANGE - allocated_vr_ids + + if not available_vr_ids: + return None + + return random.choice(list(available_vr_ids)) + @db_api.retry_if_session_inactive() def _ensure_vr_id(self, context, router_db, ha_network): router_id = router_db.id @@ -145,16 +156,13 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin, 'ha_vr_id': router_db.extra_attributes.ha_vr_id}) return - allocated_vr_ids = self._get_allocated_vr_id(context, - network_id) - available_vr_ids = VR_ID_RANGE - allocated_vr_ids - - if not available_vr_ids: + vr_id = self._get_vr_id(context, network_id) + if vr_id is None: raise l3_ha.NoVRIDAvailable(router_id=router_id) allocation = l3ha_model.L3HARouterVRIdAllocation() allocation.network_id = network_id - allocation.vr_id = available_vr_ids.pop() + allocation.vr_id = vr_id context.session.add(allocation) router_db.extra_attributes.ha_vr_id = allocation.vr_id