From e795a3fcf882ad8130018f32b57f2f887a1d20da Mon Sep 17 00:00:00 2001 From: LIU Yulong Date: Thu, 11 Aug 2016 16:58:48 +0800 Subject: [PATCH] Make the HA router state change notification faster An HA router state change takes too long to be reported to the neutron server. With the default ha_vrrp_advert_int of 2s, the batch interval is (2s * 3 + 2s) * 2, so a single HA router state change takes almost 16s to be sent. Suppose an HA router goes through 8 state changes during those 16s. After the 16s, the first change is dequeued and sent to the neutron server, then the 2nd, the 3rd, and so on. This is where things become interesting: if you run `neutron l3-agent-list-hosting-router ha_router_id` after those 16 seconds, you may see the router state on one specific agent alternating between active and standby. It does not reflect the real state, because the notifications are delayed. This patch sets the BatchNotifier interval to ha_vrrp_advert_int (default 2s) to make the HA router state change notification faster. NOTE: the BatchNotifier event queue is still needed, because the HA router state changes need to be sent in the proper order. Only then can the neutron server set the HA state properly. Closes-Bug: #1612069 Change-Id: Ife687038d31bd1e1ee264ff8b6ae1264fdd05489 --- neutron/agent/l3/ha.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/neutron/agent/l3/ha.py b/neutron/agent/l3/ha.py index c87e1745049..6bd3fde0345 100644 --- a/neutron/agent/l3/ha.py +++ b/neutron/agent/l3/ha.py @@ -80,6 +80,8 @@ class AgentMixin(object): def __init__(self, host): self._init_ha_conf_path() super(AgentMixin, self).__init__(host) + # BatchNotifier queue is needed to ensure that the HA router + # state change sequence is under the proper order. 
self.state_change_notifier = batch_notifier.BatchNotifier( self._calculate_batch_duration(), self.notify_server) eventlet.spawn(self._start_keepalived_notifications_server) @@ -90,14 +92,9 @@ class AgentMixin(object): state_change_server.run() def _calculate_batch_duration(self): - # Slave becomes the master after not hearing from it 3 times - detection_time = self.conf.ha_vrrp_advert_int * 3 - - # Keepalived takes a couple of seconds to configure the VIPs - configuration_time = 2 - - # Give it enough slack to batch all events due to the same failure - return (detection_time + configuration_time) * 2 + # Set the BatchNotifier interval to ha_vrrp_advert_int, + # default 2 seconds. + return self.conf.ha_vrrp_advert_int def enqueue_state_change(self, router_id, state): LOG.info(_LI('Router %(router_id)s transitioned to %(state)s'),