[HA] Do not add initial state change delay in HA router

The initial state ("primary", "backup") should be set immediately.
in [1], a transition delay to "primary" was introduced. This delay
is unnecesary when the first state happens.

Closes-Bug: #1945512

[1]https://review.opendev.org/q/I70037da9cdd0f8448e0af8dd96b4e3f5de5728ad

Change-Id: Ibe9178c4126977f1321e414676d67f28e5ec9b57
This commit is contained in:
Rodolfo Alonso Hernandez 2021-09-29 16:28:28 +00:00
parent 7c87ecb679
commit c20f2e5136
2 changed files with 43 additions and 2 deletions

View File

@ -17,6 +17,9 @@ import os
import threading
import eventlet
from neutron_lib.callbacks import events
from neutron_lib.callbacks import registry
from neutron_lib.callbacks import resources
from neutron_lib import constants
from oslo_log import log as logging
from oslo_utils import fileutils
@ -76,6 +79,7 @@ class L3AgentKeepalivedStateChangeServer(object):
server.wait()
@registry.has_registry_receivers
class AgentMixin(object):
def __init__(self, host):
self._init_ha_conf_path()
@ -87,6 +91,13 @@ class AgentMixin(object):
eventlet.spawn(self._start_keepalived_notifications_server)
self._transition_states = {}
self._transition_state_mutex = threading.Lock()
self._initial_state_change_per_router = set()
def initial_state_change(self, router_id):
initial_state = router_id not in self._initial_state_change_per_router
if initial_state:
self._initial_state_change_per_router.add(router_id)
return initial_state
def _get_router_info(self, router_id):
try:
@ -95,6 +106,13 @@ class AgentMixin(object):
LOG.info('Router %s is not managed by this agent. It was '
'possibly deleted concurrently.', router_id)
@registry.receives(resources.ROUTER, [events.AFTER_DELETE])
def _delete_router(self, resource, event, trigger, payload):
try:
self._initial_state_change_per_router.remove(payload.resource_id)
except KeyError:
pass
def check_ha_state_for_router(self, router_id, current_state):
ri = self._get_router_info(router_id)
if not ri:
@ -148,7 +166,7 @@ class AgentMixin(object):
def _enqueue_state_change(self, router_id, state):
# NOTE(ralonsoh): move 'primary' and 'backup' constants to n-lib
if state == 'primary':
if state == 'primary' and not self.initial_state_change(router_id):
eventlet.sleep(self.conf.ha_vrrp_advert_int)
transition_state = self._update_transition_state(router_id)
if transition_state != state:

View File

@ -263,7 +263,15 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
self._enqueue_state_change_transitions(['backup'], 1)
def test_enqueue_state_change_from_none_to_primary_to_backup(self):
self._enqueue_state_change_transitions(['primary', 'backup'], 0)
# First transition (to primary), won't have a delay.
self._enqueue_state_change_transitions(['primary', 'backup'], 2)
def test_enqueue_state_change_from_none_to_primary_to_backup_twice(self):
# Second transition to primary will have a delay and will be overridden
# by the second transition to backup; that means the transition from
# backup (second state) to primary (third state) is dismissed.
self._enqueue_state_change_transitions(
['primary', 'backup', 'primary', 'backup'], 2)
def test_enqueue_state_change_from_none_to_backup_to_primary(self):
self._enqueue_state_change_transitions(['backup', 'primary'], 2)
@ -2563,6 +2571,21 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
agent.router_removed_from_agent(None, {'router_id': FAKE_ID})
self.assertEqual(1, agent._queue.add.call_count)
@mock.patch.object(metadata_driver.MetadataDriver,
'destroy_monitored_metadata_proxy')
def test__router_removed(self, *args):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
ri = mock.Mock(router={'id': 'router_id'})
agent._initial_state_change_per_router.add('router_id')
self.assertEqual({'router_id'}, agent._initial_state_change_per_router)
for _ in range(2):
# The second call will trigger a KeyError exception in
# AgentMixin._delete_router that should be dismissed.
agent.router_info['router_id'] = mock.ANY
agent.pd = mock.Mock(routers={'router_id': {'subnets': []}})
agent._router_removed(ri, 'router_id')
self.assertEqual(set([]), agent._initial_state_change_per_router)
def test_added_to_agent(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
agent._queue = mock.Mock()