From c20f2e5136fd241f4be5c37403ab1ed54cdaefb5 Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Wed, 29 Sep 2021 16:28:28 +0000 Subject: [PATCH] [HA] Do not add initial state change delay in HA router The initial state ("primary", "backup") should be set immediately. in [1], a transition delay to "primary" was introduced. This delay is unnecesary when the first state happens. Closes-Bug: #1945512 [1]https://review.opendev.org/q/I70037da9cdd0f8448e0af8dd96b4e3f5de5728ad Change-Id: Ibe9178c4126977f1321e414676d67f28e5ec9b57 --- neutron/agent/l3/ha.py | 20 +++++++++++++++++- neutron/tests/unit/agent/l3/test_agent.py | 25 ++++++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/neutron/agent/l3/ha.py b/neutron/agent/l3/ha.py index dfb56c5787a..062f94818af 100644 --- a/neutron/agent/l3/ha.py +++ b/neutron/agent/l3/ha.py @@ -17,6 +17,9 @@ import os import threading import eventlet +from neutron_lib.callbacks import events +from neutron_lib.callbacks import registry +from neutron_lib.callbacks import resources from neutron_lib import constants from oslo_log import log as logging from oslo_utils import fileutils @@ -76,6 +79,7 @@ class L3AgentKeepalivedStateChangeServer(object): server.wait() +@registry.has_registry_receivers class AgentMixin(object): def __init__(self, host): self._init_ha_conf_path() @@ -87,6 +91,13 @@ class AgentMixin(object): eventlet.spawn(self._start_keepalived_notifications_server) self._transition_states = {} self._transition_state_mutex = threading.Lock() + self._initial_state_change_per_router = set() + + def initial_state_change(self, router_id): + initial_state = router_id not in self._initial_state_change_per_router + if initial_state: + self._initial_state_change_per_router.add(router_id) + return initial_state def _get_router_info(self, router_id): try: @@ -95,6 +106,13 @@ class AgentMixin(object): LOG.info('Router %s is not managed by this agent. It was ' 'possibly deleted concurrently.', router_id) + @registry.receives(resources.ROUTER, [events.AFTER_DELETE]) + def _delete_router(self, resource, event, trigger, payload): + try: + self._initial_state_change_per_router.remove(payload.resource_id) + except KeyError: + pass + def check_ha_state_for_router(self, router_id, current_state): ri = self._get_router_info(router_id) if not ri: @@ -148,7 +166,7 @@ class AgentMixin(object): def _enqueue_state_change(self, router_id, state): # NOTE(ralonsoh): move 'primary' and 'backup' constants to n-lib - if state == 'primary': + if state == 'primary' and not self.initial_state_change(router_id): eventlet.sleep(self.conf.ha_vrrp_advert_int) transition_state = self._update_transition_state(router_id) if transition_state != state: diff --git a/neutron/tests/unit/agent/l3/test_agent.py b/neutron/tests/unit/agent/l3/test_agent.py index b60e08aadbd..f3eb4647c63 100644 --- a/neutron/tests/unit/agent/l3/test_agent.py +++ b/neutron/tests/unit/agent/l3/test_agent.py @@ -263,7 +263,15 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework): self._enqueue_state_change_transitions(['backup'], 1) def test_enqueue_state_change_from_none_to_primary_to_backup(self): - self._enqueue_state_change_transitions(['primary', 'backup'], 0) + # First transition (to primary), won't have a delay. + self._enqueue_state_change_transitions(['primary', 'backup'], 2) + + def test_enqueue_state_change_from_none_to_primary_to_backup_twice(self): + # Second transition to primary will have a delay and will be overridden + # by the second transition to backup; that means the transition from + # backup (second state) to primary (third state) is dismissed. + self._enqueue_state_change_transitions( + ['primary', 'backup', 'primary', 'backup'], 2) def test_enqueue_state_change_from_none_to_backup_to_primary(self): self._enqueue_state_change_transitions(['backup', 'primary'], 2) @@ -2563,6 +2571,21 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework): agent.router_removed_from_agent(None, {'router_id': FAKE_ID}) self.assertEqual(1, agent._queue.add.call_count) + @mock.patch.object(metadata_driver.MetadataDriver, + 'destroy_monitored_metadata_proxy') + def test__router_removed(self, *args): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + ri = mock.Mock(router={'id': 'router_id'}) + agent._initial_state_change_per_router.add('router_id') + self.assertEqual({'router_id'}, agent._initial_state_change_per_router) + for _ in range(2): + # The second call will trigger a KeyError exception in + # AgentMixin._delete_router that should be dismissed. + agent.router_info['router_id'] = mock.ANY + agent.pd = mock.Mock(routers={'router_id': {'subnets': []}}) + agent._router_removed(ri, 'router_id') + self.assertEqual(set([]), agent._initial_state_change_per_router) + def test_added_to_agent(self): agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) agent._queue = mock.Mock()