Force L3 agent to resync router it could not configure

If the L3 agent fails to configure a router, commit:
4957b5b435 changed it so
that instead of performing an expensive full sync, only that
router is reconfigured. However, it tries to reconfigure the
cached router. This is a change of behavior from the fullsync
days. The retry is more likely to succeed if the
router is retrieved from the server, instead of using
the locally cached version, in case the user or operator
fixed bad input, or if the router was retrieved in a bad
state due to a server-side race condition.

Note that this is only relevant to full syncs, as those retrieve
routers from the server and queue updates with the router object.
Incremental updates queue up updates without router objects,
so if one of those fails it would always be resynced on a
second attempt.

Related-Bug: #1494682
Change-Id: Id0565e11b3023a639589f2734488029f194e2f9d
This commit is contained in:
Assaf Muller 2015-12-19 14:13:43 -05:00
parent 495f8c606e
commit 822ad5f06b
2 changed files with 26 additions and 0 deletions

View File

@ -441,6 +441,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
priority=queue.PRIORITY_SYNC_ROUTERS_TASK):
router_update.timestamp = timeutils.utcnow()
router_update.priority = priority
router_update.router = None # Force the agent to resync the router
self._queue.add(router_update)
def _process_router_update(self):

View File

@ -22,6 +22,7 @@ import mock
import netaddr
from oslo_log import log
import oslo_messaging
from oslo_utils import timeutils
from oslo_utils import uuidutils
import six
from testtools import matchers
@ -36,6 +37,7 @@ from neutron.agent.l3 import legacy_router
from neutron.agent.l3 import link_local_allocator as lla
from neutron.agent.l3 import namespaces
from neutron.agent.l3 import router_info as l3router
from neutron.agent.l3 import router_processing_queue
from neutron.agent.linux import dibbler
from neutron.agent.linux import external_process
from neutron.agent.linux import interface
@ -1818,6 +1820,29 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
oslo_messaging.MessagingTimeout)
self._test_process_routers_update_rpc_timeout()
def test_process_routers_update_resyncs_failed_router(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
# Attempting to configure the router will fail
agent._process_router_if_compatible = mock.MagicMock()
agent._process_router_if_compatible.side_effect = RuntimeError()
# Queue an update from a full sync
update = router_processing_queue.RouterUpdate(
42,
router_processing_queue.PRIORITY_SYNC_ROUTERS_TASK,
router=mock.Mock(),
timestamp=timeutils.utcnow())
agent._queue.add(update)
agent._process_router_update()
# The update contained the router object, get_routers won't be called
self.assertFalse(agent.plugin_rpc.get_routers.called)
# The update failed, assert that get_routers was called
agent._process_router_update()
self.assertTrue(agent.plugin_rpc.get_routers.called)
def test_process_routers_update_rpc_timeout_on_get_ext_net(self):
self._test_process_routers_update_rpc_timeout(ext_net_call=True,
ext_net_call_failed=True)