Not remove the running router when MQ is unreachable
When the L3 agent get a router update notification, it will try to
retrieve the router info from neutron server. But at this time, if
the message queue is down/unreachable. It will get exceptions related
message queue. The resync actions will be run then. Sometimes, rabbitMQ
cluster is not so much easy to recover. Then Long time MQ recover time
will cause the router info sync RPC never get successful until it meets
the max retry time. Then the bad thing happens, L3 agent is trying to
remove the router now. It basically shutdown all the existing L3 traffic
of this router.
This patch directly removes the final router removal action, let the
router run as it is.
Closes-Bug: #1871850
Change-Id: I9062638366b45a7a930f31185cd6e23901a43957
(cherry picked from commit 12b9149e20
)
This commit is contained in:
parent
8ff90c2bd4
commit
a96ad52c7e
|
@ -658,9 +658,6 @@ class L3NATAgent(ha.AgentMixin,
|
||||||
if router_update.hit_retry_limit():
|
if router_update.hit_retry_limit():
|
||||||
LOG.warning("Hit retry limit with router update for %s, action %s",
|
LOG.warning("Hit retry limit with router update for %s, action %s",
|
||||||
router_update.id, router_update.action)
|
router_update.id, router_update.action)
|
||||||
if router_update.action != DELETE_ROUTER:
|
|
||||||
LOG.debug("Deleting router %s", router_update.id)
|
|
||||||
self._safe_router_removed(router_update.id)
|
|
||||||
return
|
return
|
||||||
router_update.timestamp = timeutils.utcnow()
|
router_update.timestamp = timeutils.utcnow()
|
||||||
router_update.priority = priority
|
router_update.priority = priority
|
||||||
|
|
|
@ -2751,35 +2751,59 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
||||||
self._test_process_routers_update_rpc_timeout(ext_net_call=True,
|
self._test_process_routers_update_rpc_timeout(ext_net_call=True,
|
||||||
ext_net_call_failed=True)
|
ext_net_call_failed=True)
|
||||||
|
|
||||||
@mock.patch.object(pd, 'remove_router')
|
def test_process_routers_update_router_update(self):
|
||||||
def _test_process_routers_update_router_deleted(self, remove_router,
|
|
||||||
error=False):
|
|
||||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||||
agent._queue = mock.Mock()
|
agent._queue = mock.Mock()
|
||||||
update = mock.Mock()
|
update = mock.Mock()
|
||||||
update.resource = None
|
update.resource = None
|
||||||
update.action = 1 # ROUTER_DELETED
|
update.action = l3_agent.ADD_UPDATE_ROUTER
|
||||||
router_info = mock.MagicMock()
|
router_info = mock.MagicMock()
|
||||||
agent.router_info[update.id] = router_info
|
agent.router_info[update.id] = router_info
|
||||||
router_processor = mock.Mock()
|
router_processor = mock.Mock()
|
||||||
agent._queue.each_update_to_next_resource.side_effect = [
|
agent._queue.each_update_to_next_resource.side_effect = [
|
||||||
[(router_processor, update)]]
|
[(router_processor, update)]]
|
||||||
agent._resync_router = mock.Mock()
|
agent._resync_router = mock.Mock()
|
||||||
|
agent._safe_router_removed = mock.Mock()
|
||||||
|
agent.plugin_rpc = mock.MagicMock()
|
||||||
|
agent.plugin_rpc.get_routers.side_effect = (
|
||||||
|
Exception("Failed to get router info"))
|
||||||
|
# start test
|
||||||
|
agent._process_router_update()
|
||||||
|
router_info.delete.assert_not_called()
|
||||||
|
self.assertFalse(router_info.delete.called)
|
||||||
|
self.assertTrue(agent.router_info)
|
||||||
|
self.assertTrue(agent._resync_router.called)
|
||||||
|
self.assertFalse(agent._safe_router_removed.called)
|
||||||
|
|
||||||
|
def _test_process_routers_update_router_deleted(self,
|
||||||
|
error=False):
|
||||||
|
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||||
|
agent._queue = mock.Mock()
|
||||||
|
update = mock.Mock()
|
||||||
|
update.resource = None
|
||||||
|
update.action = l3_agent.DELETE_ROUTER
|
||||||
|
router_info = mock.MagicMock()
|
||||||
|
agent.router_info[update.id] = router_info
|
||||||
|
router_processor = mock.Mock()
|
||||||
|
agent._queue.each_update_to_next_resource.side_effect = [
|
||||||
|
[(router_processor, update)]]
|
||||||
|
agent._resync_router = mock.Mock()
|
||||||
|
agent._safe_router_removed = mock.Mock()
|
||||||
if error:
|
if error:
|
||||||
agent._safe_router_removed = mock.Mock()
|
|
||||||
agent._safe_router_removed.return_value = False
|
agent._safe_router_removed.return_value = False
|
||||||
agent._process_router_update()
|
agent._process_router_update()
|
||||||
if error:
|
if error:
|
||||||
self.assertFalse(router_processor.fetched_and_processed.called)
|
self.assertFalse(router_processor.fetched_and_processed.called)
|
||||||
agent._resync_router.assert_called_with(update)
|
agent._resync_router.assert_called_with(update)
|
||||||
self.assertFalse(remove_router.called)
|
self.assertTrue(agent._safe_router_removed.called)
|
||||||
else:
|
else:
|
||||||
router_info.delete.assert_called_once_with()
|
router_info.delete.assert_not_called()
|
||||||
self.assertFalse(agent.router_info)
|
self.assertFalse(router_info.delete.called)
|
||||||
|
self.assertTrue(agent.router_info)
|
||||||
self.assertFalse(agent._resync_router.called)
|
self.assertFalse(agent._resync_router.called)
|
||||||
router_processor.fetched_and_processed.assert_called_once_with(
|
router_processor.fetched_and_processed.assert_called_once_with(
|
||||||
update.timestamp)
|
update.timestamp)
|
||||||
self.assertTrue(remove_router.called)
|
self.assertTrue(agent._safe_router_removed.called)
|
||||||
|
|
||||||
def test_process_routers_update_router_deleted_success(self):
|
def test_process_routers_update_router_deleted_success(self):
|
||||||
self._test_process_routers_update_router_deleted()
|
self._test_process_routers_update_router_deleted()
|
||||||
|
|
Loading…
Reference in New Issue