Avoid router ri.process if initialize() fails
When router_info initialize() fails(with trace) some resources( like keepalived process) may not be created. While handling this exception, l3 agent calls _process_updated_router instead of again calling _process_added_router, which also fails trying to access resources which are not created. In this change, agent will have new router_info(i.e self.router_info[router_id] = ri) only when initialize() succeeds. When initialize() fails, as router_info is not part of agent, "_process_router_if_compatible" will again call initialize(). We also cleanup router_info when initialize() fails. Closes-bug: #1662804 Change-Id: I278ac83de57713c93d6e50846d79034d774c5d47
This commit is contained in:
parent
f6546bcb1d
commit
3e1ed94e38
neutron
@ -343,7 +343,20 @@ class L3NATAgent(ha.AgentMixin,
|
||||
|
||||
self.router_info[router_id] = ri
|
||||
|
||||
ri.initialize(self.process_monitor)
|
||||
# If initialize() fails, cleanup and retrigger complete sync
|
||||
try:
|
||||
ri.initialize(self.process_monitor)
|
||||
except Exception:
|
||||
with excutils.save_and_reraise_exception():
|
||||
del self.router_info[router_id]
|
||||
LOG.exception(_LE('Error while initializing router %s'),
|
||||
router_id)
|
||||
self.namespaces_manager.ensure_router_cleanup(router_id)
|
||||
try:
|
||||
ri.delete()
|
||||
except Exception:
|
||||
LOG.exception(_LE('Error while deleting router %s'),
|
||||
router_id)
|
||||
|
||||
def _safe_router_removed(self, router_id):
|
||||
"""Try to delete a router and return True if successful."""
|
||||
|
@ -20,7 +20,7 @@ import netaddr
|
||||
from neutron_lib import constants as n_consts
|
||||
from oslo_log import log as logging
|
||||
|
||||
from neutron._i18n import _LE
|
||||
from neutron._i18n import _, _LE
|
||||
from neutron.agent.l3 import namespaces
|
||||
from neutron.agent.l3 import router_info as router
|
||||
from neutron.agent.linux import external_process
|
||||
@ -105,12 +105,13 @@ class HaRouter(router.RouterInfo):
|
||||
return False
|
||||
|
||||
def initialize(self, process_monitor):
|
||||
super(HaRouter, self).initialize(process_monitor)
|
||||
ha_port = self.router.get(n_consts.HA_INTERFACE_KEY)
|
||||
if not ha_port:
|
||||
LOG.error(_LE('Unable to process HA router %s without HA port'),
|
||||
self.router_id)
|
||||
return
|
||||
msg = _("Unable to process HA router %s without "
|
||||
"HA port") % self.router_id
|
||||
LOG.exception(msg)
|
||||
raise Exception(msg)
|
||||
super(HaRouter, self).initialize(process_monitor)
|
||||
|
||||
self.ha_port = ha_port
|
||||
self._init_keepalived_manager(process_monitor)
|
||||
|
@ -1715,6 +1715,54 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
||||
mock_update_fip_status.assert_called_once_with(
|
||||
mock.ANY, ri.router_id, {fip2['id']: 'ACTIVE'})
|
||||
|
||||
@mock.patch.object(l3_agent.LOG, 'exception')
|
||||
def _retrigger_initialize(self, log_exception, delete_fail=False):
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
router = {'id': _uuid(),
|
||||
'external_gateway_info': {'network_id': 'aaa'}}
|
||||
self.plugin_api.get_routers.return_value = [router]
|
||||
update = router_processing_queue.RouterUpdate(
|
||||
router['id'],
|
||||
router_processing_queue.PRIORITY_SYNC_ROUTERS_TASK,
|
||||
router=router,
|
||||
timestamp=timeutils.utcnow())
|
||||
agent._queue.add(update)
|
||||
|
||||
ri = legacy_router.LegacyRouter(agent, router['id'], router,
|
||||
**self.ri_kwargs)
|
||||
calls = [mock.call('Error while initializing router %s',
|
||||
router['id'])]
|
||||
if delete_fail:
|
||||
# if delete fails, then also retrigger initialize
|
||||
ri.delete = mock.Mock(side_effect=RuntimeError())
|
||||
calls.append(
|
||||
mock.call('Error while deleting router %s',
|
||||
router['id']))
|
||||
else:
|
||||
ri.delete = mock.Mock()
|
||||
calls.append(
|
||||
mock.call('Failed to process compatible router: %s' %
|
||||
router['id']))
|
||||
ri.process = mock.Mock()
|
||||
ri.initialize = mock.Mock(side_effect=RuntimeError())
|
||||
agent._create_router = mock.Mock(return_value=ri)
|
||||
agent._process_router_update()
|
||||
log_exception.assert_has_calls(calls)
|
||||
|
||||
ri.initialize.side_effect = None
|
||||
agent._process_router_update()
|
||||
self.assertTrue(ri.delete.called)
|
||||
self.assertEqual(2, ri.initialize.call_count)
|
||||
self.assertEqual(2, agent._create_router.call_count)
|
||||
self.assertEqual(1, ri.process.call_count)
|
||||
self.assertIn(ri.router_id, agent.router_info)
|
||||
|
||||
def test_initialize_fail_retrigger_initialize(self):
|
||||
self._retrigger_initialize()
|
||||
|
||||
def test_initialize_and_delete_fail_retrigger_initialize(self):
|
||||
self._retrigger_initialize(delete_fail=True)
|
||||
|
||||
def test_process_router_floatingip_status_update_if_processed(self):
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
router = l3_test_common.prepare_router_data(num_internal_ports=1)
|
||||
|
Loading…
x
Reference in New Issue
Block a user