Avoid router ri.process if initialize() fails

When router_info initialize() fails(with trace) some resources(
like keepalived process) may not be created. While handling this
exception, l3 agent calls _process_updated_router instead of
again calling _process_added_router, which also fails trying to
access resources which are not created.

In this change, agent will have new router_info(i.e
self.router_info[router_id] = ri) only when initialize() succeeds.
When initialize() fails, as router_info is not part of agent,
"_process_router_if_compatible" will again call initialize().
We also cleanup router_info when initialize() fails.

Closes-bug: #1662804
Change-Id: I278ac83de57713c93d6e50846d79034d774c5d47
This commit is contained in:
venkata anil 2017-02-08 15:49:47 +00:00
parent f6546bcb1d
commit 3e1ed94e38
3 changed files with 68 additions and 6 deletions

View File

@ -343,7 +343,20 @@ class L3NATAgent(ha.AgentMixin,
self.router_info[router_id] = ri
ri.initialize(self.process_monitor)
# If initialize() fails, cleanup and retrigger complete sync
try:
ri.initialize(self.process_monitor)
except Exception:
with excutils.save_and_reraise_exception():
del self.router_info[router_id]
LOG.exception(_LE('Error while initializing router %s'),
router_id)
self.namespaces_manager.ensure_router_cleanup(router_id)
try:
ri.delete()
except Exception:
LOG.exception(_LE('Error while deleting router %s'),
router_id)
def _safe_router_removed(self, router_id):
"""Try to delete a router and return True if successful."""

View File

@ -20,7 +20,7 @@ import netaddr
from neutron_lib import constants as n_consts
from oslo_log import log as logging
from neutron._i18n import _LE
from neutron._i18n import _, _LE
from neutron.agent.l3 import namespaces
from neutron.agent.l3 import router_info as router
from neutron.agent.linux import external_process
@ -105,12 +105,13 @@ class HaRouter(router.RouterInfo):
return False
def initialize(self, process_monitor):
super(HaRouter, self).initialize(process_monitor)
ha_port = self.router.get(n_consts.HA_INTERFACE_KEY)
if not ha_port:
LOG.error(_LE('Unable to process HA router %s without HA port'),
self.router_id)
return
msg = _("Unable to process HA router %s without "
"HA port") % self.router_id
LOG.exception(msg)
raise Exception(msg)
super(HaRouter, self).initialize(process_monitor)
self.ha_port = ha_port
self._init_keepalived_manager(process_monitor)

View File

@ -1715,6 +1715,54 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
mock_update_fip_status.assert_called_once_with(
mock.ANY, ri.router_id, {fip2['id']: 'ACTIVE'})
@mock.patch.object(l3_agent.LOG, 'exception')
def _retrigger_initialize(self, log_exception, delete_fail=False):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
router = {'id': _uuid(),
'external_gateway_info': {'network_id': 'aaa'}}
self.plugin_api.get_routers.return_value = [router]
update = router_processing_queue.RouterUpdate(
router['id'],
router_processing_queue.PRIORITY_SYNC_ROUTERS_TASK,
router=router,
timestamp=timeutils.utcnow())
agent._queue.add(update)
ri = legacy_router.LegacyRouter(agent, router['id'], router,
**self.ri_kwargs)
calls = [mock.call('Error while initializing router %s',
router['id'])]
if delete_fail:
# if delete fails, then also retrigger initialize
ri.delete = mock.Mock(side_effect=RuntimeError())
calls.append(
mock.call('Error while deleting router %s',
router['id']))
else:
ri.delete = mock.Mock()
calls.append(
mock.call('Failed to process compatible router: %s' %
router['id']))
ri.process = mock.Mock()
ri.initialize = mock.Mock(side_effect=RuntimeError())
agent._create_router = mock.Mock(return_value=ri)
agent._process_router_update()
log_exception.assert_has_calls(calls)
ri.initialize.side_effect = None
agent._process_router_update()
self.assertTrue(ri.delete.called)
self.assertEqual(2, ri.initialize.call_count)
self.assertEqual(2, agent._create_router.call_count)
self.assertEqual(1, ri.process.call_count)
self.assertIn(ri.router_id, agent.router_info)
def test_initialize_fail_retrigger_initialize(self):
self._retrigger_initialize()
def test_initialize_and_delete_fail_retrigger_initialize(self):
self._retrigger_initialize(delete_fail=True)
def test_process_router_floatingip_status_update_if_processed(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
router = l3_test_common.prepare_router_data(num_internal_ports=1)