From 30a60d04f098581340f83b38b7a79104308c66bc Mon Sep 17 00:00:00 2001 From: Slawek Kaplonski Date: Thu, 1 Aug 2019 18:10:19 +0200 Subject: [PATCH] Add 3 retry attempts to initialize ovs agent When a TimeoutException is raised by ovsdbapp during initialization of neutron-ovs-agent, the agent will now try to create the object of the agent's class 3 times before it is terminated. Such timeouts shouldn't usually happen, but if for some reason one does happen, e.g. once in a CI job, it is better to just retry and initialize the agent instead of leaving it dead on the node. Change-Id: I93e8d21d612e343479f26f8adc4477473579bab1 Closes-Bug: #1838563 --- .../openvswitch/agent/ovs_neutron_agent.py | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py b/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py index f3ab474b32b..2a0e8d2a4d8 100644 --- a/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py +++ b/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py @@ -45,6 +45,7 @@ from oslo_service import loopingcall from oslo_service import systemd from oslo_utils import netutils from osprofiler import profiler +from ovsdbapp import exceptions as ovs_exceptions import six from neutron._i18n import _ @@ -83,6 +84,8 @@ cfg.CONF.import_group('AGENT', 'neutron.plugins.ml2.drivers.openvswitch.' cfg.CONF.import_group('OVS', 'neutron.plugins.ml2.drivers.openvswitch.agent.' 
'common.config') +INIT_MAX_TRIES = 3 + class _mac_mydialect(netaddr.mac_unix): word_fmt = '%.2x' @@ -2568,10 +2571,22 @@ def main(bridge_classes): validate_tunnel_config(cfg.CONF.AGENT.tunnel_types, cfg.CONF.OVS.local_ip) - try: - agent = OVSNeutronAgent(bridge_classes, ext_mgr, cfg.CONF) - capabilities.notify_init_event(n_const.AGENT_TYPE_OVS, agent) - except (RuntimeError, ValueError) as e: - LOG.error("%s Agent terminated!", e) - sys.exit(1) + init_try = 1 + while True: + try: + agent = OVSNeutronAgent(bridge_classes, ext_mgr, cfg.CONF) + capabilities.notify_init_event(n_const.AGENT_TYPE_OVS, agent) + break + except ovs_exceptions.TimeoutException as e: + if init_try < INIT_MAX_TRIES: + LOG.warning("Ovsdb command timeout!") + init_try += 1 + else: + LOG.error("%(err)s agent terminated after %(attempts)s " + "initialization attempts!", + {'err': e, 'attempts': init_try}) + sys.exit(1) + except (RuntimeError, ValueError) as e: + LOG.error("%s agent terminated!", e) + sys.exit(1) agent.daemon_loop()