Don't give up when an Exception happens in idl.run

It's possible that idl.run() could have a bug where it raises an
Exception for an extended period of time while ovsdb-server is
down, but recovers once ovsdb-server comes back up. Specifically,
when the socket type is 'ssl', python-ovs currently fails to catch
an exception that it does catch for other protocols.

Conflicts:
  ovsdbapp/backend/ovs_idl/connection.py

Change-Id: Ia068650d2db3d5d8642771a6df5a260d692aea20
Closes-Bug: #1895727
(cherry picked from commit 83cf7aa6c8)
(cherry picked from commit 4807809ba7)
This commit is contained in:
Terry Wilson 2020-09-15 13:42:08 -05:00
parent 9168ddcfa8
commit b49239d024
1 changed file with 15 additions and 9 deletions

View File

@ -15,6 +15,7 @@
import logging
import os
import threading
import time
import traceback
from ovs.db import idl
@ -93,9 +94,9 @@ class Connection(object):
while self._is_running:
# If we fail in an Idl call, we could have missed an update
# from the server, leaving us out of sync with ovsdb-server.
# It is not safe to continue without restarting the connection,
# though it is likely that the error is unrecoverable, so only try
# a few times before bailing completely.
# It is not safe to continue without restarting the connection.
# Though it is likely that the error is unrecoverable, keep trying
# indefinitely just in case.
try:
self.idl.wait(self.poller)
self.poller.fd_wait(self.txns.alert_fileno, poller.POLLIN)
@ -107,13 +108,18 @@ class Connection(object):
# in python-ovs
errors += 1
LOG.exception(e)
if errors <= 3:
with self.lock:
self.idl.force_reconnect()
with self.lock:
self.idl.force_reconnect()
try:
idlutils.wait_for_change(self.idl, self.timeout)
continue
self._is_running = False
break
except Exception as e:
# This could throw the same exception as idl.run()
# or Exception("timeout"), either way continue
LOG.exception(e)
sleep = min(2 ** errors, 60)
LOG.info("Trying to recover, sleeping %s seconds", sleep)
time.sleep(sleep)
continue
errors = 0
txn = self.txns.get_nowait()
if txn is not None: