Handle retry logic for timeouts with multiple LDAP servers

It is currently possible to specify multiple LDAP server URIs
for failover purposes when using LDAP connection pooling, as this
functionality is provided in the underlying python-ldap module.
Unfortunately, failover does not work properly if LDAP timeout
issue are encountered due to the way python-LDAP works.  If multiple
URLs are provided, the first URL that results in a successful TCP
connection is considered to be a successful LDAP connection.  If the
initial bind operation fails due to a timeout waiting for an LDAP
response from the server, it will never failover to additional
URIs.  It is easy to demonstrate this behavior by forcing an LDAP
server to hang (attach with gdb to halt the process), then using
that server as the first URI when creating a connection pool.

This patch adds proper failover logic to ldappool.  If multiple URIs
are provided, we split them and attempt to connect to them one-by-one
until we have either had a successful LDAP bind operation, or we have
exhausted the list of URIs.  The connection retry logic is processed
per-URI as well, meaning we will attempt to reconnect to the first
URI up to the requested retry limit, then we will failover to the
next URI and reset the retry count.

The ldap.TIMEOUT exception was not raised to the caller like some
of the other common LDAP exceptions we might encounter.  We should
raise the TIMEOUT exception instead of the more generic BackendError
exception to provide more detail to the calling code.

Change-Id: Iabc13363d2425e70a53163249e5389d336274533
This commit is contained in:
Nathan Kinder 2018-10-31 09:26:23 -07:00
parent a74cf70ff4
commit 3f0ea8533a
3 changed files with 199 additions and 36 deletions

View File

@ -51,6 +51,10 @@ Here are the options you can use when instanciating the pool:
- **use_pool**: activates the pool. If False, will recreate a connector
each time. **default: True**
The **uri** option will accept a comma or whitespace separated list of LDAP
server URIs to allow for failover behavior when connection errors are
encountered. Connections will be attempted against the servers in order,
with **retry_max** attempts per URI before failing over to the next server.
The **connection** method takes two options:

View File

@ -43,6 +43,7 @@ import time
import ldap
from ldap.ldapobject import ReconnectLDAPObject
import re
import six
from six import PY2
@ -239,50 +240,69 @@ class ConnectionManager(object):
:returns: StateConnector
:raises BackendError: If unable to connect to LDAP
"""
tries = 0
connected = False
if passwd is not None:
if PY2:
passwd = utf8_encode(passwd)
exc = None
conn = None
# trying retry_max times in a row with a fresh connector
while tries < self.retry_max and not connected:
try:
log.debug('Attempting to create a new connector '
'to %s (attempt %d)', self.uri, tries + 1)
conn = self.connector_cls(self.uri, retry_max=self.retry_max,
retry_delay=self.retry_delay)
conn.timeout = self.timeout
self._bind(conn, bind, passwd)
connected = True
except ldap.INVALID_CREDENTIALS as error:
exc = error
log.error('Invalid credentials. Cancelling retry',
exc_info=True)
break
except ldap.LDAPError as error:
exc = error
tries += 1
if tries < self.retry_max:
log.info('Failure attempting to create and bind '
'connector; will retry after %r seconds',
self.retry_delay, exc_info=True)
time.sleep(self.retry_delay)
else:
log.error('Failure attempting to create and bind '
'connector', exc_info=True)
# If multiple server URIs have been provided, loop through
# each one in turn in case of connection failures (server down,
# timeout, etc.). URIs can be delimited by either commas or
# whitespace.
for server in re.split('[\s,]+', self.uri):
tries = 0
exc = None
conn = None
# trying retry_max times in a row with a fresh connector
while tries < self.retry_max and not connected:
try:
log.debug('Attempting to create a new connector '
'to %s (attempt %d)', server, tries + 1)
conn = self.connector_cls(server, retry_max=self.retry_max,
retry_delay=self.retry_delay)
conn.timeout = self.timeout
self._bind(conn, bind, passwd)
connected = True
except ldap.INVALID_CREDENTIALS as error:
# Treat this as a hard failure instead of retrying to
# avoid locking out the LDAP account due to successive
# failed bind attempts. We also don't want to try
# connecting to additional servers if multiple URIs were
# provide, as failed bind attempts may be replicated
# across multiple LDAP servers.
exc = error
log.error('Invalid credentials. Cancelling retry',
exc_info=True)
raise exc
except ldap.LDAPError as error:
exc = error
tries += 1
if tries < self.retry_max:
log.info('Failure attempting to create and bind '
'connector; will retry after %r seconds',
self.retry_delay, exc_info=True)
time.sleep(self.retry_delay)
else:
log.error('Failure attempting to create and bind '
'connector', exc_info=True)
# We successfully connected to one of the servers, so
# we can just return the connection and stop processing
# any additional URIs.
if connected:
return conn
# We failed to connect to any of the servers,
# so raise an appropriate exception.
if not connected:
if isinstance(exc, (ldap.NO_SUCH_OBJECT,
ldap.INVALID_CREDENTIALS,
ldap.SERVER_DOWN)):
ldap.SERVER_DOWN,
ldap.TIMEOUT)):
raise exc
# that's something else
raise BackendError(str(exc), backend=conn)
return conn
# that's something else
raise BackendError(str(exc), backend=conn)
def _get_connection(self, bind=None, passwd=None):
if bind is None:

View File

@ -51,14 +51,51 @@ def _bind_fails(self, who='', cred='', **kw):
raise ldap.LDAPError('LDAP connection invalid')
def _bind_fails2(self, who='', cred='', **kw):
def _bind_fails_server_down(self, who='', cred='', **kw):
raise ldap.SERVER_DOWN('LDAP connection invalid')
def _bind_fails_server_down_failover(self, who='', cred='', **kw):
# Raise a server down error unless the URI is 'ldap://GOOD'
if self._uri == 'ldap://GOOD':
self.connected = True
self.who = who
self.cred = cred
return 1
else:
raise ldap.SERVER_DOWN('LDAP connection invalid')
def _bind_fails_timeout(self, who='', cred='', **kw):
raise ldap.TIMEOUT('LDAP connection timeout')
def _bind_fails_timeout_failover(self, who='', cred='', **kw):
# Raise a timeout error unless the URI is 'ldap://GOOD'
if self._uri == 'ldap://GOOD':
self.connected = True
self.who = who
self.cred = cred
return 1
else:
raise ldap.TIMEOUT('LDAP connection timeout')
def _bind_fails_invalid_credentials(self, who='', cred='', **kw):
raise ldap.INVALID_CREDENTIALS('LDAP connection invalid')
def _bind_fails_invalid_credentials_failover(self, who='', cred='', **kw):
# Raise invalid credentials erorr unless the URI is 'ldap://GOOD'
if self._uri == 'ldap://GOOD':
self.connected = True
self.who = who
self.cred = cred
return 1
else:
raise ldap.INVALID_CREDENTIALS('LDAP connection invalid')
def _start_tls_s(self):
if self.start_tls_already_called_flag:
raise ldap.LOCAL_ERROR
@ -146,7 +183,7 @@ class TestLDAPConnection(unittest.TestCase):
unbinds.append(1)
# the binding fails with an LDAPError
ldappool.StateConnector.simple_bind_s = _bind_fails2
ldappool.StateConnector.simple_bind_s = _bind_fails_server_down
ldappool.StateConnector.unbind_s = _unbind
uri = ''
dn = 'uid=adminuser,ou=logins,dc=mozilla'
@ -162,6 +199,80 @@ class TestLDAPConnection(unittest.TestCase):
else:
raise AssertionError()
def test_simple_bind_fails_failover(self):
unbinds = []
def _unbind(self):
unbinds.append(1)
# the binding to any server other than 'ldap://GOOD' fails
# with ldap.SERVER_DOWN
ldappool.StateConnector.simple_bind_s = \
_bind_fails_server_down_failover
ldappool.StateConnector.unbind_s = _unbind
uri = 'ldap://BAD,ldap://GOOD'
dn = 'uid=adminuser,ou=logins,dc=mozilla'
passwd = 'adminuser'
cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2)
self.assertEqual(len(cm), 0)
try:
with cm.connection('dn', 'pass') as conn:
# Ensure we failed over to the second URI
self.assertTrue(conn.active)
self.assertEqual(conn._uri, 'ldap://GOOD')
pass
except Exception:
raise AssertionError()
def test_simple_bind_fails_timeout(self):
unbinds = []
def _unbind(self):
unbinds.append(1)
# the binding fails with ldap.TIMEOUT
ldappool.StateConnector.simple_bind_s = _bind_fails_timeout
ldappool.StateConnector.unbind_s = _unbind
uri = ''
dn = 'uid=adminuser,ou=logins,dc=mozilla'
passwd = 'adminuser'
cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2)
self.assertEqual(len(cm), 0)
try:
with cm.connection('dn', 'pass'):
pass
except ldap.TIMEOUT:
pass
else:
raise AssertionError()
def test_simple_bind_fails_timeout_failover(self):
unbinds = []
def _unbind(self):
unbinds.append(1)
# the binding to any server other than 'ldap://GOOD' fails
# with ldap.TIMEOUT
ldappool.StateConnector.simple_bind_s = _bind_fails_timeout_failover
ldappool.StateConnector.unbind_s = _unbind
uri = 'ldap://BAD,ldap://GOOD'
dn = 'uid=adminuser,ou=logins,dc=mozilla'
passwd = 'adminuser'
cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2)
self.assertEqual(len(cm), 0)
try:
with cm.connection('dn', 'pass') as conn:
# Ensure we failed over to the second URI
self.assertTrue(conn.active)
self.assertEqual(conn._uri, 'ldap://GOOD')
pass
except Exception:
raise AssertionError()
def test_simple_bind_fails_invalid_credentials(self):
unbinds = []
@ -184,3 +295,31 @@ class TestLDAPConnection(unittest.TestCase):
pass
else:
raise AssertionError()
def test_simple_bind_fails_invalid_credentials_failover(self):
unbinds = []
def _unbind(self):
unbinds.append(1)
# the binding to any server other than 'ldap://GOOD' fails
# with ldap.INVALID_CREDENTIALS
ldappool.StateConnector.simple_bind_s = \
_bind_fails_invalid_credentials_failover
ldappool.StateConnector.unbind_s = _unbind
uri = 'ldap://BAD,ldap://GOOD'
dn = 'uid=adminuser,ou=logins,dc=mozilla'
passwd = 'adminuser'
cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2)
self.assertEqual(len(cm), 0)
try:
# We expect this to throw an INVALID_CREDENTIALS exception for the
# first URI, as this is a hard-failure where we don't want failover
# to occur to subsequent URIs.
with cm.connection('dn', 'pass'):
pass
except ldap.INVALID_CREDENTIALS:
pass
else:
raise AssertionError()