From 3f0ea8533a9144b95c7ea24a940a0ccea211108b Mon Sep 17 00:00:00 2001 From: Nathan Kinder Date: Wed, 31 Oct 2018 09:26:23 -0700 Subject: [PATCH] Handle retry logic for timeouts with multiple LDAP servers It is currently possible to specify multiple LDAP server URIs for failover purposes when using LDAP connection pooling, as this functionality is provided in the underlying python-ldap module. Unfortunately, failover does not work properly if LDAP timeout issue are encountered due to the way python-LDAP works. If multiple URLs are provided, the first URL that results in a successful TCP connection is considered to be a successful LDAP connection. If the initial bind operation fails due to a timeout waiting for an LDAP response from the server, it will never failover to additional URIs. It is easy to demonstrate this behavior by forcing an LDAP server to hang (attach with gdb to halt the process), then using that server as the first URI when creating a connection pool. This patch adds proper failover logic to ldappool. If multiple URIs are provided, we split them and attempt to connect to them one-by-one until we have either had a successful LDAP bind operation, or we have exhausted the list of URIs. The connection retry logic is processed per-URI as well, meaning we will attempt to reconnect to the first URI up to the requested retry limit, then we will failover to the next URI and reset the retry count. The ldap.TIMEOUT exception was not raised to the caller like some of the other common LDAP exceptions we might encounter. We should raise the TIMEOUT exception instead of the more generic BackendError exception to provide more detail to the calling code. Change-Id: Iabc13363d2425e70a53163249e5389d336274533 --- README.rst | 4 + ldappool/__init__.py | 88 ++++++++++------ ldappool/tests/test_ldapconnection.py | 143 +++++++++++++++++++++++++- 3 files changed, 199 insertions(+), 36 deletions(-) diff --git a/README.rst b/README.rst index f0fbad0..33ab65b 100644 --- a/README.rst +++ b/README.rst @@ -51,6 +51,10 @@ Here are the options you can use when instanciating the pool: - **use_pool**: activates the pool. If False, will recreate a connector each time. **default: True** +The **uri** option will accept a comma or whitespace separated list of LDAP +server URIs to allow for failover behavior when connection errors are +encountered. Connections will be attempted against the servers in order, +with **retry_max** attempts per URI before failing over to the next server. The **connection** method takes two options: diff --git a/ldappool/__init__.py b/ldappool/__init__.py index 0428ff1..348c0dd 100644 --- a/ldappool/__init__.py +++ b/ldappool/__init__.py @@ -43,6 +43,7 @@ import time import ldap from ldap.ldapobject import ReconnectLDAPObject +import re import six from six import PY2 @@ -239,50 +240,69 @@ class ConnectionManager(object): :returns: StateConnector :raises BackendError: If unable to connect to LDAP """ - tries = 0 connected = False if passwd is not None: if PY2: passwd = utf8_encode(passwd) - exc = None - conn = None - # trying retry_max times in a row with a fresh connector - while tries < self.retry_max and not connected: - try: - log.debug('Attempting to create a new connector ' - 'to %s (attempt %d)', self.uri, tries + 1) - conn = self.connector_cls(self.uri, retry_max=self.retry_max, - retry_delay=self.retry_delay) - conn.timeout = self.timeout - self._bind(conn, bind, passwd) - connected = True - except ldap.INVALID_CREDENTIALS as error: - exc = error - log.error('Invalid credentials. Cancelling retry', - exc_info=True) - break - except ldap.LDAPError as error: - exc = error - tries += 1 - if tries < self.retry_max: - log.info('Failure attempting to create and bind ' - 'connector; will retry after %r seconds', - self.retry_delay, exc_info=True) - time.sleep(self.retry_delay) - else: - log.error('Failure attempting to create and bind ' - 'connector', exc_info=True) + # If multiple server URIs have been provided, loop through + # each one in turn in case of connection failures (server down, + # timeout, etc.). URIs can be delimited by either commas or + # whitespace. + for server in re.split('[\s,]+', self.uri): + tries = 0 + exc = None + conn = None + # trying retry_max times in a row with a fresh connector + while tries < self.retry_max and not connected: + try: + log.debug('Attempting to create a new connector ' + 'to %s (attempt %d)', server, tries + 1) + conn = self.connector_cls(server, retry_max=self.retry_max, + retry_delay=self.retry_delay) + conn.timeout = self.timeout + self._bind(conn, bind, passwd) + connected = True + except ldap.INVALID_CREDENTIALS as error: + # Treat this as a hard failure instead of retrying to + # avoid locking out the LDAP account due to successive + # failed bind attempts. We also don't want to try + # connecting to additional servers if multiple URIs were + # provide, as failed bind attempts may be replicated + # across multiple LDAP servers. + exc = error + log.error('Invalid credentials. Cancelling retry', + exc_info=True) + raise exc + except ldap.LDAPError as error: + exc = error + tries += 1 + if tries < self.retry_max: + log.info('Failure attempting to create and bind ' + 'connector; will retry after %r seconds', + self.retry_delay, exc_info=True) + time.sleep(self.retry_delay) + else: + log.error('Failure attempting to create and bind ' + 'connector', exc_info=True) + + # We successfully connected to one of the servers, so + # we can just return the connection and stop processing + # any additional URIs. + if connected: + return conn + + # We failed to connect to any of the servers, + # so raise an appropriate exception. if not connected: if isinstance(exc, (ldap.NO_SUCH_OBJECT, - ldap.INVALID_CREDENTIALS, - ldap.SERVER_DOWN)): + ldap.SERVER_DOWN, + ldap.TIMEOUT)): raise exc - # that's something else - raise BackendError(str(exc), backend=conn) - return conn + # that's something else + raise BackendError(str(exc), backend=conn) def _get_connection(self, bind=None, passwd=None): if bind is None: diff --git a/ldappool/tests/test_ldapconnection.py b/ldappool/tests/test_ldapconnection.py index 46d033c..4c61518 100644 --- a/ldappool/tests/test_ldapconnection.py +++ b/ldappool/tests/test_ldapconnection.py @@ -51,14 +51,51 @@ def _bind_fails(self, who='', cred='', **kw): raise ldap.LDAPError('LDAP connection invalid') -def _bind_fails2(self, who='', cred='', **kw): +def _bind_fails_server_down(self, who='', cred='', **kw): raise ldap.SERVER_DOWN('LDAP connection invalid') +def _bind_fails_server_down_failover(self, who='', cred='', **kw): + # Raise a server down error unless the URI is 'ldap://GOOD' + if self._uri == 'ldap://GOOD': + self.connected = True + self.who = who + self.cred = cred + return 1 + else: + raise ldap.SERVER_DOWN('LDAP connection invalid') + + +def _bind_fails_timeout(self, who='', cred='', **kw): + raise ldap.TIMEOUT('LDAP connection timeout') + + +def _bind_fails_timeout_failover(self, who='', cred='', **kw): + # Raise a timeout error unless the URI is 'ldap://GOOD' + if self._uri == 'ldap://GOOD': + self.connected = True + self.who = who + self.cred = cred + return 1 + else: + raise ldap.TIMEOUT('LDAP connection timeout') + + def _bind_fails_invalid_credentials(self, who='', cred='', **kw): raise ldap.INVALID_CREDENTIALS('LDAP connection invalid') +def _bind_fails_invalid_credentials_failover(self, who='', cred='', **kw): + # Raise invalid credentials erorr unless the URI is 'ldap://GOOD' + if self._uri == 'ldap://GOOD': + self.connected = True + self.who = who + self.cred = cred + return 1 + else: + raise ldap.INVALID_CREDENTIALS('LDAP connection invalid') + + def _start_tls_s(self): if self.start_tls_already_called_flag: raise ldap.LOCAL_ERROR @@ -146,7 +183,7 @@ class TestLDAPConnection(unittest.TestCase): unbinds.append(1) # the binding fails with an LDAPError - ldappool.StateConnector.simple_bind_s = _bind_fails2 + ldappool.StateConnector.simple_bind_s = _bind_fails_server_down ldappool.StateConnector.unbind_s = _unbind uri = '' dn = 'uid=adminuser,ou=logins,dc=mozilla' @@ -162,6 +199,80 @@ class TestLDAPConnection(unittest.TestCase): else: raise AssertionError() + def test_simple_bind_fails_failover(self): + unbinds = [] + + def _unbind(self): + unbinds.append(1) + + # the binding to any server other than 'ldap://GOOD' fails + # with ldap.SERVER_DOWN + ldappool.StateConnector.simple_bind_s = \ + _bind_fails_server_down_failover + ldappool.StateConnector.unbind_s = _unbind + uri = 'ldap://BAD,ldap://GOOD' + dn = 'uid=adminuser,ou=logins,dc=mozilla' + passwd = 'adminuser' + cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2) + self.assertEqual(len(cm), 0) + + try: + with cm.connection('dn', 'pass') as conn: + # Ensure we failed over to the second URI + self.assertTrue(conn.active) + self.assertEqual(conn._uri, 'ldap://GOOD') + pass + except Exception: + raise AssertionError() + + def test_simple_bind_fails_timeout(self): + unbinds = [] + + def _unbind(self): + unbinds.append(1) + + # the binding fails with ldap.TIMEOUT + ldappool.StateConnector.simple_bind_s = _bind_fails_timeout + ldappool.StateConnector.unbind_s = _unbind + uri = '' + dn = 'uid=adminuser,ou=logins,dc=mozilla' + passwd = 'adminuser' + cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2) + self.assertEqual(len(cm), 0) + + try: + with cm.connection('dn', 'pass'): + pass + except ldap.TIMEOUT: + pass + else: + raise AssertionError() + + def test_simple_bind_fails_timeout_failover(self): + unbinds = [] + + def _unbind(self): + unbinds.append(1) + + # the binding to any server other than 'ldap://GOOD' fails + # with ldap.TIMEOUT + ldappool.StateConnector.simple_bind_s = _bind_fails_timeout_failover + ldappool.StateConnector.unbind_s = _unbind + uri = 'ldap://BAD,ldap://GOOD' + dn = 'uid=adminuser,ou=logins,dc=mozilla' + passwd = 'adminuser' + cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2) + self.assertEqual(len(cm), 0) + + try: + with cm.connection('dn', 'pass') as conn: + # Ensure we failed over to the second URI + self.assertTrue(conn.active) + self.assertEqual(conn._uri, 'ldap://GOOD') + pass + except Exception: + raise AssertionError() + def test_simple_bind_fails_invalid_credentials(self): unbinds = [] @@ -184,3 +295,31 @@ class TestLDAPConnection(unittest.TestCase): pass else: raise AssertionError() + + def test_simple_bind_fails_invalid_credentials_failover(self): + unbinds = [] + + def _unbind(self): + unbinds.append(1) + + # the binding to any server other than 'ldap://GOOD' fails + # with ldap.INVALID_CREDENTIALS + ldappool.StateConnector.simple_bind_s = \ + _bind_fails_invalid_credentials_failover + ldappool.StateConnector.unbind_s = _unbind + uri = 'ldap://BAD,ldap://GOOD' + dn = 'uid=adminuser,ou=logins,dc=mozilla' + passwd = 'adminuser' + cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2) + self.assertEqual(len(cm), 0) + + try: + # We expect this to throw an INVALID_CREDENTIALS exception for the + # first URI, as this is a hard-failure where we don't want failover + # to occur to subsequent URIs. + with cm.connection('dn', 'pass'): + pass + except ldap.INVALID_CREDENTIALS: + pass + else: + raise AssertionError()