diff --git a/novajoin/config.py b/novajoin/config.py index b62876c..c81426a 100644 --- a/novajoin/config.py +++ b/novajoin/config.py @@ -32,9 +32,14 @@ service_opts = [ help='Kerberos client keytab file'), cfg.StrOpt('domain', default=None, help='Domain for new hosts'), - cfg.IntOpt('connect_retries', default=2, + cfg.IntOpt('connect_retries', default=4, help='How many times to attempt to retry ' 'the connection to IPA before giving up'), + cfg.IntOpt('connect_backoff', default=0, + help='Initial number of seconds to backoff before ' + 'retrying the connection to IPA. The backoff ' + 'time is doubled after each retry. A value ' + 'of 0 disables backoff'), cfg.BoolOpt('project_subdomain', default=False, help='Treat the project as a DNS subdomain ' 'so a hostname would take the form: ' diff --git a/novajoin/exception.py b/novajoin/exception.py index d02c9e2..e363e2a 100644 --- a/novajoin/exception.py +++ b/novajoin/exception.py @@ -153,3 +153,7 @@ class NotificationVersionMismatch(JoinException): message = ("Provided notification version " "%(provided_maj)s.%(provided_min)s did not match expected " "%(expected_maj)s.%(expected_min)s for %(type)s") + + +class IPAConnectionError(JoinException): + message = "Unable to connect to IPA after %(tries) tries" diff --git a/novajoin/ipa.py b/novajoin/ipa.py index e4628d0..780c86f 100644 --- a/novajoin/ipa.py +++ b/novajoin/ipa.py @@ -41,6 +41,7 @@ if ipalib_imported: except ImportError: ipalib_imported = False +from novajoin import exception from novajoin.util import get_domain from oslo_config import cfg from oslo_log import log as logging @@ -54,13 +55,12 @@ LOG = logging.getLogger(__name__) class IPANovaJoinBase(object): - def __init__(self, backoff=0): - try: - self.ntries = CONF.connect_retries - except cfg.NoSuchOptError: - self.ntries = 1 + def __init__(self): if not ipalib_imported: return + + self.ntries = CONF.connect_retries + self.initial_backoff = CONF.connect_backoff self.ccache = "MEMORY:" + str(uuid.uuid4()) os.environ['KRB5CCNAME'] = self.ccache if self._ipa_client_configured() and not api.isdone('finalize'): @@ -70,7 +70,7 @@ class IPANovaJoinBase(object): api.bootstrap(context='novajoin') api.finalize() self.batch_args = list() - self.backoff = backoff + self.backoff = self.initial_backoff def split_principal(self, principal): """Split a principal into its components. Copied from IPA 4.0.0""" @@ -129,16 +129,20 @@ class IPANovaJoinBase(object): def __backoff(self): LOG.debug("Backing off %s seconds", self.backoff) time.sleep(self.backoff) - if self.backoff < 1024: + if self.backoff < 512: self.backoff = self.backoff * 2 + def __reset_backoff(self): + if self.backoff > self.initial_backoff: + LOG.debug("Resetting backoff to %d", self.initial_backoff) + self.backoff = self.initial_backoff + def __get_connection(self): """Make a connection to IPA or raise an error.""" tries = 0 - while (tries <= self.ntries) or (self.backoff > 0): - if self.backoff == 0: - LOG.debug("Attempt %d of %d", tries, self.ntries) + while (tries <= self.ntries): + LOG.debug("Attempt %d of %d", tries, self.ntries) if api.Backend.rpcclient.isconnected(): api.Backend.rpcclient.disconnect() try: @@ -149,6 +153,7 @@ class IPANovaJoinBase(object): except (errors.CCacheError, errors.TicketExpired, errors.KerberosError) as e: + tries += 1 LOG.debug("kinit again: %s", e) # pylint: disable=no-member try: @@ -158,9 +163,8 @@ class IPANovaJoinBase(object): self.ccache) except GSSError as e: LOG.debug("kinit failed: %s", e) - if tries > 0 and self.backoff: - self.__backoff() - tries += 1 + if self.backoff: + self.__backoff() except errors.NetworkError: tries += 1 if self.backoff: @@ -173,8 +177,13 @@ class IPANovaJoinBase(object): if self.backoff: self.__backoff() else: + # successful connection + self.__reset_backoff() return + LOG.error("Failed to connect to IPA after %d attempts", self.ntries) + raise exception.IPAConnectionError(tries=self.ntries) + def start_batch_operation(self): """Start a batch operation. diff --git a/novajoin/notifications.py b/novajoin/notifications.py index 8419061..dfdd62d 100644 --- a/novajoin/notifications.py +++ b/novajoin/notifications.py @@ -41,11 +41,9 @@ CONF = config.CONF LOG = logging.getLogger(__name__) -BACKOFF = 2 - def ipaclient(): - return IPAClient(backoff=BACKOFF) + return IPAClient() def novaclient(): @@ -194,9 +192,14 @@ class NotificationEndpoint(object): return LOG.info("Delete host %s (%s)", instance_id, hostname) - ipa = ipaclient() - ipa.delete_host(hostname, {}) - self.delete_subhosts(ipa, hostname_short, payload_metadata) + try: + ipa = ipaclient() + ipa.delete_host(hostname, {}) + self.delete_subhosts(ipa, hostname_short, payload_metadata) + except exception.IPAConnectionError: + LOG.error("IPA Connection Error when deleting host %s (%s). " + "Manual cleanup may be required in the IPA server.", + instance_id, hostname) @event_handlers('network.floating_ip.associate') def floaitng_ip_associate(self, payload): diff --git a/novajoin/releasenotes/notes/fix-backoff-mechanism-c681255215624413.yaml b/novajoin/releasenotes/notes/fix-backoff-mechanism-c681255215624413.yaml new file mode 100644 index 0000000..fe518f0 --- /dev/null +++ b/novajoin/releasenotes/notes/fix-backoff-mechanism-c681255215624413.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - Fix the retry backoff mechanism. With the current mechanism, if a backoff + is set, we will retry ad infinitum, leading to inconsistent results when + a deletion can happen after the instance as been recreated. With the new + mechanism, retries will occur a maximum number of times, with or without + backoff. A new parameter (connect_backoff, default=0) is added to specify + the initial backoff duration. Also, the default for the parameter for the + number of retries (connect_retries) has been increased from 2 to 4.