From 3cd22c77cc4f2cdde4a695866d7778c680944888 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Wed, 4 Mar 2015 12:40:19 -0800 Subject: [PATCH] Retry on EAI_AGAIN name resolution failures There is no sane way to convince Ubuntu to start these services after name resolution is working (because sysv init is horribly broken on Ubuntu). Work around this by catching EAI_AGAIN errors during name resolution and retrying until we can resolve names. This logs each failed resolution attempt so that users are aware of the issue if investigating logs. Change-Id: If94d4f04d0e1cfedc358fd9d678a36fc9cd8aa7b --- files/log-gearman-client.py | 16 ++++++++++++++++ files/log-gearman-worker.py | 14 ++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/files/log-gearman-client.py b/files/log-gearman-client.py index 03aa038..56466bf 100644 --- a/files/log-gearman-client.py +++ b/files/log-gearman-client.py @@ -24,6 +24,7 @@ import os.path import re import signal import threading +import time import yaml import zmq @@ -164,10 +165,25 @@ class Server(object): self.processors.append(log_processor) self.processors.append(subunit_processor) + def wait_for_name_resolution(self, host, port): + while True: + try: + socket.getaddrinfo(host, port) + except socket.gaierror as e: + if e.errno == socket.EAI_AGAIN: + logging.debug("Temporary failure in name resolution") + time.sleep(2) + continue + else: + raise + break + def main(self): statsd_host = os.environ.get('STATSD_HOST') statsd_port = int(os.environ.get('STATSD_PORT', 8125)) statsd_prefix = os.environ.get('STATSD_PREFIX', 'logstash.geard') + if statsd_host: + self.wait_for_name_resolution(statsd_host, statsd_port) self.gearserver = gear.Server( statsd_host=statsd_host, statsd_port=statsd_port, diff --git a/files/log-gearman-worker.py b/files/log-gearman-worker.py index 66007cb..428ab83 100644 --- a/files/log-gearman-worker.py +++ b/files/log-gearman-worker.py @@ -359,9 +359,23 @@ class Server(object): logging.basicConfig(level=logging.CRITICAL) logging.debug("Log pusher starting.") + def wait_for_name_resolution(self, host, port): + while True: + try: + socket.getaddrinfo(host, port) + except socket.gaierror as e: + if e.errno == socket.EAI_AGAIN: + logging.debug("Temporary failure in name resolution") + time.sleep(2) + continue + else: + raise + break + def setup_retriever(self): hostname = socket.gethostname() gearman_worker = gear.Worker(hostname + b'-pusher') + self.wait_for_name_resolution(self.gearman_host, self.gearman_port) gearman_worker.addServer(self.gearman_host, self.gearman_port) gearman_worker.registerFunction(b'push-log')