Retry on EAI_AGAIN name resolution failures

There is no sane way to convince Ubuntu to start these services after
name resolution is working (because sysv init is horribly broken on
Ubuntu). Work around this by catching EAI_AGAIN errors during name
resolution and retrying until we can resolve names.

Each failed resolution attempt is logged (at debug level) so that users
investigating the logs can see that name resolution was the problem.

Change-Id: If94d4f04d0e1cfedc358fd9d678a36fc9cd8aa7b
This commit is contained in:
Clark Boylan 2015-03-04 12:40:19 -08:00
parent 158a75ac6e
commit e397381251
2 changed files with 30 additions and 0 deletions

View File

@ -24,6 +24,7 @@ import os.path
import re
import signal
import threading
import time
import yaml
import zmq
@ -164,10 +165,25 @@ class Server(object):
self.processors.append(log_processor)
self.processors.append(subunit_processor)
def wait_for_name_resolution(self, host, port):
    """Block until ``socket.getaddrinfo(host, port)`` succeeds.

    A ``socket.gaierror`` whose errno is ``socket.EAI_AGAIN`` is treated
    as transient: it is logged at debug level and the lookup is retried
    after a two second pause. There is no retry limit.

    Raises:
        socket.gaierror: for any resolution failure other than EAI_AGAIN.
    """
    resolved = False
    while not resolved:
        try:
            socket.getaddrinfo(host, port)
        except socket.gaierror as err:
            # Only a temporary resolver failure is worth retrying;
            # everything else is handed back to the caller untouched.
            if err.errno != socket.EAI_AGAIN:
                raise
            logging.debug("Temporary failure in name resolution")
            time.sleep(2)
        else:
            resolved = True
def main(self):
statsd_host = os.environ.get('STATSD_HOST')
statsd_port = int(os.environ.get('STATSD_PORT', 8125))
statsd_prefix = os.environ.get('STATSD_PREFIX', 'logstash.geard')
if statsd_host:
self.wait_for_name_resolution(statsd_host, statsd_port)
self.gearserver = gear.Server(
statsd_host=statsd_host,
statsd_port=statsd_port,

View File

@ -359,9 +359,23 @@ class Server(object):
logging.basicConfig(level=logging.CRITICAL)
logging.debug("Log pusher starting.")
def wait_for_name_resolution(self, host, port):
    """Wait for ``host``/``port`` to become resolvable.

    Repeatedly calls ``socket.getaddrinfo``. When it fails with a
    ``socket.gaierror`` carrying ``socket.EAI_AGAIN``, the failure is
    logged at debug level and another attempt is made two seconds
    later, indefinitely. Returns ``None`` once a lookup succeeds.

    Raises:
        socket.gaierror: if resolution fails for any reason other than
            EAI_AGAIN.
    """
    while True:
        try:
            socket.getaddrinfo(host, port)
            return
        except socket.gaierror as failure:
            transient = failure.errno == socket.EAI_AGAIN
            if not transient:
                raise
        # Reached only after a transient (EAI_AGAIN) failure.
        logging.debug("Temporary failure in name resolution")
        time.sleep(2)
def setup_retriever(self):
hostname = socket.gethostname()
gearman_worker = gear.Worker(hostname + b'-pusher')
self.wait_for_name_resolution(self.gearman_host, self.gearman_port)
gearman_worker.addServer(self.gearman_host,
self.gearman_port)
gearman_worker.registerFunction(b'push-log')