
308 lines
10 KiB

# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
# vi: ts=4 expandtab
import time

try:
    from time import monotonic as now
except ImportError:  # pragma: nocover
    from time import time as now

import requests
from requests import adapters
from requests import exceptions
from requests import structures
# Arg, why does requests vendorize urllib3....
from requests.packages.urllib3 import util as urllib3_util
from six.moves.urllib.parse import quote as urlquote  # noqa
from six.moves.urllib.parse import urlparse  # noqa
from six.moves.urllib.parse import urlunparse  # noqa
from six.moves.http_client import BAD_REQUEST as _BAD_REQUEST
from six.moves.http_client import CONFLICT  # noqa
from six.moves.http_client import MULTIPLE_CHOICES as _MULTIPLE_CHOICES
from six.moves.http_client import OK

from cloudinit import logging
from cloudinit import version

# Probe for ssl support; the module itself is not used directly.
try:
    import ssl as _ssl  # noqa
    SSL_ENABLED = True
except ImportError:
    SSL_ENABLED = False
LOG = logging.getLogger(__name__)
def _get_base_url(url):
parsed_url = list(urlparse(url, scheme='http'))
parsed_url[2] = parsed_url[3] = parsed_url[4] = parsed_url[5] = ''
return urlunparse(parsed_url)
def _clean_url(url):
parsed_url = list(urlparse(url, scheme='http'))
if not parsed_url[1] and parsed_url[2]:
# Swap these since this seems to be a common
# occurrence when given urls like ''
parsed_url[1] = parsed_url[2]
parsed_url[2] = ''
return urlunparse(parsed_url)
class _Retry(urllib3_util.Retry):
    """Retry policy that retries on error statuses and logs its waits."""

    def is_forced_retry(self, method, status_code):
        """Return True when *status_code* is an error status (>= 400).

        NOTE(review): newer urllib3 releases appear to have renamed this
        hook to ``is_retry``; confirm against the vendored version.
        """
        # Allow >= 400 to be tried...
        return status_code >= _BAD_REQUEST

    def sleep(self, response=None):
        """Sleep for the current backoff time, logging the wait first.

        :param response:
            accepted (and ignored) so that callers which pass the
            response object to ``sleep`` keep working; callers that
            pass nothing are unaffected.
        """
        # The base class doesn't have a way to log what we are doing,
        # so replace it with one that does...
        backoff = self.get_backoff_time()
        if backoff <= 0:
            # Nothing to wait for, so do not log or sleep.
            return
        LOG.debug("Please wait %s seconds while we wait to try again",
                  backoff)
        time.sleep(backoff)
class RequestsResponse(object):
    """A wrapper for requests responses (that provides common functions).

    This exists so that things like StringResponse or FileResponse can
    also exist, but with different sources of their response (aka not
    just from the requests library).

    ``contents``, ``url``, ``headers`` and ``status_code`` are read-only
    properties; the rest of this module reads them as plain attributes.
    """

    def __init__(self, response):
        """Wrap *response*, the object returned by the requests library."""
        self._response = response

    @property
    def contents(self):
        """The raw body of the wrapped response."""
        return self._response.content

    @property
    def url(self):
        """The url of the wrapped response."""
        return self._response.url

    def ok(self, redirects_ok=False):
        """Return True when the status code signals success.

        :param redirects_ok:
            when true, 3xx status codes also count as success (the
            upper bound moves from 300 to 400)
        """
        upper = _BAD_REQUEST if redirects_ok else _MULTIPLE_CHOICES
        return OK <= self.status_code < upper

    @property
    def headers(self):
        """The headers of the wrapped response."""
        return self._response.headers

    @property
    def status_code(self):
        """The http status code of the wrapped response."""
        return self._response.status_code

    def __str__(self):
        return self._response.text
class UrlError(IOError):
    """Error raised when fetching a url fails.

    :param cause:
        the underlying exception (or message); its string form becomes
        the message of this error
    :param code: the http status code, when there is one
    :param headers: the response headers, when there are any
    """

    def __init__(self, cause, code=None, headers=None):
        message = str(cause)
        super(UrlError, self).__init__(message)
        # Fall back to an empty dict so callers can always index headers.
        self.headers = headers if headers else {}
        self.status_code = code
        self.cause = cause
def _get_ssl_args(url, ssl_details):
ssl_args = {}
scheme = urlparse(url).scheme
if scheme == 'https' and ssl_details:
LOG.warn("SSL is not supported, "
"cert. verification can not occur!")
if 'ca_certs' in ssl_details and ssl_details['ca_certs']:
ssl_args['verify'] = ssl_details['ca_certs']
ssl_args['verify'] = True
if 'cert_file' in ssl_details and 'key_file' in ssl_details:
ssl_args['cert'] = [ssl_details['cert_file'],
elif 'cert_file' in ssl_details:
ssl_args['cert'] = str(ssl_details['cert_file'])
return ssl_args
def read_url(url, data=None, timeout=None, retries=0,
             headers=None, ssl_details=None,
             check_status=True, allow_redirects=True):
    """Fetch a url (or post to one) with the given options.

    :param url: url to fetch
    :param data:
        any data to POST (this switches the request method to POST
        instead of GET)
    :param timeout: the timeout (in seconds) to wait for a response
    :param headers: any headers to provide (and send along) in the request
    :param ssl_details:
        a dictionary containing any ssl settings, cert_file, key_file
        and ca_certs are valid entries (and they are only used when the
        url provided is https)
    :param check_status:
        checks that the response status is OK after fetching (this
        ensures an exception is raised on non-OK status codes)
    :param allow_redirects: enables redirects (or disables them)
    :param retries:
        maximum number of retries to attempt when fetching the url and
        the fetch fails
    :returns: a ``RequestsResponse`` wrapping the fetched response
    :raises UrlError:
        when the request fails; the status code and headers of the
        failed response are attached when a response was received
    """
    url = _clean_url(url)
    request_args = {
        'url': url,
    }
    request_args.update(_get_ssl_args(url, ssl_details))
    request_args['allow_redirects'] = allow_redirects
    request_args['method'] = 'GET'
    if timeout is not None:
        # Negative timeouts are clamped to zero.
        request_args['timeout'] = max(float(timeout), 0)
    if data:
        request_args['method'] = 'POST'
        request_args['data'] = data
    if not headers:
        headers = structures.CaseInsensitiveDict()
    else:
        # Copy so the caller's mapping is never modified.
        headers = structures.CaseInsensitiveDict(headers)
    if 'User-Agent' not in headers:
        headers['User-Agent'] = 'Cloud-Init/%s' % (version.version_string())
    request_args['headers'] = headers
    session = requests.Session()
    if retries:
        retry = _Retry(total=max(int(retries), 0),
                       raise_on_redirect=not allow_redirects)
        # Only requests to this url's scheme://host use the retry policy.
        session.mount(_get_base_url(url),
                      adapters.HTTPAdapter(max_retries=retry))
    try:
        with session:
            response = session.request(**request_args)
            if check_status:
                response.raise_for_status()
    except exceptions.RequestException as e:
        if e.response is not None:
            raise UrlError(e, code=e.response.status_code,
                           headers=e.response.headers)
        raise UrlError(e)
    LOG.debug("Read from %s (%s, %sb)", url, response.status_code,
              len(response.content))
    return RequestsResponse(response)
def wait_any_url(urls, max_wait=None, timeout=None,
                 status_cb=None, sleep_time=1,
                 exception_cb=None):
    """Wait for one of many urls to respond correctly.

    :param urls: a list of urls to try
    :param max_wait:
        roughly the maximum time to wait before giving up; when it is
        None (or not positive) every url is tried exactly once
    :param timeout: the timeout provided to ``read_url``
    :param status_cb:
        call method with string message when a url is not available
        (defaults to logging the message at debug level)
    :param exception_cb:
        call method with 2 arguments 'msg' (per status_cb) and
        'exception', the exception that occurred.
    :param sleep_time:
        how long to sleep before trying each url again (the value
        passed in is currently replaced by a computed backoff)
    :returns:
        a tuple of the first url which succeeded and the response
        object, or None when no url succeeded in time

    The idea of this routine is to wait for the EC2 metadata service to
    come up.  On both Eucalyptus and EC2 we have seen the case where
    the instance hit the MD before the MD service was up.  EC2 seems
    to have permanently fixed this, though.

    In openstack, the metadata service might be painfully slow, and
    unable to avoid hitting a timeout of even up to 10 seconds or more
    (LP: #894279) for a simple GET.

    Offset those needs with the need to not hang forever (and block boot)
    on a system where cloud-init is configured to look for EC2 Metadata
    service but is not going to find one.  It is possible that the instance
    data host (169.254.169.254) may be firewalled off entirely for a
    system, meaning that the connection will block forever unless a
    timeout is set.
    """
    start_time = now()

    def log_status_cb(msg, exc=None):
        LOG.debug("%s", msg, exc_info=exc)

    if not status_cb:
        status_cb = log_status_cb

    def timeup(max_wait, start_time):
        # Test for None first: comparing None with a number raises
        # TypeError on python 3.
        if max_wait is None or max_wait <= 0:
            return True
        return now() - start_time > max_wait

    loop_n = 0
    while True:
        # Linear backoff: the delay grows by one second every five
        # rounds (1s for rounds 0-4, 2s for rounds 5-9, ...).
        sleep_time = int(loop_n / 5) + 1
        for url in urls:
            current_time = now()
            if loop_n != 0:
                if timeup(max_wait, start_time):
                    break
                if (timeout and
                        (current_time + timeout > (start_time + max_wait))):
                    # shorten timeout to not run way over max_time
                    timeout = int((start_time + max_wait) - current_time)
            reason = ""
            url_exc = None
            try:
                response = read_url(url, timeout=timeout, check_status=False)
                if not response.contents:
                    reason = "empty response [%s]" % (response.status_code)
                    url_exc = UrlError(ValueError(reason),
                                       code=response.status_code,
                                       headers=response.headers)
                elif not response.ok():
                    reason = "bad status code [%s]" % (response.status_code)
                    url_exc = UrlError(ValueError(reason),
                                       code=response.status_code,
                                       headers=response.headers)
                else:
                    return url, response
            except UrlError as e:
                reason = "request error [%s]" % e
                url_exc = e
            except Exception as e:
                # Deliberately broad: one misbehaving url must not stop
                # the remaining urls from being tried.
                reason = "unexpected error [%s]" % e
                url_exc = e
            current_time = now()
            time_taken = int(current_time - start_time)
            status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url,
                                                               time_taken,
                                                               max_wait,
                                                               reason)
            status_cb(status_msg)
            if exception_cb:
                exception_cb(msg=status_msg, exception=url_exc)

        if timeup(max_wait, start_time):
            break

        loop_n = loop_n + 1
        LOG.debug("Please wait %s seconds while we wait to try again",
                  sleep_time)
        time.sleep(sleep_time)

    return None