Bring over the 'url_helper' module from bzr

This module is quite useful for fetching urls in a manner
that supports custom headers, retries and ssl_args,
so let's ensure we have it available for use under these
circumstances.

Also adds some basic sanity tests so that it will continue
to work as expected.

Change-Id: I4d5e777cf530e822bbea69c76846ebebb48a7927
This commit is contained in:
Joshua Harlow 2015-04-02 12:04:50 -07:00
parent 3e391d6032
commit 911ada7bba
6 changed files with 436 additions and 1 deletions

21
cloudinit/test.py Normal file
View File

@ -0,0 +1,21 @@
# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
#
# vi: ts=4 expandtab
import httpretty
import testtools
class TestCase(testtools.TestCase):
    """Base class shared by all cloud-init test cases.

    Each test runs with real network access disabled, so any code path
    that tries to reach the network fails loudly instead of silently
    depending on an external service.
    """

    def setUp(self):
        super(TestCase, self).setUp()
        # Refuse any connection that httpretty has not mocked out.
        httpretty.HTTPretty.allow_net_connect = False

    def tearDown(self):
        super(TestCase, self).tearDown()
        # Re-enable real connections once the test has finished.
        httpretty.HTTPretty.allow_net_connect = True

View File

@ -0,0 +1,102 @@
# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
#
# vi: ts=4 expandtab
import httpretty
from cloudinit import test
from cloudinit import url_helper
class UrlHelperWaitForUrlsTest(test.TestCase):
    """Tests for ``url_helper.wait_any_url``."""

    @httpretty.activate
    def test_url_wait_for(self):
        # Each url gets a scripted sequence of outcomes: True means a
        # successful 200 response, False means a 400 failure.
        scripted = [
            ("http://www.yahoo.com", (False, False, True)),
            ("http://www.google.com", (False, False, False)),
        ]
        urls = [scripted_url for (scripted_url, _outcomes) in scripted]
        for (scripted_url, outcomes) in scripted:
            for succeeded in outcomes:
                if succeeded:
                    httpretty.register_uri(httpretty.GET,
                                           scripted_url,
                                           body=b'it worked!')
                else:
                    httpretty.register_uri(httpretty.GET,
                                           scripted_url,
                                           body=b'no worky',
                                           status=400)
        found = url_helper.wait_any_url(urls)
        self.assertEqual("http://www.yahoo.com", found)

    @httpretty.activate
    def test_url_wait_for_no_work(self):
        def always_fail(request, uri, headers):
            # Every request fails, so waiting can never succeed.
            return (400, headers, b"no worky")

        urls = [
            "http://www.yahoo.com",
            "http://www.google.com",
        ]
        for failing_url in urls:
            httpretty.register_uri(httpretty.GET,
                                   failing_url, body=always_fail)
        self.assertIsNone(url_helper.wait_any_url(urls, max_wait=1))
class UrlHelperFetchTest(test.TestCase):
    """Tests for ``url_helper.read_url``."""

    @httpretty.activate
    def test_url_fetch(self):
        # A single successful GET returns the body and an OK status.
        httpretty.register_uri(httpretty.GET,
                               "http://www.yahoo.com",
                               body=b'it worked!')
        resp = url_helper.read_url("http://www.yahoo.com")
        self.assertEqual(b"it worked!", resp.contents)
        self.assertEqual(url_helper.OK, resp.status_code)

    @httpretty.activate
    def test_retry_url_fetch(self):
        # The first attempt fails with a 400; the retry then succeeds.
        scripted_responses = [
            httpretty.Response(body=b"no worky", status=400),
            httpretty.Response(body=b"it worked!", status=200),
        ]
        httpretty.register_uri(httpretty.GET,
                               "http://www.yahoo.com",
                               responses=scripted_responses)
        resp = url_helper.read_url("http://www.yahoo.com", retries=2)
        self.assertEqual(b"it worked!", resp.contents)
        self.assertEqual(url_helper.OK, resp.status_code)

    @httpretty.activate
    def test_failed_url_fetch(self):
        # A 400 response with no retries raises UrlError.
        httpretty.register_uri(httpretty.GET,
                               "http://www.yahoo.com",
                               body=b'no worky', status=400)
        self.assertRaises(url_helper.UrlError,
                          url_helper.read_url, "http://www.yahoo.com")

    @httpretty.activate
    def test_failed_retry_url_fetch(self):
        # Every attempt (original plus both retries) fails, so the
        # fetch must raise UrlError.
        failures = [httpretty.Response(body=b"no worky", status=400)
                    for _ in range(3)]
        httpretty.register_uri(httpretty.GET,
                               "http://www.yahoo.com",
                               responses=failures)
        self.assertRaises(url_helper.UrlError,
                          url_helper.read_url, "http://www.yahoo.com",
                          retries=2)

297
cloudinit/url_helper.py Normal file
View File

@ -0,0 +1,297 @@
# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
#
# vi: ts=4 expandtab
import logging
import time
try:
from time import monotonic as now
except ImportError:
from time import time as now
import requests
from requests import adapters
from requests import exceptions
from requests import structures
# Arg, why does requests vendorize urllib3....
from requests.packages.urllib3 import util as urllib3_util
from six.moves.urllib.parse import quote as urlquote # noqa
from six.moves.urllib.parse import urlparse # noqa
from six.moves.urllib.parse import urlunparse # noqa
from six.moves.http_client import BAD_REQUEST as _BAD_REQUEST
from six.moves.http_client import MULTIPLE_CHOICES as _MULTIPLE_CHOICES
from six.moves.http_client import OK
from cloudinit import version
SSL_ENABLED = True
try:
import ssl as _ssl # noqa
except ImportError:
SSL_ENABLED = False
LOG = logging.getLogger(__name__)
def _get_base_url(url):
parsed_url = list(urlparse(url, scheme='http'))
parsed_url[2] = parsed_url[3] = parsed_url[4] = parsed_url[5] = ''
return urlunparse(parsed_url)
def _clean_url(url):
parsed_url = list(urlparse(url, scheme='http'))
if not parsed_url[1] and parsed_url[2]:
# Swap these since this seems to be a common
# occurrence when given urls like 'www.google.com'
parsed_url[1] = parsed_url[2]
parsed_url[2] = ''
return urlunparse(parsed_url)
class _Retry(urllib3_util.Retry):
    """Retry policy that forces retries on 4xx/5xx and logs its sleeps."""

    def is_forced_retry(self, method, status_code):
        # Any status of 400 or above is considered retryable.
        return status_code >= _BAD_REQUEST

    def sleep(self):
        # Re-implemented purely so the backoff delay is visible in the
        # logs before we actually sleep; the base class sleeps silently.
        delay = self.get_backoff_time()
        if delay > 0:
            LOG.debug("Please wait %s seconds while we wait to try again",
                      delay)
            time.sleep(delay)
class RequestsResponse(object):
    """Wrap a ``requests`` response behind a small common interface.

    This exists so that things like StringResponse or FileResponse can
    also exist, but with a different source for their response (aka not
    just from the requests library).
    """

    def __init__(self, response):
        self._response = response

    @property
    def contents(self):
        # Raw body bytes of the response.
        return self._response.content

    @property
    def headers(self):
        return self._response.headers

    @property
    def status_code(self):
        return self._response.status_code

    @property
    def url(self):
        return self._response.url

    def ok(self, redirects_ok=False):
        """Return True when the status code indicates success.

        With ``redirects_ok`` set, 3xx redirect codes also count.
        """
        limit = _BAD_REQUEST if redirects_ok else _MULTIPLE_CHOICES
        return OK <= self.status_code < limit

    def __str__(self):
        return self._response.text
class UrlError(IOError):
    """Raised when fetching a url fails.

    Carries the originating exception plus, when known, the HTTP status
    code and response headers of the failed request.
    """

    def __init__(self, cause, code=None, headers=None):
        # The IOError message is the stringified underlying cause.
        super(UrlError, self).__init__(str(cause))
        self.headers = headers or {}
        self.status_code = code
        self.cause = cause
def _get_ssl_args(url, ssl_details):
ssl_args = {}
scheme = urlparse(url).scheme
if scheme == 'https' and ssl_details:
if not SSL_ENABLED:
LOG.warn("SSL is not supported, "
"cert. verification can not occur!")
else:
if 'ca_certs' in ssl_details and ssl_details['ca_certs']:
ssl_args['verify'] = ssl_details['ca_certs']
else:
ssl_args['verify'] = True
if 'cert_file' in ssl_details and 'key_file' in ssl_details:
ssl_args['cert'] = [ssl_details['cert_file'],
ssl_details['key_file']]
elif 'cert_file' in ssl_details:
ssl_args['cert'] = str(ssl_details['cert_file'])
return ssl_args
def read_url(url, data=None, timeout=None, retries=0,
             headers=None, ssl_details=None,
             check_status=True, allow_redirects=True):
    """Fetch a url (or post to one) with the given options.

    url: url to fetch
    data: any data to POST (this switches the request method to POST
          instead of GET)
    timeout: the timeout (in seconds) to wait for a response
    headers: any headers to provide (and send along) in the request
    ssl_details: a dictionary containing any ssl settings, cert_file,
                 ca_certs and verify are valid entries (and they are only
                 used when the url provided is https)
    check_status: checks that the response status is OK after fetching (this
                  ensures a exception is raised on non-OK status codes)
    allow_redirects: enables redirects (or disables them)
    retries: maximum number of retries to attempt when fetching the url and
             the fetch fails

    Returns a RequestsResponse wrapping the response; raises UrlError on
    any request failure (the error keeps the status code and headers of
    the failed response, when one was received).
    """
    url = _clean_url(url)
    request_args = {
        'url': url,
    }
    request_args.update(_get_ssl_args(url, ssl_details))
    request_args['allow_redirects'] = allow_redirects
    request_args['method'] = 'GET'
    if timeout is not None:
        # Negative timeouts make no sense; clamp to zero.
        request_args['timeout'] = max(float(timeout), 0)
    if data:
        # Providing a body switches the request into a POST.
        request_args['method'] = 'POST'
        request_args['data'] = data
    # Case-insensitive headers so the User-Agent check below matches
    # regardless of the caller's capitalization.
    if not headers:
        headers = structures.CaseInsensitiveDict()
    else:
        headers = structures.CaseInsensitiveDict(headers)
    if 'User-Agent' not in headers:
        # Identify ourselves unless the caller supplied an agent string.
        headers['User-Agent'] = 'Cloud-Init/%s' % (version.version_string())
    request_args['headers'] = headers
    session = requests.Session()
    if retries:
        # Mount a retrying adapter for this url's scheme+host so that
        # failing requests (status >= 400, see _Retry) are retried with
        # backoff before giving up.
        retry = _Retry(total=max(int(retries), 0),
                       raise_on_redirect=not allow_redirects)
        session.mount(_get_base_url(url),
                      adapters.HTTPAdapter(max_retries=retry))
    try:
        with session:
            response = session.request(**request_args)
            if check_status:
                # Raises requests.exceptions.HTTPError on non-OK codes,
                # converted to UrlError below.
                response.raise_for_status()
    except exceptions.RequestException as e:
        if e.response is not None:
            # Preserve the status and headers from the failed response.
            raise UrlError(e, code=e.response.status_code,
                           headers=e.response.headers)
        else:
            raise UrlError(e)
    else:
        LOG.debug("Read from %s (%s, %sb)", url, response.status_code,
                  len(response.content))
        return RequestsResponse(response)
def wait_any_url(urls, max_wait=None, timeout=None,
                 status_cb=None, sleep_time=1,
                 exception_cb=None):
    """Wait for one of many urls to respond correctly.

    urls: a list of urls to try
    max_wait: roughly the maximum time to wait before giving up (when
              None or <= 0, each url is tried only once)
    timeout: the timeout provided to ``read_url``
    status_cb: call method with string message when a url is not available
    exception_cb: call method with 2 arguments 'msg' (per status_cb) and
                  'exception', the exception that occurred.
    sleep_time: how long to sleep before trying each url again (NOTE: the
                backoff computation below currently overrides this value
                each loop iteration)

    The idea of this routine is to wait for the EC2 metadata service to
    come up.  On both Eucalyptus and EC2 we have seen the case where
    the instance hit the MD before the MD service was up.  EC2 seems
    to have permanently fixed this, though.

    In openstack, the metadata service might be painfully slow, and
    unable to avoid hitting a timeout of even up to 10 seconds or more
    (LP: #894279) for a simple GET.

    Offset those needs with the need to not hang forever (and block boot)
    on a system where cloud-init is configured to look for EC2 Metadata
    service but is not going to find one.  It is possible that the instance
    data host (169.254.169.254) may be firewalled off entirely for a system,
    meaning that the connection will block forever unless a timeout is set.

    Returns the first url that responded correctly, or None if no url
    responded in time.
    """
    start_time = now()

    def log_status_cb(msg, exc=None):
        LOG.debug(msg)

    if not status_cb:
        status_cb = log_status_cb

    def timeup(max_wait, start_time):
        # Fixed: check for None *before* comparing; 'None <= 0' raises
        # TypeError on python 3.
        if max_wait is None or max_wait <= 0:
            return True
        return (now() - start_time) > max_wait

    loop_n = 0
    while True:
        # This makes a backoff with the following graph:
        #
        # https://www.desmos.com/calculator/c8pwjy6wmt
        sleep_time = int(loop_n / 5) + 1
        for url in urls:
            current_time = now()
            if loop_n != 0:
                if timeup(max_wait, start_time):
                    break
                # Fixed: guard on max_wait so a None max_wait does not
                # TypeError in the arithmetic below.
                if (timeout and max_wait is not None and
                        (current_time + timeout > (start_time + max_wait))):
                    # shorten timeout to not run way over max_wait
                    timeout = int((start_time + max_wait) - current_time)
            reason = ""
            url_exc = None
            try:
                response = read_url(url, timeout=timeout, check_status=False)
                if not response.contents:
                    # Fixed: RequestsResponse exposes 'status_code', not
                    # 'code'; the old attribute raised AttributeError and
                    # misreported these cases as "unexpected error".
                    reason = "empty response [%s]" % (response.status_code)
                    url_exc = UrlError(ValueError(reason),
                                       code=response.status_code,
                                       headers=response.headers)
                elif not response.ok():
                    reason = "bad status code [%s]" % (response.status_code)
                    url_exc = UrlError(ValueError(reason),
                                       code=response.status_code,
                                       headers=response.headers)
                else:
                    return url
            except UrlError as e:
                reason = "request error [%s]" % e
                url_exc = e
            except Exception as e:
                reason = "unexpected error [%s]" % e
                url_exc = e
            current_time = now()
            time_taken = int(current_time - start_time)
            status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url,
                                                               time_taken,
                                                               max_wait,
                                                               reason)
            status_cb(status_msg)
            if exception_cb:
                exception_cb(msg=status_msg, exception=url_exc)
        if timeup(max_wait, start_time):
            break
        loop_n = loop_n + 1
        LOG.debug("Please wait %s seconds while we wait to try again",
                  sleep_time)
        time.sleep(sleep_time)
    return None

14
cloudinit/version.py Normal file
View File

@ -0,0 +1,14 @@
# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
#
# vi: ts=4 expandtab
import pkg_resources
try:
    # Prefer pbr-generated version metadata when pbr is installed.
    from pbr import version as pbr_version
    _version_info = pbr_version.VersionInfo('cloudinit')
    version_string = _version_info.version_string
except ImportError:
    # Fall back to the setuptools/pkg_resources distribution metadata.
    _version_info = pkg_resources.get_distribution('cloudinit')

    def version_string():
        """Return the installed cloudinit version as a string."""
        # Fixed: assigning a lambda to a name (PEP 8 E731) replaced
        # with a proper def; behavior is unchanged.
        return _version_info.version

View File

@ -6,5 +6,5 @@ pbr>=0.6,!=0.7,<1.0
six>=1.7.0
pyyaml
jsonpatch
requests
requests>=1.0
requests-oauthlib

View File

@ -5,6 +5,7 @@
httpretty>=0.7.1
mock
nose
testtools
# For doc building
sphinx>=1.1.2,!=1.2.0,!=1.3b1,<1.3