From 911ada7bba9e06793ae6e51a2235f949b3c8ceff Mon Sep 17 00:00:00 2001
From: Joshua Harlow
Date: Thu, 2 Apr 2015 12:04:50 -0700
Subject: [PATCH] Bring over the 'url_helper' module from bzr

This module is quite useful for fetching urls in a manner that
allows custom headers, retries and ssl args to be provided, so
let's ensure we have it available for usage under these
circumstances.

Also adds some basic sanity tests so that it will continue
to work as expected.

Change-Id: I4d5e777cf530e822bbea69c76846ebebb48a7927
---
 cloudinit/test.py                  |  21 ++
 cloudinit/tests/test_url_helper.py | 102 ++++++++++
 cloudinit/url_helper.py            | 297 +++++++++++++++++++++++++++++
 cloudinit/version.py               |  14 ++
 requirements.txt                   |   2 +-
 test-requirements.txt              |   1 +
 6 files changed, 436 insertions(+), 1 deletion(-)
 create mode 100644 cloudinit/test.py
 create mode 100644 cloudinit/tests/test_url_helper.py
 create mode 100644 cloudinit/url_helper.py
 create mode 100644 cloudinit/version.py

diff --git a/cloudinit/test.py b/cloudinit/test.py
new file mode 100644
index 00000000..719c7b62
--- /dev/null
+++ b/cloudinit/test.py
@@ -0,0 +1,21 @@
+# Copyright 2015 Canonical Ltd.
+# This file is part of cloud-init. See LICENCE file for license information.
+#
+# vi: ts=4 expandtab
+
+import httpretty
+import testtools
+
+
+class TestCase(testtools.TestCase):
+    """Base class for all cloud-init test cases."""
+
+    def setUp(self):
+        super(TestCase, self).setUp()
+        # Do not allow any unknown network connections to get triggered...
+        httpretty.HTTPretty.allow_net_connect = False
+
+    def tearDown(self):
+        super(TestCase, self).tearDown()
+        # Ok, allow it again...
+        httpretty.HTTPretty.allow_net_connect = True
diff --git a/cloudinit/tests/test_url_helper.py b/cloudinit/tests/test_url_helper.py
new file mode 100644
index 00000000..338f0a13
--- /dev/null
+++ b/cloudinit/tests/test_url_helper.py
@@ -0,0 +1,102 @@
+# Copyright 2015 Canonical Ltd.
+# This file is part of cloud-init. See LICENCE file for license information.
+# +# vi: ts=4 expandtab + +import httpretty + +from cloudinit import test +from cloudinit import url_helper + + +class UrlHelperWaitForUrlsTest(test.TestCase): + + @httpretty.activate + def test_url_wait_for(self): + urls_actions = [ + ("http://www.yahoo.com", (False, False, True)), + ("http://www.google.com", (False, False, False)), + ] + urls = [] + for (url, actions) in urls_actions: + urls.append(url) + for worked in actions: + if worked: + httpretty.register_uri(httpretty.GET, + url, body=b'it worked!') + else: + httpretty.register_uri(httpretty.GET, + url, body=b'no worky', + status=400) + + url = url_helper.wait_any_url(urls) + self.assertEqual("http://www.yahoo.com", url) + + @httpretty.activate + def test_url_wait_for_no_work(self): + + def request_callback(request, uri, headers): + return (400, headers, b"no worky") + + urls = [ + "http://www.yahoo.com", + "http://www.google.com", + ] + for url in urls: + httpretty.register_uri(httpretty.GET, + url, body=request_callback) + + self.assertIsNone(url_helper.wait_any_url(urls, max_wait=1)) + + +class UrlHelperFetchTest(test.TestCase): + + @httpretty.activate + def test_url_fetch(self): + httpretty.register_uri(httpretty.GET, + "http://www.yahoo.com", + body=b'it worked!') + + resp = url_helper.read_url("http://www.yahoo.com") + self.assertEqual(b"it worked!", resp.contents) + self.assertEqual(url_helper.OK, resp.status_code) + + @httpretty.activate + def test_retry_url_fetch(self): + httpretty.register_uri(httpretty.GET, + "http://www.yahoo.com", + responses=[ + httpretty.Response(body=b"no worky", + status=400), + httpretty.Response(body=b"it worked!", + status=200), + ]) + + resp = url_helper.read_url("http://www.yahoo.com", retries=2) + self.assertEqual(b"it worked!", resp.contents) + self.assertEqual(url_helper.OK, resp.status_code) + + @httpretty.activate + def test_failed_url_fetch(self): + httpretty.register_uri(httpretty.GET, + "http://www.yahoo.com", + body=b'no worky', status=400) + self.assertRaises(url_helper.UrlError, + url_helper.read_url, "http://www.yahoo.com") + + @httpretty.activate + def test_failed_retry_url_fetch(self): + httpretty.register_uri(httpretty.GET, + "http://www.yahoo.com", + responses=[ + httpretty.Response(body=b"no worky", + status=400), + httpretty.Response(body=b"no worky", + status=400), + httpretty.Response(body=b"no worky", + status=400), + ]) + + self.assertRaises(url_helper.UrlError, + url_helper.read_url, "http://www.yahoo.com", + retries=2) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py new file mode 100644 index 00000000..eb2d66c3 --- /dev/null +++ b/cloudinit/url_helper.py @@ -0,0 +1,297 @@ +# Copyright 2015 Canonical Ltd. +# This file is part of cloud-init. See LICENCE file for license information. +# +# vi: ts=4 expandtab + +import logging +import time + +try: + from time import monotonic as now +except ImportError: + from time import time as now + +import requests +from requests import adapters +from requests import exceptions +from requests import structures + +# Arg, why does requests vendorize urllib3.... 
+from requests.packages.urllib3 import util as urllib3_util
+
+from six.moves.urllib.parse import quote as urlquote  # noqa
+from six.moves.urllib.parse import urlparse  # noqa
+from six.moves.urllib.parse import urlunparse  # noqa
+
+from six.moves.http_client import BAD_REQUEST as _BAD_REQUEST
+from six.moves.http_client import MULTIPLE_CHOICES as _MULTIPLE_CHOICES
+from six.moves.http_client import OK
+
+from cloudinit import version
+
+
+SSL_ENABLED = True
+try:
+    import ssl as _ssl  # noqa
+except ImportError:
+    SSL_ENABLED = False
+
+
+LOG = logging.getLogger(__name__)
+
+
+def _get_base_url(url):
+    parsed_url = list(urlparse(url, scheme='http'))
+    parsed_url[2] = parsed_url[3] = parsed_url[4] = parsed_url[5] = ''
+    return urlunparse(parsed_url)
+
+
+def _clean_url(url):
+    parsed_url = list(urlparse(url, scheme='http'))
+    if not parsed_url[1] and parsed_url[2]:
+        # Swap these since this seems to be a common
+        # occurrence when given urls like 'www.google.com'
+        parsed_url[1] = parsed_url[2]
+        parsed_url[2] = ''
+    return urlunparse(parsed_url)
+
+
+class _Retry(urllib3_util.Retry):
+    def is_forced_retry(self, method, status_code):
+        # Allow >= 400 to be tried...
+        return status_code >= _BAD_REQUEST
+
+    def sleep(self):
+        # The base class doesn't have a way to log what we are doing,
+        # so replace it with one that does...
+        backoff = self.get_backoff_time()
+        if backoff <= 0:
+            return
+        else:
+            LOG.debug("Please wait %s seconds while we wait to try again",
+                      backoff)
+            time.sleep(backoff)
+
+
+class RequestsResponse(object):
+    """A wrapper for requests responses (that provides common functions).
+
+    This exists so that things like StringResponse or FileResponse can
+    also exist, but with different sources for their response (i.e. not
+    just from the requests library).
+    """
+
+    def __init__(self, response):
+        self._response = response
+
+    @property
+    def contents(self):
+        return self._response.content
+
+    @property
+    def url(self):
+        return self._response.url
+
+    def ok(self, redirects_ok=False):
+        upper = _MULTIPLE_CHOICES
+        if redirects_ok:
+            upper = _BAD_REQUEST
+        return self.status_code >= OK and self.status_code < upper
+
+    @property
+    def headers(self):
+        return self._response.headers
+
+    @property
+    def status_code(self):
+        return self._response.status_code
+
+    def __str__(self):
+        return self._response.text
+
+
+class UrlError(IOError):
+    def __init__(self, cause, code=None, headers=None):
+        super(UrlError, self).__init__(str(cause))
+        self.cause = cause
+        self.status_code = code
+        self.headers = headers or {}
+
+
+def _get_ssl_args(url, ssl_details):
+    ssl_args = {}
+    scheme = urlparse(url).scheme
+    if scheme == 'https' and ssl_details:
+        if not SSL_ENABLED:
+            LOG.warn("SSL is not supported, "
+                     "cert. verification can not occur!")
+        else:
+            if 'ca_certs' in ssl_details and ssl_details['ca_certs']:
+                ssl_args['verify'] = ssl_details['ca_certs']
+            else:
+                ssl_args['verify'] = True
+            if 'cert_file' in ssl_details and 'key_file' in ssl_details:
+                ssl_args['cert'] = [ssl_details['cert_file'],
+                                    ssl_details['key_file']]
+            elif 'cert_file' in ssl_details:
+                ssl_args['cert'] = str(ssl_details['cert_file'])
+    return ssl_args
+
+
+def read_url(url, data=None, timeout=None, retries=0,
+             headers=None, ssl_details=None,
+             check_status=True, allow_redirects=True):
+    """Fetch a url (or post to one) with the given options.
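+
+    A minimal usage sketch (the url, headers and handling shown here are
+    illustrative only, not requirements of this function)::
+
+        try:
+            # example metadata url; any http(s) url works here
+            resp = read_url("http://169.254.169.254/2009-04-04/meta-data/",
+                            headers={'X-Example': 'testing'},
+                            retries=3, timeout=5.0)
+            print(resp.contents)
+        except UrlError as e:
+            print("Fetch failed with status %s" % e.status_code)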
+
+    url: url to fetch
+    data: any data to POST (this switches the request method to POST
+          instead of GET)
+    timeout: the timeout (in seconds) to wait for a response
+    headers: any headers to provide (and send along) in the request
+    ssl_details: a dictionary containing any ssl settings; cert_file,
+                 key_file and ca_certs are valid entries (and they are only
+                 used when the url provided is https)
+    check_status: checks that the response status is OK after fetching (this
+                  ensures an exception is raised on non-OK status codes)
+    allow_redirects: enables redirects (or disables them)
+    retries: maximum number of retries to attempt when fetching the url and
+             the fetch fails
+    """
+    url = _clean_url(url)
+    request_args = {
+        'url': url,
+    }
+    request_args.update(_get_ssl_args(url, ssl_details))
+    request_args['allow_redirects'] = allow_redirects
+    request_args['method'] = 'GET'
+    if timeout is not None:
+        request_args['timeout'] = max(float(timeout), 0)
+    if data:
+        request_args['method'] = 'POST'
+        request_args['data'] = data
+    if not headers:
+        headers = structures.CaseInsensitiveDict()
+    else:
+        headers = structures.CaseInsensitiveDict(headers)
+    if 'User-Agent' not in headers:
+        headers['User-Agent'] = 'Cloud-Init/%s' % (version.version_string())
+    request_args['headers'] = headers
+    session = requests.Session()
+    if retries:
+        retry = _Retry(total=max(int(retries), 0),
+                       raise_on_redirect=not allow_redirects)
+        session.mount(_get_base_url(url),
+                      adapters.HTTPAdapter(max_retries=retry))
+    try:
+        with session:
+            response = session.request(**request_args)
+            if check_status:
+                response.raise_for_status()
+    except exceptions.RequestException as e:
+        if e.response is not None:
+            raise UrlError(e, code=e.response.status_code,
+                           headers=e.response.headers)
+        else:
+            raise UrlError(e)
+    else:
+        LOG.debug("Read from %s (%s, %sb)", url, response.status_code,
+                  len(response.content))
+        return RequestsResponse(response)
+
+
+def wait_any_url(urls, max_wait=None, timeout=None,
+                 status_cb=None, sleep_time=1,
+                 exception_cb=None):
+    """Wait for one of many urls to respond correctly.
+
+    urls: a list of urls to try
+    max_wait: roughly the maximum time to wait before giving up
+    timeout: the timeout provided to ``read_url``
+    status_cb: call method with string message when a url is not available
+    exception_cb: call method with 2 arguments 'msg' (per status_cb) and
+                  'exception', the exception that occurred.
+    sleep_time: how long to sleep before trying each url again
+
+    Returns the first url that responds successfully, or None if none of
+    the urls could be contacted before giving up.
+
+    The idea of this routine is to wait for the EC2 metadata service to
+    come up. On both Eucalyptus and EC2 we have seen the case where
+    the instance hit the metadata service before that service was up.
+    EC2 seems to have permanently fixed this, though.
+
+    In OpenStack, the metadata service might be painfully slow, and a
+    simple GET may hit a timeout of 10 seconds or more (LP: #894279).
+
+    Those needs must be balanced against the need to not hang forever
+    (and block boot) on a system where cloud-init is configured to look
+    for an EC2 metadata service but is not going to find one. It is
+    possible that the instance data host (169.254.169.254) may be
+    firewalled off entirely for a system, meaning that the connection
+    will block forever unless a timeout is set.
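+
+    A rough usage sketch (the urls and wait values shown here are
+    illustrative only)::
+
+        # hypothetical metadata urls; any list of urls may be passed
+        urls = ["http://169.254.169.254/2009-04-04/meta-data/",
+                "http://instance-data:8773/2009-04-04/meta-data/"]
+        url = wait_any_url(urls, max_wait=120, timeout=10)
+        if url is None:
+            # none of the urls responded before max_wait ran out
+            ...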
+    """
+    start_time = now()
+
+    def log_status_cb(msg, exc=None):
+        LOG.debug(msg)
+
+    if not status_cb:
+        status_cb = log_status_cb
+
+    def timeup(max_wait, start_time):
+        current_time = now()
+        return ((max_wait is None or max_wait <= 0) or
+                (current_time - start_time > max_wait))
+
+    loop_n = 0
+    while True:
+        # This makes a backoff with the following graph:
+        #
+        # https://www.desmos.com/calculator/c8pwjy6wmt
+        sleep_time = int(loop_n / 5) + 1
+        for url in urls:
+            current_time = now()
+            if loop_n != 0:
+                if timeup(max_wait, start_time):
+                    break
+                if (timeout and
+                        (current_time + timeout > (start_time + max_wait))):
+                    # shorten timeout to not run way over max_time
+                    timeout = int((start_time + max_wait) - current_time)
+            reason = ""
+            url_exc = None
+            try:
+                response = read_url(url, timeout=timeout, check_status=False)
+                if not response.contents:
+                    reason = "empty response [%s]" % (response.status_code)
+                    url_exc = UrlError(ValueError(reason),
+                                       code=response.status_code,
+                                       headers=response.headers)
+                elif not response.ok():
+                    reason = "bad status code [%s]" % (response.status_code)
+                    url_exc = UrlError(ValueError(reason),
+                                       code=response.status_code,
+                                       headers=response.headers)
+                else:
+                    return url
+            except UrlError as e:
+                reason = "request error [%s]" % e
+                url_exc = e
+            except Exception as e:
+                reason = "unexpected error [%s]" % e
+                url_exc = e
+
+            current_time = now()
+            time_taken = int(current_time - start_time)
+            status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url,
+                                                               time_taken,
+                                                               max_wait,
+                                                               reason)
+            status_cb(status_msg)
+            if exception_cb:
+                exception_cb(msg=status_msg, exception=url_exc)
+
+        if timeup(max_wait, start_time):
+            break
+
+        loop_n = loop_n + 1
+        LOG.debug("Please wait %s seconds while we wait to try again",
+                  sleep_time)
+        time.sleep(sleep_time)
+
+    return None
diff --git a/cloudinit/version.py b/cloudinit/version.py
new file mode 100644
index 00000000..29485a84
--- /dev/null
+++ b/cloudinit/version.py
@@ -0,0 +1,14 @@
+# Copyright 2015 Canonical Ltd.
+# This file is part of cloud-init. See LICENCE file for license information.
+#
+# vi: ts=4 expandtab
+
+import pkg_resources
+
+try:
+    from pbr import version as pbr_version
+    _version_info = pbr_version.VersionInfo('cloudinit')
+    version_string = _version_info.version_string
+except ImportError:
+    _version_info = pkg_resources.get_distribution('cloudinit')
+    version_string = lambda: _version_info.version
diff --git a/requirements.txt b/requirements.txt
index 7c19a071..72881c25 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,5 +6,5 @@ pbr>=0.6,!=0.7,<1.0
 six>=1.7.0
 pyyaml
 jsonpatch
-requests
+requests>=1.0
 requests-oauthlib
diff --git a/test-requirements.txt b/test-requirements.txt
index 8dedde52..d3e8bc46 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -5,6 +5,7 @@ httpretty>=0.7.1
 mock
 nose
+testtools
 
 # For doc building
 sphinx>=1.1.2,!=1.2.0,!=1.3b1,<1.3