Switch HTTP store to using requests

Previously the HTTP store was using httplib and specifically unverified
HTTPS connections to download data about images. By switching to using
requests, we will get several benefits:

1. Certificate verification when using HTTPS
2. Connection pooling when following redirects
3. Help handling redirects

Closes-bug: 1263067
Partial-bug: 1436082
Implements: blueprint http-store-on-requests

Co-Authored-By: Sabari Kumar Murugesan <smurugesan@vmware.com>

Change-Id: Ib114919c1e1361ba64fe9e8382e1a2c39dbb3271
This commit is contained in:
Ian Cordasco 2015-03-27 17:49:36 -05:00 committed by Sabari Kumar Murugesan
parent 6dc26c55f4
commit 2572ea1410
4 changed files with 133 additions and 87 deletions

View File

@ -14,17 +14,16 @@
# under the License.
import logging
import socket
from oslo_utils import encodeutils
from six.moves import http_client
from six.moves import urllib
import requests
from glance_store import capabilities
import glance_store.driver
from glance_store import exceptions
from glance_store.i18n import _
from glance_store.i18n import _LE
import glance_store.location
LOG = logging.getLogger(__name__)
@ -109,14 +108,16 @@ def http_response_iterator(conn, response, size):
Return an iterator for a file-like object.
:param conn: HTTP(S) Connection
:param response: http_client.HTTPResponse object
:param response: urllib3.HTTPResponse object
:param size: Chunk size to iterate with
"""
chunk = response.read(size)
while chunk:
yield chunk
try:
chunk = response.read(size)
conn.close()
while chunk:
yield chunk
chunk = response.read(size)
finally:
conn.close()
class Store(glance_store.driver.Store):
@ -138,11 +139,11 @@ class Store(glance_store.driver.Store):
"""
try:
conn, resp, content_length = self._query(location, 'GET')
except socket.error:
reason = _LE("Remote server where the image is present "
"is unavailable.")
LOG.error(reason)
raise exceptions.RemoteServiceUnavailable()
except requests.exceptions.ConnectionError:
reason = _("Remote server where the image is present "
"is unavailable.")
LOG.exception(reason)
raise exceptions.RemoteServiceUnavailable(message=reason)
iterator = http_response_iterator(conn, resp, self.READ_CHUNKSIZE)
@ -166,63 +167,91 @@ class Store(glance_store.driver.Store):
:param location: `glance_store.location.Location` object, supplied
from glance_store.location.get_location_from_uri()
"""
conn = None
try:
size = self._query(location, 'HEAD')[2]
except (socket.error, http_client.HTTPException) as exc:
conn, resp, size = self._query(location, 'HEAD')
except requests.exceptions.ConnectionError as exc:
err_msg = encodeutils.exception_to_unicode(exc)
reason = _("The HTTP URL is invalid: %s") % err_msg
LOG.info(reason)
raise exceptions.BadStoreUri(message=reason)
finally:
# NOTE(sabari): Close the connection as the request was made with
# stream=True
if conn is not None:
conn.close()
return size
def _query(self, location, verb, depth=0):
if depth > MAX_REDIRECTS:
def _query(self, location, verb):
redirects_followed = 0
while redirects_followed < MAX_REDIRECTS:
loc = location.store_location
conn = self._get_response(loc, verb)
# NOTE(sigmavirus24): If it was generally successful, break early
if conn.status_code < 300:
break
self._check_store_uri(conn, loc)
redirects_followed += 1
# NOTE(sigmavirus24): Close the response so we don't leak sockets
conn.close()
location = self._new_location(location, conn.headers['location'])
else:
reason = (_("The HTTP URL exceeded %s maximum "
"redirects.") % MAX_REDIRECTS)
LOG.debug(reason)
raise exceptions.MaxRedirectsExceeded(message=reason)
loc = location.store_location
conn_class = self._get_conn_class(loc)
conn = conn_class(loc.netloc)
conn.request(verb, loc.path, "", {})
resp = conn.getresponse()
resp = conn.raw
content_length = int(resp.getheader('content-length', 0))
return (conn, resp, content_length)
def _new_location(self, old_location, url):
store_name = old_location.store_name
store_class = old_location.store_location.__class__
image_id = old_location.image_id
store_specs = old_location.store_specs
return glance_store.location.Location(store_name,
store_class,
self.conf,
uri=url,
image_id=image_id,
store_specs=store_specs)
@staticmethod
def _check_store_uri(conn, loc):
# TODO(sigmavirus24): Make this a staticmethod
# Check for bad status codes
if resp.status >= 400:
if resp.status == http_client.NOT_FOUND:
if conn.status_code >= 400:
if conn.status_code == requests.codes.not_found:
reason = _("HTTP datastore could not find image at URI.")
LOG.debug(reason)
raise exceptions.NotFound(message=reason)
reason = (_("HTTP URL %(url)s returned a "
"%(status)s status code.") %
dict(url=loc.path, status=resp.status))
"%(status)s status code. \nThe response body:\n"
"%(body)s") %
{'url': loc.path, 'status': conn.status_code,
'body': conn.text})
LOG.debug(reason)
raise exceptions.BadStoreUri(message=reason)
location_header = resp.getheader("location")
if location_header:
if resp.status not in (301, 302):
reason = (_("The HTTP URL %(url)s attempted to redirect "
"with an invalid %(status)s status code.") %
dict(url=loc.path, status=resp.status))
LOG.info(reason)
raise exceptions.BadStoreUri(message=reason)
location_class = glance_store.location.Location
new_loc = location_class(location.store_name,
location.store_location.__class__,
self.conf,
uri=location_header,
image_id=location.image_id,
store_specs=location.store_specs)
return self._query(new_loc, verb, depth + 1)
content_length = int(resp.getheader('content-length', 0))
return (conn, resp, content_length)
if conn.is_redirect and conn.status_code not in (301, 302):
reason = (_("The HTTP URL %(url)s attempted to redirect "
"with an invalid %(status)s status code.") %
{'url': loc.path, 'status': conn.status_code})
LOG.info(reason)
raise exceptions.BadStoreUri(message=reason)
def _get_conn_class(self, loc):
"""
Returns connection class for accessing the resource. Useful
for dependency injection and stubouts in testing...
"""
return {'http': http_client.HTTPConnection,
'https': http_client.HTTPSConnection}[loc.scheme]
def _get_response(self, location, verb):
if not hasattr(self, 'session'):
self.session = requests.Session()
return self.session.request(verb, location.get_uri(), stream=True,
allow_redirects=False)

View File

@ -15,7 +15,7 @@
import mock
from six.moves import http_client
import requests
import glance_store
from glance_store._drivers import http
@ -36,25 +36,19 @@ class TestHttpStore(base.StoreBaseTest,
self.store = http.Store(self.conf)
self.register_store_schemes(self.store, 'http')
def _mock_httplib(self):
"""Mock httplib connection object.
def _mock_requests(self):
"""Mock requests session object.
Should be called when need to mock httplib response and request
objects.
Should be called when we need to mock request/response objects.
"""
response = mock.patch('six.moves.http_client'
'.HTTPConnection.getresponse')
self.response = response.start()
self.response.return_value = utils.FakeHTTPResponse()
self.addCleanup(response.stop)
request = mock.patch('six.moves.http_client.HTTPConnection.request')
request = mock.patch('requests.Session.request')
self.request = request.start()
self.request.side_effect = lambda w, x, y, z: None
self.addCleanup(request.stop)
def test_http_get(self):
self._mock_httplib()
self._mock_requests()
self.request.return_value = utils.fake_response()
uri = "http://netloc/path/to/file.tar.gz"
expected_returns = ['I ', 'am', ' a', ' t', 'ea', 'po', 't,', ' s',
'ho', 'rt', ' a', 'nd', ' s', 'to', 'ut', '\n']
@ -74,16 +68,16 @@ class TestHttpStore(base.StoreBaseTest,
# Add two layers of redirects to the response stack, which will
# return the default 200 OK with the expected data after resolving
# both redirects.
self._mock_httplib()
self._mock_requests()
redirect1 = {"location": "http://example.com/teapot.img"}
redirect2 = {"location": "http://example.com/teapot_real.img"}
responses = [utils.FakeHTTPResponse(status=302, headers=redirect1),
utils.FakeHTTPResponse(status=301, headers=redirect2),
utils.FakeHTTPResponse()]
responses = [utils.fake_response(status_code=302, headers=redirect1),
utils.fake_response(status_code=301, headers=redirect2),
utils.fake_response()]
def getresponse():
def getresponse(*args, **kwargs):
return responses.pop()
self.response.side_effect = getresponse
self.request.side_effect = getresponse
uri = "http://netloc/path/to/file.tar.gz"
expected_returns = ['I ', 'am', ' a', ' t', 'ea', 'po', 't,', ' s',
@ -97,40 +91,42 @@ class TestHttpStore(base.StoreBaseTest,
self.assertEqual(chunks, expected_returns)
def test_http_get_max_redirects(self):
self._mock_httplib()
self._mock_requests()
redirect = {"location": "http://example.com/teapot.img"}
responses = ([utils.FakeHTTPResponse(status=302, headers=redirect)]
responses = ([utils.fake_response(status_code=302, headers=redirect)]
* (http.MAX_REDIRECTS + 2))
def getresponse():
def getresponse(*args, **kwargs):
return responses.pop()
self.response.side_effect = getresponse
self.request.side_effect = getresponse
uri = "http://netloc/path/to/file.tar.gz"
loc = location.get_location_from_uri(uri, conf=self.conf)
self.assertRaises(exceptions.MaxRedirectsExceeded, self.store.get, loc)
def test_http_get_redirect_invalid(self):
self._mock_httplib()
self._mock_requests()
redirect = {"location": "http://example.com/teapot.img"}
redirect_resp = utils.FakeHTTPResponse(status=307, headers=redirect)
self.response.return_value = redirect_resp
redirect_resp = utils.fake_response(status_code=307, headers=redirect)
self.request.return_value = redirect_resp
uri = "http://netloc/path/to/file.tar.gz"
loc = location.get_location_from_uri(uri, conf=self.conf)
self.assertRaises(exceptions.BadStoreUri, self.store.get, loc)
def test_http_get_not_found(self):
self._mock_httplib()
fake = utils.FakeHTTPResponse(status=404, data="404 Not Found")
self.response.return_value = fake
self._mock_requests()
fake = utils.fake_response(status_code=404, content="404 Not Found")
self.request.return_value = fake
uri = "http://netloc/path/to/file.tar.gz"
loc = location.get_location_from_uri(uri, conf=self.conf)
self.assertRaises(exceptions.NotFound, self.store.get, loc)
def test_http_delete_raise_error(self):
self._mock_httplib()
self._mock_requests()
self.request.return_value = utils.fake_response()
uri = "https://netloc/path/to/file.tar.gz"
loc = location.get_location_from_uri(uri, conf=self.conf)
self.assertRaises(exceptions.StoreDeleteNotSupported,
@ -146,17 +142,21 @@ class TestHttpStore(base.StoreBaseTest,
None, None, 'http')
def test_http_get_size_with_non_existent_image_raises_Not_Found(self):
self._mock_httplib()
fake = utils.FakeHTTPResponse(status=404, data="404 Not Found")
self.response.return_value = fake
self._mock_requests()
self.request.return_value = utils.fake_response(
status_code=404, content='404 Not Found')
uri = "http://netloc/path/to/file.tar.gz"
loc = location.get_location_from_uri(uri, conf=self.conf)
self.assertRaises(exceptions.NotFound, self.store.get_size, loc)
self.request.assert_called_once_with('HEAD', uri, stream=True,
allow_redirects=False)
def test_http_get_size_bad_status_line(self):
self._mock_httplib()
self.response.side_effect = http_client.BadStatusLine(line='')
self._mock_requests()
# Note(sabari): Low-level httplib.BadStatusLine will be raised as
# ConnectionErorr after migrating to requests.
self.request.side_effect = requests.exceptions.ConnectionError
uri = "http://netloc/path/to/file.tar.gz"
loc = location.get_location_from_uri(uri, conf=self.conf)

View File

@ -16,6 +16,8 @@
import six
from six.moves import urllib
import requests
def sort_url_by_qs_keys(url):
# NOTE(kragniz): this only sorts the keys of the query string of a url.
@ -57,3 +59,17 @@ class FakeHTTPResponse(object):
def read(self, amt):
self.data.read(amt)
def release_conn(self):
pass
def close(self):
self.data.close()
def fake_response(status_code=200, headers=None, content=None):
r = requests.models.Response()
r.status_code = status_code
r.headers = headers or {}
r.raw = FakeHTTPResponse(status_code, headers, content)
return r

View File

@ -15,3 +15,4 @@ debtcollector>=1.2.0 # Apache-2.0
jsonschema!=2.5.0,<3.0.0,>=2.0.0 # MIT
python-keystoneclient!=1.8.0,!=2.1.0,>=1.6.0 # Apache-2.0
requests>=2.8.1,!=2.9.0 # Apache-2.0