swift/swift/common/middleware/dlo.py

364 lines
15 KiB
Python

# Copyright (c) 2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from ConfigParser import ConfigParser, NoSectionError, NoOptionError
from hashlib import md5
from swift.common import constraints
from swift.common.exceptions import ListingIterError, SegmentError
from swift.common.http import is_success
from swift.common.swob import Request, Response, \
HTTPRequestedRangeNotSatisfiable, HTTPBadRequest, HTTPConflict
from swift.common.utils import get_logger, json, \
RateLimitedIterator, read_conf_dir, quote, close_if_possible, \
closing_if_possible
from swift.common.request_helpers import SegmentedIterable
from swift.common.wsgi import WSGIContext, make_subrequest
from urllib import unquote
class GetContext(WSGIContext):
def __init__(self, dlo, logger):
super(GetContext, self).__init__(dlo.app)
self.dlo = dlo
self.logger = logger
def _get_container_listing(self, req, version, account, container,
prefix, marker=''):
con_req = make_subrequest(
req.environ, path='/'.join(['', version, account, container]),
method='GET',
headers={'x-auth-token': req.headers.get('x-auth-token')},
agent=('%(orig)s ' + 'DLO MultipartGET'), swift_source='DLO')
con_req.query_string = 'format=json&prefix=%s' % quote(prefix)
if marker:
con_req.query_string += '&marker=%s' % quote(marker)
con_resp = con_req.get_response(self.dlo.app)
if not is_success(con_resp.status_int):
return con_resp, None
with closing_if_possible(con_resp.app_iter):
return None, json.loads(''.join(con_resp.app_iter))
def _segment_listing_iterator(self, req, version, account, container,
prefix, segments, first_byte=None,
last_byte=None):
# It's sort of hokey that this thing takes in the first page of
# segments as an argument, but we need to compute the etag and content
# length from the first page, and it's better to have a hokey
# interface than to make redundant requests.
if first_byte is None:
first_byte = 0
if last_byte is None:
last_byte = float("inf")
marker = ''
while True:
for segment in segments:
seg_length = int(segment['bytes'])
if first_byte >= seg_length:
# don't need any bytes from this segment
first_byte = max(first_byte - seg_length, -1)
last_byte = max(last_byte - seg_length, -1)
continue
elif last_byte < 0:
# no bytes are needed from this or any future segment
break
seg_name = segment['name']
if isinstance(seg_name, unicode):
seg_name = seg_name.encode("utf-8")
# (obj path, etag, size, first byte, last byte)
yield ("/" + "/".join((version, account, container,
seg_name)),
# We deliberately omit the etag and size here;
# SegmentedIterable will check size and etag if
# specified, but we don't want it to. DLOs only care
# that the objects' names match the specified prefix.
None, None,
(None if first_byte <= 0 else first_byte),
(None if last_byte >= seg_length - 1 else last_byte))
first_byte = max(first_byte - seg_length, -1)
last_byte = max(last_byte - seg_length, -1)
if len(segments) < constraints.CONTAINER_LISTING_LIMIT:
# a short page means that we're done with the listing
break
elif last_byte < 0:
break
marker = segments[-1]['name']
error_response, segments = self._get_container_listing(
req, version, account, container, prefix, marker)
if error_response:
# we've already started sending the response body to the
# client, so all we can do is raise an exception to make the
# WSGI server close the connection early
close_if_possible(error_response.app_iter)
raise ListingIterError(
"Got status %d listing container /%s/%s" %
(error_response.status_int, account, container))
def get_or_head_response(self, req, x_object_manifest,
response_headers=None):
if response_headers is None:
response_headers = self._response_headers
container, obj_prefix = x_object_manifest.split('/', 1)
container = unquote(container)
obj_prefix = unquote(obj_prefix)
# manifest might point to a different container
req.acl = None
version, account, _junk = req.split_path(2, 3, True)
error_response, segments = self._get_container_listing(
req, version, account, container, obj_prefix)
if error_response:
return error_response
have_complete_listing = len(segments) < \
constraints.CONTAINER_LISTING_LIMIT
first_byte = last_byte = None
actual_content_length = None
content_length_for_swob_range = None
if req.range and len(req.range.ranges) == 1:
content_length_for_swob_range = sum(o['bytes'] for o in segments)
# This is a hack to handle suffix byte ranges (e.g. "bytes=-5"),
# which we can't honor unless we have a complete listing.
_junk, range_end = req.range.ranges_for_length(float("inf"))[0]
# If this is all the segments, we know whether or not this
# range request is satisfiable.
#
# Alternately, we may not have all the segments, but this range
# falls entirely within the first page's segments, so we know
# that it is satisfiable.
if (have_complete_listing
or range_end < content_length_for_swob_range):
byteranges = req.range.ranges_for_length(
content_length_for_swob_range)
if not byteranges:
return HTTPRequestedRangeNotSatisfiable(request=req)
first_byte, last_byte = byteranges[0]
# For some reason, swob.Range.ranges_for_length adds 1 to the
# last byte's position.
last_byte -= 1
actual_content_length = last_byte - first_byte + 1
else:
# The range may or may not be satisfiable, but we can't tell
# based on just one page of listing, and we're not going to go
# get more pages because that would use up too many resources,
# so we ignore the Range header and return the whole object.
actual_content_length = None
content_length_for_swob_range = None
req.range = None
response_headers = [
(h, v) for h, v in response_headers
if h.lower() not in ("content-length", "content-range")]
if content_length_for_swob_range is not None:
# Here, we have to give swob a big-enough content length so that
# it can compute the actual content length based on the Range
# header. This value will not be visible to the client; swob will
# substitute its own Content-Length.
#
# Note: if the manifest points to at least CONTAINER_LISTING_LIMIT
# segments, this may be less than the sum of all the segments'
# sizes. However, it'll still be greater than the last byte in the
# Range header, so it's good enough for swob.
response_headers.append(('Content-Length',
str(content_length_for_swob_range)))
elif have_complete_listing:
actual_content_length = sum(o['bytes'] for o in segments)
response_headers.append(('Content-Length',
str(actual_content_length)))
if have_complete_listing:
response_headers = [(h, v) for h, v in response_headers
if h.lower() != "etag"]
etag = md5()
for seg_dict in segments:
etag.update(seg_dict['hash'].strip('"'))
response_headers.append(('Etag', '"%s"' % etag.hexdigest()))
app_iter = None
if req.method == 'GET':
listing_iter = RateLimitedIterator(
self._segment_listing_iterator(
req, version, account, container, obj_prefix, segments,
first_byte=first_byte, last_byte=last_byte),
self.dlo.rate_limit_segments_per_sec,
limit_after=self.dlo.rate_limit_after_segment)
app_iter = SegmentedIterable(
req, self.dlo.app, listing_iter, ua_suffix="DLO MultipartGET",
swift_source="DLO", name=req.path, logger=self.logger,
max_get_time=self.dlo.max_get_time,
response_body_length=actual_content_length)
try:
app_iter.validate_first_segment()
except (SegmentError, ListingIterError):
return HTTPConflict(request=req)
resp = Response(request=req, headers=response_headers,
conditional_response=True,
app_iter=app_iter)
return resp
def handle_request(self, req, start_response):
"""
Take a GET or HEAD request, and if it is for a dynamic large object
manifest, return an appropriate response.
Otherwise, simply pass it through.
"""
resp_iter = self._app_call(req.environ)
# make sure this response is for a dynamic large object manifest
for header, value in self._response_headers:
if (header.lower() == 'x-object-manifest'):
close_if_possible(resp_iter)
response = self.get_or_head_response(req, value)
return response(req.environ, start_response)
else:
# Not a dynamic large object manifest; just pass it through.
start_response(self._response_status,
self._response_headers,
self._response_exc_info)
return resp_iter
class DynamicLargeObject(object):
def __init__(self, app, conf):
self.app = app
self.logger = get_logger(conf, log_route='dlo')
# DLO functionality used to live in the proxy server, not middleware,
# so let's try to go find config values in the proxy's config section
# to ease cluster upgrades.
self._populate_config_from_old_location(conf)
self.max_get_time = int(conf.get('max_get_time', '86400'))
self.rate_limit_after_segment = int(conf.get(
'rate_limit_after_segment', '10'))
self.rate_limit_segments_per_sec = int(conf.get(
'rate_limit_segments_per_sec', '1'))
def _populate_config_from_old_location(self, conf):
if ('rate_limit_after_segment' in conf or
'rate_limit_segments_per_sec' in conf or
'max_get_time' in conf or
'__file__' not in conf):
return
cp = ConfigParser()
if os.path.isdir(conf['__file__']):
read_conf_dir(cp, conf['__file__'])
else:
cp.read(conf['__file__'])
try:
pipe = cp.get("pipeline:main", "pipeline")
except (NoSectionError, NoOptionError):
return
proxy_name = pipe.rsplit(None, 1)[-1]
proxy_section = "app:" + proxy_name
for setting in ('rate_limit_after_segment',
'rate_limit_segments_per_sec',
'max_get_time'):
try:
conf[setting] = cp.get(proxy_section, setting)
except (NoSectionError, NoOptionError):
pass
def __call__(self, env, start_response):
"""
WSGI entry point
"""
req = Request(env)
try:
vrs, account, container, obj = req.split_path(4, 4, True)
except ValueError:
return self.app(env, start_response)
# install our COPY-callback hook
env['swift.copy_hook'] = self.copy_hook(
env.get('swift.copy_hook',
lambda src_req, src_resp, sink_req: src_resp))
if ((req.method == 'GET' or req.method == 'HEAD') and
req.params.get('multipart-manifest') != 'get'):
return GetContext(self, self.logger).\
handle_request(req, start_response)
elif req.method == 'PUT':
error_response = self.validate_x_object_manifest_header(
req, start_response)
if error_response:
return error_response(env, start_response)
return self.app(env, start_response)
def validate_x_object_manifest_header(self, req, start_response):
"""
Make sure that X-Object-Manifest is valid if present.
"""
if 'X-Object-Manifest' in req.headers:
value = req.headers['X-Object-Manifest']
container = prefix = None
try:
container, prefix = value.split('/', 1)
except ValueError:
pass
if not container or not prefix or '?' in value or '&' in value or \
prefix[0] == '/':
return HTTPBadRequest(
request=req,
body=('X-Object-Manifest must be in the '
'format container/prefix'))
def copy_hook(self, inner_hook):
def dlo_copy_hook(source_req, source_resp, sink_req):
x_o_m = source_resp.headers.get('X-Object-Manifest')
if x_o_m:
if source_req.params.get('multipart-manifest') == 'get':
# To copy the manifest, we let the copy proceed as normal,
# but ensure that X-Object-Manifest is set on the new
# object.
sink_req.headers['X-Object-Manifest'] = x_o_m
else:
ctx = GetContext(self, self.logger)
source_resp = ctx.get_or_head_response(
source_req, x_o_m, source_resp.headers.items())
return inner_hook(source_req, source_resp, sink_req)
return dlo_copy_hook
def filter_factory(global_conf, **local_conf):
conf = global_conf.copy()
conf.update(local_conf)
def dlo_filter(app):
return DynamicLargeObject(app, conf)
return dlo_filter