swift/swift/obj/mem_diskfile.py

514 lines
17 KiB
Python

# Copyright (c) 2010-2013 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" In-Memory Disk File Interface for Swift Object Server"""
import io
import time
from contextlib import contextmanager
from eventlet import Timeout
from swift.common.utils import Timestamp
from swift.common.exceptions import DiskFileQuarantined, DiskFileNotExist, \
DiskFileCollision, DiskFileDeleted, DiskFileNotOpen
from swift.common.request_helpers import is_sys_meta
from swift.common.swob import multi_range_iterator
from swift.common.utils import md5
from swift.obj.diskfile import DATAFILE_SYSTEM_META, RESERVED_DATAFILE_META
class InMemoryFileSystem(object):
    """
    Minimal dictionary-backed stand-in for a file system.

    A single dictionary maps each object name to a two-element tuple: the
    ``BytesIO`` buffer holding the object's contents, followed by the
    object's metadata dictionary.
    """

    def __init__(self):
        # name -> (BytesIO buffer, metadata dict)
        self._filesystem = {}

    def get_object(self, name):
        """
        Look up a stored object by name.

        :param name: standard object name
        :return: ``(fp, metadata)`` as stored for ``name``, or
                 ``(None, None)`` when nothing is stored under ``name``
        """
        entry = self._filesystem.get(name)
        if entry is not None:
            contents, meta = entry
            return contents, meta
        return None, None

    def put_object(self, name, fp, metadata):
        """
        Store (or overwrite) the entry kept under ``name``.

        :param name: standard object name
        :param fp: ``BytesIO`` in-memory representation object
        :param metadata: dictionary of metadata to be written
        """
        entry = (fp, metadata)
        self._filesystem[name] = entry

    def del_object(self, name):
        """
        Remove the entry kept under ``name``.

        :param name: standard object name
        :raises KeyError: if nothing is stored under ``name``
        """
        self._filesystem.pop(name)

    def get_diskfile(self, account, container, obj, **kwargs):
        """
        Build a DiskFile bound to this file system.

        Extra keyword arguments are accepted and ignored.
        """
        return DiskFile(self, account, container, obj)

    def pickle_async_update(self, *args, **kwargs):
        """
        Async updates are not handled for now; every call is a no-op.
        """
        return None
class DiskFileWriter(object):
    """
    .. note::

        Sample alternative pluggable on-disk backend implementation.

    Write context used while servicing PUT REST API requests. An instance
    is what DiskFile's create() context manager yields.

    :param fs: internal file system object to use
    :param name: standard object name
    """

    def __init__(self, fs, name):
        self._name = name
        self._filesystem = fs
        # No buffer exists until open() is called.
        self._fp = None
        # Running totals maintained by write().
        self._upload_size = 0
        self._chunks_etag = md5(usedforsecurity=False)

    def open(self):
        """
        Get ready to receive data.

        Allocates a fresh, empty ``BytesIO`` buffer for a PUT that has been
        started but not yet finished.

        :returns: this writer
        """
        self._fp = io.BytesIO()
        return self

    def close(self):
        """
        Drop this writer's reference to the buffer made by ``open()``.

        Note: If ``put()`` has not been called, the data written will be
        lost.
        """
        self._fp = None

    def write(self, chunk):
        """
        Append a chunk to the ``BytesIO`` buffer and fold it into the
        running byte count and MD5.

        :param chunk: the chunk of data to write, as bytes
        """
        buf = self._fp
        buf.write(chunk)
        self._upload_size += len(chunk)
        self._chunks_etag.update(chunk)

    def chunks_finished(self):
        """
        Report what has been accumulated by ``write()`` so far.

        :returns: a tuple, (upload_size, etag)
        """
        digest = self._chunks_etag.hexdigest()
        return self._upload_size, digest

    def put(self, metadata):
        """
        Publish the buffer and its metadata in the in-memory file system
        under this writer's object name.

        The ``name`` key of ``metadata`` is set to the object name before
        storing; the caller's dictionary is modified in place.

        :param metadata: dictionary of metadata to be written
        """
        name = self._name
        metadata['name'] = name
        self._filesystem.put_object(name, self._fp, metadata)

    def commit(self, timestamp):
        """
        Perform any operations necessary to mark the object as durable.
        For the mem_diskfile type this is a no-op.

        :param timestamp: object put timestamp, an instance of
                          :class:`~swift.common.utils.Timestamp`
        """
        return None
class DiskFileReader(object):
    """
    .. note::

        Sample alternative pluggable on-disk backend implementation.

    Encapsulation of the read context for servicing GET REST API
    requests. Instances are iterable and yield the object's contents as
    bytes.

    :param name: object name
    :param fp: open file object pointer reference
    :param obj_size: on-disk size of object in bytes
    :param etag: MD5 hash of object from metadata
    """

    def __init__(self, name, fp, obj_size, etag):
        self._name = name
        self._fp = fp
        self._obj_size = obj_size
        self._etag = etag
        # Per-iteration bookkeeping, (re)initialised by __iter__.
        self._iter_etag = None
        self._bytes_read = 0
        self._started_at_0 = False
        self._read_to_eof = False
        # Set while app_iter_ranges drives several app_iter_range passes
        # over the same file object, so the file is not closed in between.
        self._suppress_file_closing = False
        # Holds the quarantine reason once a mismatch has been detected.
        self.was_quarantined = ''

    def __iter__(self):
        """
        Yield the file's contents from its current position to the end.

        An MD5 of the data is only started when iteration begins at offset
        zero. Unless closing is suppressed, the reader is closed once the
        generator finishes or is discarded.
        """
        try:
            self._bytes_read = 0
            self._started_at_0 = False
            self._read_to_eof = False
            if self._fp.tell() == 0:
                self._started_at_0 = True
                self._iter_etag = md5(usedforsecurity=False)
            while True:
                chunk = self._fp.read()
                if chunk:
                    if self._iter_etag:
                        self._iter_etag.update(chunk)
                    self._bytes_read += len(chunk)
                    yield chunk
                else:
                    self._read_to_eof = True
                    break
        finally:
            if not self._suppress_file_closing:
                self.close()

    def app_iter_range(self, start, stop):
        """
        Yield the bytes of the file between two absolute offsets.

        :param start: offset to seek to before reading; when None, reading
                      continues from the current file position
        :param stop: offset at which to stop (exclusive); when None, read
                     to the end of the file
        """
        if start or start == 0:
            self._fp.seek(start)
        if stop is not None:
            # ``stop`` is an absolute offset, so without an explicit start
            # the length has to be measured from where the file pointer
            # currently is; ``stop - None`` would raise TypeError.
            begin = self._fp.tell() if start is None else start
            length = stop - begin
        else:
            length = None
        try:
            for chunk in self:
                if length is not None:
                    length -= len(chunk)
                    if length < 0:
                        # Chop off the extra:
                        yield chunk[:length]
                        break
                yield chunk
        finally:
            if not self._suppress_file_closing:
                self.close()

    def app_iter_ranges(self, ranges, content_type, boundary, size):
        """
        Yield the body of a multi-range response.

        With no ranges a single empty bytes chunk is produced. Otherwise
        the work is delegated to ``multi_range_iterator``, which is handed
        ``app_iter_range`` to read each range; the file is kept open across
        the individual ranges and closed at the end.
        """
        if not ranges:
            # Must be bytes, like every other chunk this reader yields.
            yield b''
        else:
            try:
                self._suppress_file_closing = True
                for chunk in multi_range_iterator(
                        ranges, content_type, boundary, size,
                        self.app_iter_range):
                    yield chunk
            finally:
                self._suppress_file_closing = False
                try:
                    self.close()
                except DiskFileQuarantined:
                    pass

    def _quarantine(self, msg):
        """Record ``msg`` as the reason this object was quarantined."""
        self.was_quarantined = msg

    def _handle_close_quarantine(self):
        """
        Compare what was read with the expected size and ETag, recording a
        quarantine reason on the first mismatch found.
        """
        if self._bytes_read != self._obj_size:
            self._quarantine(
                "Bytes read: %s, does not match metadata: %s" % (
                    self._bytes_read, self._obj_size))
        elif self._iter_etag and \
                self._etag != self._iter_etag.hexdigest():
            self._quarantine(
                "ETag %s and file's md5 %s do not match" % (
                    self._etag, self._iter_etag.hexdigest()))

    def close(self):
        """
        Close the file. Will handle quarantining file if necessary.

        The size/ETag check only runs when the last iteration covered the
        whole file (started at offset zero and reached EOF). Calling this
        on an already closed reader does nothing.
        """
        if self._fp:
            try:
                if self._started_at_0 and self._read_to_eof:
                    self._handle_close_quarantine()
            except (Exception, Timeout):
                # Best effort: a failing check must not prevent the close.
                pass
            finally:
                self._fp = None
class DiskFile(object):
    """
    .. note::

        Sample alternative pluggable on-disk backend implementation. This
        example duck-types the reference implementation DiskFile class.

    Manage object files in-memory.

    :param fs: an instance of InMemoryFileSystem
    :param account: account name for the object
    :param container: container name for the object
    :param obj: object name for the object
    """

    def __init__(self, fs, account, container, obj):
        self._name = '/' + '/'.join((account, container, obj))
        # ``None`` means "not open"; open() fills this in on success.
        self._metadata = None
        self._fp = None
        self._filesystem = fs
        self.fragments = None

    def open(self, modernize=False, current_time=None):
        """
        Open the file and read the metadata.

        This method must populate the _metadata attribute. If opening
        fails for any reason the instance is left in the not-open state.

        :param modernize: accepted but not used by this implementation
        :param current_time: Unix time used in checking expiration. If not
             present, the current time will be used.
        :raises DiskFileCollision: on name mis-match with metadata
        :raises DiskFileDeleted: if no data is stored for the object
        :raises DiskFileNotExist: if the object has expired
        :raises DiskFileQuarantined: if while reading metadata of the file
                                     some data did not pass cross checks
        :returns: this DiskFile
        """
        fp, metadata = self._filesystem.get_object(self._name)
        if fp is None:
            # Never expose metadata for something that could not be opened.
            self._metadata = None
            raise DiskFileDeleted()
        # _verify_data_file() reads self._metadata, so it has to be set
        # before validating and rolled back if validation fails.
        self._metadata = metadata
        try:
            self._fp = self._verify_data_file(fp, current_time)
        except BaseException:
            self._metadata = None
            self._fp = None
            raise
        self._metadata = self._metadata or {}
        return self

    def __enter__(self):
        if self._metadata is None:
            raise DiskFileNotOpen()
        return self

    def __exit__(self, t, v, tb):
        if self._fp is not None:
            self._fp = None

    def _quarantine(self, name, msg):
        """
        Quarantine a file. For this implementation that simply means
        deleting the bad object from the in-memory file system.

        :param name: name of object to quarantine
        :param msg: reason for quarantining to be included in the exception
        :returns: DiskFileQuarantined exception object
        """
        self._filesystem.del_object(name)
        return DiskFileQuarantined(msg)

    def _verify_data_file(self, fp, current_time):
        """
        Cross-check the stored metadata against this object's name, the
        expiry time and the actual size of the stored data.

        :param fp: file-like object holding the object's data
        :param current_time: Unix time used in checking expiration; the
                             current time is used when None
        :raises DiskFileCollision: if the metadata stored name does not match
                                   the referenced name of the file
        :raises DiskFileNotExist: if the object has expired
        :raises DiskFileQuarantined: if data inconsistencies were detected
                                     between the metadata and the file-system
                                     metadata
        :returns: ``fp``, rewound to the start
        """
        try:
            mname = self._metadata['name']
        except KeyError:
            raise self._quarantine(self._name, "missing name metadata")
        else:
            if mname != self._name:
                raise DiskFileCollision('Client path does not match path '
                                        'stored in object metadata')
        try:
            x_delete_at = int(self._metadata['X-Delete-At'])
        except KeyError:
            # No expiry set on this object.
            pass
        except ValueError:
            # Quarantine, the x-delete-at key is present but not an
            # integer.
            raise self._quarantine(
                self._name, "bad metadata x-delete-at value %s" % (
                    self._metadata['X-Delete-At']))
        else:
            if current_time is None:
                current_time = time.time()
            if x_delete_at <= current_time:
                raise DiskFileNotExist('Expired')
        try:
            metadata_size = int(self._metadata['Content-Length'])
        except KeyError:
            raise self._quarantine(
                self._name, "missing content-length in metadata")
        except ValueError:
            # Quarantine, the content-length key is present but not an
            # integer.
            raise self._quarantine(
                self._name, "bad metadata content-length value %s" % (
                    self._metadata['Content-Length']))
        try:
            # Measure the data by seeking to its end, then rewind.
            fp.seek(0, 2)
            obj_size = fp.tell()
            fp.seek(0, 0)
        except OSError as err:
            # Quarantine, we can't successfully stat the file.
            raise self._quarantine(self._name, "not stat-able: %s" % err)
        if obj_size != metadata_size:
            raise self._quarantine(
                self._name, "metadata content-length %s does"
                " not match actual object size %s" % (
                    metadata_size, obj_size))
        return fp

    def get_metadata(self):
        """
        Provide the metadata for an object as a dictionary.

        :raises DiskFileNotOpen: if the DiskFile has not been opened
        :returns: object's metadata dictionary
        """
        if self._metadata is None:
            raise DiskFileNotOpen()
        return self._metadata

    # This implementation keeps a single metadata dictionary per object,
    # so the datafile/metafile accessors are the same method.
    get_datafile_metadata = get_metadata
    get_metafile_metadata = get_metadata

    def read_metadata(self, current_time=None):
        """
        Return the metadata for an object.

        :param current_time: Unix time used in checking expiration. If not
             present, the current time will be used.
        :returns: metadata dictionary for an object
        """
        with self.open(current_time=current_time):
            return self.get_metadata()

    def reader(self, keep_cache=False):
        """
        Return a swift.common.swob.Response class compatible "app_iter"
        object. The responsibility of closing the open file is passed to the
        DiskFileReader object.

        :param keep_cache: accepted but not used by this implementation
        """
        dr = DiskFileReader(self._name, self._fp,
                            int(self._metadata['Content-Length']),
                            self._metadata['ETag'])
        # At this point the reader object is now responsible for
        # the file pointer.
        self._fp = None
        return dr

    def writer(self, size=None):
        """
        Return a new, not yet opened DiskFileWriter for this object.

        :param size: accepted but not used by this implementation
        """
        return DiskFileWriter(self._filesystem, self._name)

    @contextmanager
    def create(self, size=None):
        """
        Context manager to create a file. Yields an opened DiskFileWriter
        and closes it when the context exits.

        :param size: passed through to ``writer()``
        """
        writer = self.writer(size)
        try:
            yield writer.open()
        finally:
            writer.close()

    def write_metadata(self, metadata):
        """
        Write a block of metadata to an object.

        ``metadata`` is modified in place: its ``name`` key is set, and when
        the object already has data its immutable metadata is copied in.

        :param metadata: dictionary of metadata to be written
        """
        data, cur_mdata = self._filesystem.get_object(self._name)
        if data is not None:
            # The object exists. Update the new metadata with the object's
            # immutable metadata (e.g. name, size, etag, sysmeta) and store it
            # with the object data.
            protected = RESERVED_DATAFILE_META | DATAFILE_SYSTEM_META
            immutable_metadata = {
                key: val for key, val in cur_mdata.items()
                if key.lower() in protected or is_sys_meta('object', key)}
            metadata.update(immutable_metadata)
        metadata['name'] = self._name
        self._filesystem.put_object(self._name, data, metadata)

    def delete(self, timestamp):
        """
        Perform a delete for the given object in the given container under the
        given account.

        The stored object is removed only when it has metadata whose
        ``X-Timestamp`` is older than ``timestamp``; otherwise nothing
        happens.

        :param timestamp: timestamp to compare with the stored object's
        """
        fp, md = self._filesystem.get_object(self._name)
        if md and md['X-Timestamp'] < Timestamp(timestamp):
            self._filesystem.del_object(self._name)

    @property
    def timestamp(self):
        if self._metadata is None:
            raise DiskFileNotOpen()
        return Timestamp(self._metadata.get('X-Timestamp'))

    # All of these are served from the same ``X-Timestamp`` value.
    data_timestamp = timestamp
    durable_timestamp = timestamp
    content_type_timestamp = timestamp

    @property
    def content_type(self):
        if self._metadata is None:
            raise DiskFileNotOpen()
        return self._metadata.get('Content-Type')