pyeclib/pyeclib/ec_iface.py

554 lines
19 KiB
Python

# Copyright (c) 2013-2014, Kevin Greenan (kmgreen2@gmail.com)
# Copyright (c) 2014, Tushar Gohad (tushar.gohad@intel.com)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution. THIS SOFTWARE IS
# PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
# NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from .enum import Enum
from .enum import unique
from .utils import create_instance
from .utils import positive_int_value
from pyeclib_c import get_liberasurecode_version
import warnings
def check_backend_available(backend_name):
    """Report whether the named EC backend is usable on this system.

    :param backend_name: an EC type name such as 'isa_l_rs_vand' or
                         'flat_xor_hd_4'
    :returns: False when the name does not map to a PyECLib_EC_Types member;
              otherwise whatever pyeclib_c.check_backend_available() reports
              for that member's value
    """
    # Imported locally under an alias; the C helper has the same name as
    # this wrapper.
    from pyeclib_c import check_backend_available as _backend_available

    # All flat_xor_hd* spellings share the single flat_xor_hd enum member.
    lookup_name = backend_name
    if backend_name.startswith('flat_xor_hd'):
        lookup_name = 'flat_xor_hd'

    ec_type = PyECLib_EC_Types.get_by_name(lookup_name)
    if not ec_type:
        return False
    return _backend_available(ec_type.value)
def PyECLibVersion(z, y, x):
    """Pack a (z, y, x) version triple into a single integer.

    The result is ``(z << 16) + (y << 8) + x``.
    """
    high_part = z << 16
    middle_part = y << 8
    return high_part + middle_part + x
# PyECLib release number as separate components ...
PYECLIB_MAJOR = 1
PYECLIB_MINOR = 6
PYECLIB_REV = 1
# ... and packed into one integer by PyECLibVersion().
PYECLIB_VERSION = PyECLibVersion(PYECLIB_MAJOR, PYECLIB_MINOR,
                                 PYECLIB_REV)
# NOTE(review): presumably upper bounds on the k (data) and m (parity)
# fragment counts; nothing in this module enforces them -- confirm usage.
PYECLIB_MAX_DATA = 32
PYECLIB_MAX_PARITY = 32
@unique
class PyECLibEnum(Enum):
    """Base class for PyECLib enums, adding lookup-by-name helpers."""

    def describe(self):
        """Return all members of this member's enum (the supported types)."""
        # Iterate the enum *class*: an individual member is not iterable,
        # so list(self) would raise TypeError.
        return list(type(self))

    @classmethod
    def has_enum(cls, name):
        """Return True if ``name`` is a valid member name of the enum."""
        return cls.get_by_name(name) is not None

    @classmethod
    def get_by_name(cls, name):
        """Return the member called ``name``, or None if there is none."""
        # Go through the public __members__ mapping rather than calling the
        # metaclass __getattr__ hook directly.
        try:
            return cls.__members__.get(name)
        except TypeError:
            # An unhashable name (e.g. a list) can never be a member name.
            return None

    @classmethod
    def names(cls):
        """Return the list of member names."""
        return list(cls.__members__)

    @classmethod
    def values(cls):
        """Return the list of members (the enum objects, not raw values)."""
        return list(cls.__members__.values())

    def __str__(self):
        return "%s: %d" % (self.name, self.value)
# Erasure Code backends supported as of this PyECLib API rev
class PyECLib_EC_Types(PyECLibEnum):
    """Erasure code backends supported as of this PyECLib API revision."""
    # Note: member values deliberately start at 1 rather than 0, because
    # 0 is False in the boolean sense.
    # NOTE(review): check_backend_available() passes a member's .value to
    # pyeclib_c, so these numbers presumably have to match the backend ids
    # used by the C library -- confirm before renumbering.
    jerasure_rs_vand = 1
    jerasure_rs_cauchy = 2
    flat_xor_hd = 3
    isa_l_rs_vand = 4
    shss = 5
    liberasurecode_rs_vand = 6
    isa_l_rs_cauchy = 7
    libphazr = 8
# The output of the erasure (en)coding process is a set of data "fragments".
# Fragment data integrity checks are provided by a checksum embedded in a
# header that is prepended to each fragment.
# The following Enum defines the schemes supported for fragment checksums.
# The checksum type is "none" unless specified.
class PyECLib_FRAGHDRCHKSUM_Types(PyECLibEnum):
    """Checksum schemes that can be selected via ECDriver's chksum_type."""
    # Note: member values deliberately start at 1 rather than 0, because
    # 0 is False in the boolean sense.
    none = 1
    inline_crc32 = 2
# Main ECDriver class
class ECDriver(object):
    '''A driver to encode, decode, and reconstruct erasure-coded data.'''

    def __init__(self, **kwargs):
        '''
        Validate the arguments and instantiate the backend library object
        that every other method delegates to.

        :param ec_type: name of the erasure coding type to use for this
                        driver (a PyECLib_EC_Types member name, or one of
                        'flat_xor_hd_3' / 'flat_xor_hd_4').
        :param k: number of data fragments to use. Required.
        :param m: number of parity fragments to use. Required.
        :param chksum_type: name of a PyECLib_FRAGHDRCHKSUM_Types member;
                            left as None when not given.
        :param validate: default: False. Forwarded to the backend as an
                         int; requests a runtime check that the EC type is
                         available.
        :param library_import_str: default: 'pyeclib.core.ECPyECLibDriver'

        You must provide either ``ec_type`` or ``library_import_str``;
        typically you just want to use ``ec_type``. See ALL_EC_TYPES for the
        list of all EC types supported by PyECLib, and VALID_EC_TYPES for the
        list of all EC types currently available on this system.

        :raises: ECDriverError if a required argument is missing, k/m are
                 not positive integers, the checksum type is unknown, or
                 the backend lacks a required method
        :raises: ECBackendNotSupported if ec_type is not a known EC type
        '''
        self.k = -1
        self.m = -1
        self.hd = -1
        self.ec_type = None
        self.chksum_type = None
        self.validate = False

        for required in ('k', 'm'):
            if required not in kwargs:
                raise ECDriverError(
                    "Invalid Argument: %s is required" % required)

        if 'ec_type' not in kwargs and 'library_import_str' not in kwargs:
            raise ECDriverError(
                "Invalid Argument: either ec_type or library_import_str "
                "must be provided")

        for (key, value) in kwargs.items():
            if key == "k":
                try:
                    self.k = positive_int_value(value)
                except ValueError:
                    raise ECDriverError(
                        "Invalid number of data fragments (k)")
            elif key == "m":
                try:
                    self.m = positive_int_value(value)
                except ValueError:
                    raise ECDriverError(
                        "Invalid number of parity fragments (m)")
            elif key == "ec_type":
                if value in ["flat_xor_hd", "flat_xor_hd_3", "flat_xor_hd_4"]:
                    # The suffix selects hd; a bare "flat_xor_hd" means 3.
                    # All spellings map onto the one flat_xor_hd member.
                    self.hd = 4 if value == "flat_xor_hd_4" else 3
                    value = "flat_xor_hd"
                elif value == "libphazr":
                    self.hd = 1

                if PyECLib_EC_Types.has_enum(value):
                    self.ec_type = PyECLib_EC_Types.get_by_name(value)
                    if self.ec_type in (PyECLib_EC_Types.jerasure_rs_vand,
                                        PyECLib_EC_Types.jerasure_rs_cauchy):
                        # stacklevel=2 attributes the warning to the code
                        # constructing the driver.
                        warnings.warn('Jerasure support is deprecated and '
                                      'may be removed in a future release',
                                      FutureWarning, stacklevel=2)
                else:
                    raise ECBackendNotSupported(
                        "%s is not a valid EC type for PyECLib!" % value)
            elif key == "chksum_type":
                if PyECLib_FRAGHDRCHKSUM_Types.has_enum(value):
                    self.chksum_type = \
                        PyECLib_FRAGHDRCHKSUM_Types.get_by_name(value)
                else:
                    raise ECDriverError(
                        "%s is not a valid checksum type for PyECLib!" % value)
            elif key == "validate":
                # validate if the ec type is available (runtime check)
                self.validate = value

        # hd falls back to m unless the EC type fixed it above.
        if self.hd == -1:
            self.hd = self.m

        self.library_import_str = kwargs.get('library_import_str',
                                             'pyeclib.core.ECPyECLibDriver')

        #
        # Instantiate EC backend driver
        #
        self.ec_lib_reference = create_instance(
            self.library_import_str,
            k=self.k,
            m=self.m,
            hd=self.hd,
            ec_type=self.ec_type,
            chksum_type=self.chksum_type,
            validate=int(self.validate)
        )

        #
        # Verify that the imported library implements the required functions
        #
        required_methods = [
            'decode',
            'encode',
            'reconstruct',
            'fragments_needed',
            'min_parity_fragments_needed',
            'get_metadata',
            'verify_stripe_metadata',
            'get_segment_info',
        ]

        missing_methods = ' '.join(
            method for method in required_methods
            if not callable(getattr(self.ec_lib_reference, method, None)))

        if missing_methods:
            raise ECDriverError(
                "The following required methods are not implemented "
                "in %s: %s" % (self.library_import_str, missing_methods))

    def __repr__(self):
        if self.ec_type is None:
            # Built from library_import_str alone: there is no EC type name
            # to report, so identify the driver by its backend instead of
            # failing on None.name.
            return '%s(library_import_str=%r, k=%r, m=%r)' % (
                type(self).__name__,
                self.library_import_str,
                self.k,
                self.m)

        ec_type_name = self.ec_type.name
        if ec_type_name == 'flat_xor_hd':
            # Report the hd-qualified spelling (e.g. flat_xor_hd_3).
            ec_type_name = 'flat_xor_hd_%s' % self.hd
        return '%s(ec_type=%r, k=%r, m=%r)' % (
            type(self).__name__,
            ec_type_name,
            self.k,
            self.m)

    def encode(self, data_bytes):
        """
        Encode an arbitrary-sized string

        :param data_bytes: the buffer to encode
        :returns: a list of buffers (first k entries are data and
                  the last m are parity)
        :raises: ECDriverError if there is an error during encoding
        """
        return self.ec_lib_reference.encode(data_bytes)

    def decode(self, fragment_payloads, ranges=None,
               force_metadata_checks=False):
        """
        Decode a set of fragments into a buffer that represents the original
        buffer passed into encode().

        :param fragment_payloads: a list of buffers representing a subset of
                                  the list generated by encode()
        :param ranges (optional): a list of byte ranges to return instead of
                                  the entire buffer
        :param force_metadata_checks (optional): validate collective integrity
                                  of the fragments before trying to decode
        :returns: a buffer
        :raises: ECDriverError if there is an error during decoding
        """
        return self.ec_lib_reference.decode(fragment_payloads, ranges,
                                            force_metadata_checks)

    def reconstruct(self, available_fragment_payloads,
                    missing_fragment_indexes):
        """
        Reconstruct a missing fragment from a subset of available fragments.

        :param available_fragment_payloads: a list of buffers representing
                                            a subset of the list generated
                                            by encode()
        :param missing_fragment_indexes: a list of integers representing
                                         the indexes of the fragments to be
                                         reconstructed.
        :returns: a list of buffers (ordered by fragment index) containing
                  the reconstructed payload associated with the indexes
                  provided in missing_fragment_indexes
        :raises: ECDriverError if there is an error during decoding or there
                 are not sufficient fragments to decode
        """
        return self.ec_lib_reference.reconstruct(
            available_fragment_payloads, missing_fragment_indexes)

    def fragments_needed(self, reconstruction_indexes,
                         exclude_indexes=None):
        """
        Determine which fragments are needed to reconstruct some subset of
        missing fragments.

        :param reconstruction_indexes: a list of integers representing the
                                       indexes of the fragments to be
                                       reconstructed.
        :param exclude_indexes: a list of integers representing the
                                indexes of the fragments to be
                                excluded from the reconstruction
                                equations. Defaults to an empty list.
        :returns: a list containing fragment indexes that can be used to
                  reconstruct the missing fragments.
        :raises: ECDriverError if there is an error during decoding or there
                 are not sufficient fragments to decode
        """
        if exclude_indexes is None:
            exclude_indexes = []
        return self.ec_lib_reference.fragments_needed(reconstruction_indexes,
                                                      exclude_indexes)

    def min_parity_fragments_needed(self):
        """
        Return the backend's min_parity_fragments_needed() result unchanged.
        """
        return self.ec_lib_reference.min_parity_fragments_needed()

    def get_metadata(self, fragment, formatted=0):
        """
        Get opaque metadata for a fragment.  The metadata is opaque to the
        client, but meaningful to the underlying library.  It is used to verify
        stripes in verify_stripe_metadata().

        :param fragment: a buffer representing a single fragment generated by
                         the encode() function.
        :param formatted: passed through to the backend unchanged
                          (default 0).
        :returns: an opaque buffer to be passed into verify_stripe_metadata()
        :raises: ECDriverError if there was a problem getting the metadata.
        """
        return self.ec_lib_reference.get_metadata(fragment, formatted)

    def verify_stripe_metadata(self, fragment_metadata_list):
        """
        Verify a subset of fragments generated by encode()

        :param fragment_metadata_list: a list of buffers representing the
                                       metadata from a subset of the fragments
                                       generated by encode().
        :returns: None if the metadata is consistent; otherwise
                  a list of fragment indexes corresponding to inconsistent
                  fragments
        :raises: ECDriverError if there was a problem verifying the metadata
        """
        return self.ec_lib_reference.verify_stripe_metadata(
            fragment_metadata_list)

    def get_segment_info(self, data_len, segment_size):
        """
        Get segmentation info for a given data length and
        segment size.

        Segment info returns a dict with the following keys:

        segment_size: size of the payload to give to encode()
        last_segment_size: size of the payload to give to encode()
        fragment_size: the fragment size returned by encode()
        last_fragment_size: the fragment size returned by encode()
        num_segments: number of segments

        This allows the caller to prepare requests
        when segmenting a data stream to be EC'd.

        Since the data length will rarely be aligned
        to the segment size, the last segment will be
        a different size than the others.

        There are restrictions on the length given to encode(),
        so calling this before encode is highly recommended when
        segmenting a data stream.
        """
        return self.ec_lib_reference.get_segment_info(data_len, segment_size)

    #
    # Map of segment indexes with a list of tuples
    #
    def get_segment_info_byterange(self, ranges, data_len, segment_size):
        """
        Get segmentation info for a byterange request, given a data length and
        segment size.

        This will return a map-of-maps that represents a recipe describing
        the segments and ranges within each segment needed to satisfy a range
        request.

        Assume a range request is given for an object with segment size 3K and
        a 1 MB file:

        Ranges = (0, 1), (1, 12), (10, 1000), (0, segment_size-1),
                 (1, segment_size+1), (segment_size-1, 2*segment_size)

        This will return a map keyed on the ranges, where there is a recipe
        given for each range:

        {
         (0, 1): {0: (0, 1)},
         (10, 1000): {0: (10, 1000)},
         (1, 12): {0: (1, 12)},
         (0, 3071): {0: (0, 3071)},
         (3071, 6144): {0: (3071, 3071), 1: (0, 3071), 2: (0, 0)},
         (1, 3073): {0: (1, 3071), 1: (0, 1)}
        }

        :param ranges: an iterable of hashable (begin, end) offset pairs;
                       each pair is used as a key of the returned map
        :param data_len: data length handed to the backend's
                         get_segment_info()
        :param segment_size: requested segment size; the 'segment_size'
                             value reported by the backend is the one
                             actually used for the arithmetic
        :returns: dict mapping each range to
                  {segment index: (first offset, last offset)}
        """
        segment_info = self.ec_lib_reference.get_segment_info(
            data_len, segment_size)

        segment_size = segment_info['segment_size']
        last_off_in_segment = segment_size - 1

        recipe = {}

        for r in ranges:
            begin_segment, begin_relative_off = divmod(r[0], segment_size)
            end_segment, end_relative_off = divmod(r[1], segment_size)

            if begin_segment == end_segment:
                segment_map = {
                    begin_segment: (begin_relative_off, end_relative_off),
                }
            else:
                # The first segment is read to its end, every segment in
                # between is read whole, and the last one is read from its
                # start up to the end offset.
                segment_map = {
                    begin_segment: (begin_relative_off, last_off_in_segment),
                }
                for middle_segment in range(begin_segment + 1, end_segment):
                    segment_map[middle_segment] = (0, last_off_in_segment)
                segment_map[end_segment] = (0, end_relative_off)

            recipe[r] = segment_map

        return recipe
# PyECLib Exceptions
# Generic ECDriverException
class ECDriverError(Exception):
    """Generic PyECLib error; base class of the more specific exceptions.

    The message is the str() of whatever object is passed in, kept in
    ``error_str``.
    """

    def __init__(self, error):
        try:
            message = str(error)
        except Exception:
            # str() itself failed, so fall back to naming the object's type.
            message = ('Error retrieving the error message from %s'
                       % error.__class__.__name__)
        self.error_str = message

    def __str__(self):
        return self.error_str
# More specific exceptions, mapped to liberasurecode error codes
# Specified EC backend is not supported by PyECLib/liberasurecode
class ECBackendNotSupported(ECDriverError):
    """Specified EC backend is not supported by PyECLib/liberasurecode."""
# Unsupported EC method
class ECMethodNotImplemented(ECDriverError):
    """Unsupported EC method."""
# liberasurecode backend init error
class ECBackendInitializationError(ECDriverError):
    """liberasurecode backend init error."""
# Specified backend instance is invalid/unavailable
class ECBackendInstanceNotAvailable(ECDriverError):
    """Specified backend instance is invalid/unavailable."""
# Specified backend instance is busy
class ECBackendInstanceInUse(ECDriverError):
    """Specified backend instance is busy."""
# Invalid parameter passed to a method
class ECInvalidParameter(ECDriverError):
    """Invalid parameter passed to a method."""
# Invalid or incompatible fragment metadata
class ECInvalidFragmentMetadata(ECDriverError):
    """Invalid or incompatible fragment metadata."""
# Fragment checksum verification failed
class ECBadFragmentChecksum(ECDriverError):
    """Fragment checksum verification failed."""
# Insufficient fragments specified for decode or reconstruct operation
class ECInsufficientFragments(ECDriverError):
    """Insufficient fragments specified for a decode or reconstruct."""
# Out of memory
class ECOutOfMemory(ECDriverError):
    """Out of memory."""
# PyECLib helper for "available" EC types
# Names of all EC types supported by PyECLib, whether or not their backend
# is usable on this system. flat_xor_hd is listed as its two hd-qualified
# spellings (hd=3 and hd=4).
ALL_EC_TYPES = [
    'jerasure_rs_vand',
    'jerasure_rs_cauchy',
    'flat_xor_hd_3',
    'flat_xor_hd_4',
    'isa_l_rs_vand',
    'shss',
    'liberasurecode_rs_vand',
    'isa_l_rs_cauchy',
    'libphazr',
]
def _PyECLibValidECTypes():
    """Return the ALL_EC_TYPES names whose backend is reported available."""
    return [ec_type_name for ec_type_name in ALL_EC_TYPES
            if check_backend_available(ec_type_name)]


# Evaluated once, when this module is imported.
VALID_EC_TYPES = _PyECLibValidECTypes()
def _liberasurecode_version():
    """Format the packed liberasurecode version as 'major.minor.rev'.

    Each component is one byte of the integer returned by
    get_liberasurecode_version(): bits 16-23, 8-15 and 0-7 respectively.
    """
    packed = get_liberasurecode_version()
    components = tuple((packed >> shift) & 0xff for shift in (16, 8, 0))
    return '%d.%d.%d' % components


# Evaluated once, when this module is imported.
LIBERASURECODE_VERSION = _liberasurecode_version()