554 lines
19 KiB
Python
554 lines
19 KiB
Python
# Copyright (c) 2013-2014, Kevin Greenan (kmgreen2@gmail.com)
|
|
# Copyright (c) 2014, Tushar Gohad (tushar.gohad@intel.com)
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# Redistributions of source code must retain the above copyright notice, this
|
|
# list of conditions and the following disclaimer.
|
|
#
|
|
# Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
# and/or other materials provided with the distribution. THIS SOFTWARE IS
|
|
# PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
|
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
|
|
# NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
from .enum import Enum
|
|
from .enum import unique
|
|
from .utils import create_instance
|
|
from .utils import positive_int_value
|
|
from pyeclib_c import get_liberasurecode_version
|
|
|
|
import warnings
|
|
|
|
|
|
def check_backend_available(backend_name):
    """Report whether the named erasure-code backend can be used.

    Any name beginning with ``flat_xor_hd`` (for example ``flat_xor_hd_3``)
    is looked up under the single ``flat_xor_hd`` member of
    ``PyECLib_EC_Types``; every other name is looked up as given.

    :param backend_name: backend name as a string.
    :returns: False when the name is not a ``PyECLib_EC_Types`` member;
              otherwise the result of ``pyeclib_c.check_backend_available``
              for that member's integer value.
    """
    # Function-scope import, aliased so that it does not shadow this
    # function's own name inside the body.
    from pyeclib_c import check_backend_available as _probe_backend

    lookup_name = ('flat_xor_hd' if backend_name.startswith('flat_xor_hd')
                   else backend_name)
    ec_type = PyECLib_EC_Types.get_by_name(lookup_name)
    if ec_type:
        return _probe_backend(ec_type.value)
    return False
|
|
|
|
|
|
def PyECLibVersion(z, y, x):
    """Pack a three-part version into one integer.

    :param z: major component, shifted left by 16 bits.
    :param y: minor component, shifted left by 8 bits.
    :param x: revision component, added unshifted.
    :returns: ``(z << 16) + (y << 8) + x``
    """
    major_part = z << 16
    minor_part = y << 8
    return major_part + minor_part + x
|
|
|
|
|
|
# PyECLib release components (major.minor.rev).
PYECLIB_MAJOR = 1
PYECLIB_MINOR = 6
PYECLIB_REV = 1
# The same release packed into a single integer by PyECLibVersion():
# (major << 16) + (minor << 8) + rev.
PYECLIB_VERSION = PyECLibVersion(PYECLIB_MAJOR, PYECLIB_MINOR,
                                 PYECLIB_REV)

# NOTE(review): presumably the upper bounds on the number of data and parity
# fragments; nothing in the visible part of this module reads them -- confirm
# where they are enforced.
PYECLIB_MAX_DATA = 32
PYECLIB_MAX_PARITY = 32
|
|
|
|
|
|
@unique
class PyECLibEnum(Enum):
    """Base Enum for PyECLib with name-based lookup helpers.

    Lookups go through ``__members__`` (the name -> member mapping) rather
    than calling the metaclass ``__getattr__`` hook directly: that hook is an
    implementation detail that not every Enum implementation provides, and
    when it is absent the resulting AttributeError would be mistaken for
    "no such member".
    """

    def describe(self):
        """Return a list of every member of this member's enum class.

        Iterating a single member is not possible, so the enum class is
        iterated instead. A class passed in place of ``self`` is iterated
        directly.
        """
        enum_cls = self if isinstance(self, type) else type(self)
        return list(enum_cls)

    @classmethod
    def has_enum(cls, name):
        """Return True if ``name`` is the name of a member of this enum."""
        return name in cls.__members__

    @classmethod
    def get_by_name(cls, name):
        """Return the member called ``name``, or None if there is none."""
        return cls.__members__.get(name)

    @classmethod
    def names(cls):
        """Return the member names, in ``__members__`` order."""
        return list(cls.__members__)

    @classmethod
    def values(cls):
        """Return the member objects (not their raw values), in
        ``__members__`` order."""
        return list(cls.__members__.values())

    def __str__(self):
        # "%d" means member values are expected to be integers.
        return "%s: %d" % (self.name, self.value)
|
|
|
|
|
|
class PyECLib_EC_Types(PyECLibEnum):
    """Erasure code backends supported as of this PyECLib API revision.

    Member values are numbered from 1 rather than 0; 0 is False in the
    boolean sense, whereas enum members evaluate to True.
    """
    jerasure_rs_vand = 1
    jerasure_rs_cauchy = 2
    flat_xor_hd = 3
    isa_l_rs_vand = 4
    shss = 5
    liberasurecode_rs_vand = 6
    isa_l_rs_cauchy = 7
    libphazr = 8
|
|
|
|
|
|
class PyECLib_FRAGHDRCHKSUM_Types(PyECLibEnum):
    """Checksum schemes supported for fragment headers.

    The output of the erasure (en)coding process is a set of data
    "fragments". Fragment data integrity checks are provided by a checksum
    embedded in a header prepended to each fragment. The checksum type is
    "none" unless specified.

    Member values are numbered from 1 rather than 0; 0 is False in the
    boolean sense, whereas enum members evaluate to True.
    """
    none = 1
    inline_crc32 = 2
|
|
|
|
|
|
# Main ECDriver class
class ECDriver(object):
    """A driver to encode, decode, and reconstruct erasure-coded data.

    The driver validates its arguments, instantiates the backend class named
    by ``library_import_str`` and forwards every operation to that backend
    instance (``self.ec_lib_reference``).
    """

    def __init__(self, **kwargs):
        """
        :param ec_type: the erasure coding type to use for this driver,
                        given as the name of a ``PyECLib_EC_Types`` member.
                        ``flat_xor_hd_3`` and ``flat_xor_hd_4`` are also
                        accepted and select ``flat_xor_hd`` with ``hd`` set
                        to 3 or 4.
        :param k: number of data fragments to use. Required.
        :param m: number of parity fragments to use. Required.
        :param chksum_type: name of a ``PyECLib_FRAGHDRCHKSUM_Types`` member.
        :param validate: validate if the ec type is available (runtime
                         check); handed to the backend as an int.
                         default: False
        :param library_import_str: default: 'pyeclib.core.ECPyECLibDriver'

        You must provide either ``ec_type`` or ``library_import_str``;
        typically you just want to use ``ec_type``. See ALL_EC_TYPES for the
        list of all EC types supported by PyECLib, and VALID_EC_TYPES for the
        list of all EC types currently available on this system.

        Keyword arguments other than the ones listed above are ignored.

        :raises: ECDriverError if ``k`` or ``m`` is missing or invalid, if
                 neither ``ec_type`` nor ``library_import_str`` is given, if
                 ``chksum_type`` is unknown, or if the backend instance lacks
                 a required method.
        :raises: ECBackendNotSupported if ``ec_type`` is not a known type.
        """
        self.k = -1
        self.m = -1
        self.hd = -1  # -1 means "not fixed by ec_type"; resolved to m below
        self.ec_type = None
        self.chksum_type = None
        self.validate = False

        for required in ('k', 'm'):
            if required not in kwargs:
                raise ECDriverError(
                    "Invalid Argument: %s is required" % required)

        if 'ec_type' not in kwargs and 'library_import_str' not in kwargs:
            raise ECDriverError(
                "Invalid Argument: either ec_type or library_import_str "
                "must be provided")

        # Arguments are processed in the order the caller supplied them, so
        # the first invalid one (in that order) determines the error raised.
        for (key, value) in kwargs.items():
            if key == "k":
                try:
                    self.k = positive_int_value(value)
                except ValueError:
                    raise ECDriverError(
                        "Invalid number of data fragments (k)")
            elif key == "m":
                try:
                    self.m = positive_int_value(value)
                except ValueError:
                    raise ECDriverError(
                        "Invalid number of parity fragments (m)")
            elif key == "ec_type":
                # The flat_xor_hd variants share one enum member; the
                # numeric suffix only selects hd.
                if value in ("flat_xor_hd", "flat_xor_hd_3"):
                    self.hd = 3
                    value = "flat_xor_hd"
                elif value == "flat_xor_hd_4":
                    self.hd = 4
                    value = "flat_xor_hd"
                elif value == "libphazr":
                    self.hd = 1

                if PyECLib_EC_Types.has_enum(value):
                    self.ec_type = PyECLib_EC_Types.get_by_name(value)
                    if self.ec_type in (PyECLib_EC_Types.jerasure_rs_vand,
                                        PyECLib_EC_Types.jerasure_rs_cauchy):
                        # stacklevel=2 attributes the warning to the code
                        # constructing the driver.
                        warnings.warn('Jerasure support is deprecated and '
                                      'may be removed in a future release',
                                      FutureWarning, stacklevel=2)
                else:
                    raise ECBackendNotSupported(
                        "%s is not a valid EC type for PyECLib!" % value)
            elif key == "chksum_type":
                if PyECLib_FRAGHDRCHKSUM_Types.has_enum(value):
                    self.chksum_type = \
                        PyECLib_FRAGHDRCHKSUM_Types.get_by_name(value)
                else:
                    raise ECDriverError(
                        "%s is not a valid checksum type for PyECLib!" % value)
            elif key == "validate":
                # validate if the ec type is available (runtime check)
                self.validate = value

        if self.hd == -1:
            self.hd = self.m

        self.library_import_str = kwargs.get('library_import_str',
                                             'pyeclib.core.ECPyECLibDriver')
        #
        # Instantiate EC backend driver
        #
        self.ec_lib_reference = create_instance(
            self.library_import_str,
            k=self.k,
            m=self.m,
            hd=self.hd,
            ec_type=self.ec_type,
            chksum_type=self.chksum_type,
            validate=int(self.validate)
        )

        #
        # Verify that the imported library implements the required functions
        #
        required_methods = [
            'decode',
            'encode',
            'reconstruct',
            'fragments_needed',
            'min_parity_fragments_needed',
            'get_metadata',
            'verify_stripe_metadata',
            'get_segment_info',
        ]

        missing_methods = ' '.join(
            method for method in required_methods
            if not callable(getattr(self.ec_lib_reference, method, None)))

        if missing_methods:
            raise ECDriverError(
                "The following required methods are not implemented "
                "in %s: %s" % (self.library_import_str, missing_methods))

    def __repr__(self):
        # ec_type stays None when the driver was built from
        # library_import_str alone; describe the driver by its import string
        # instead of failing on ``None.name``.
        if self.ec_type is None:
            return '%s(library_import_str=%r, k=%r, m=%r)' % (
                type(self).__name__,
                self.library_import_str,
                self.k,
                self.m)

        if self.ec_type.name == 'flat_xor_hd':
            ec_type_name = 'flat_xor_hd_%s' % self.hd
        else:
            ec_type_name = self.ec_type.name
        return '%s(ec_type=%r, k=%r, m=%r)' % (
            type(self).__name__,
            ec_type_name,
            self.k,
            self.m)

    def encode(self, data_bytes):
        """
        Encode an arbitrary-sized string

        :param data_bytes: the buffer to encode
        :returns: a list of buffers (first k entries are data and
                  the last m are parity)
        :raises: ECDriverError if there is an error during encoding
        """
        return self.ec_lib_reference.encode(data_bytes)

    def decode(self, fragment_payloads, ranges=None,
               force_metadata_checks=False):
        """
        Decode a set of fragments into a buffer that represents the original
        buffer passed into encode().

        :param fragment_payloads: a list of buffers representing a subset of
                                  the list generated by encode()
        :param ranges (optional): a list of byte ranges to return instead of
                                  the entire buffer
        :param force_metadata_checks (optional): validate collective integrity
                                  of the fragments before trying to decode
        :returns: a buffer
        :raises: ECDriverError if there is an error during decoding
        """
        return self.ec_lib_reference.decode(fragment_payloads, ranges,
                                            force_metadata_checks)

    def reconstruct(self, available_fragment_payloads,
                    missing_fragment_indexes):
        """
        Reconstruct a missing fragment from a subset of available fragments.

        :param available_fragment_payloads: a list of buffers representing
                                            a subset of the list generated
                                            by encode()
        :param missing_fragment_indexes: a list of integers representing
                                         the indexes of the fragments to be
                                         reconstructed.
        :returns: a list of buffers (ordered by fragment index) containing
                  the reconstructed payload associated with the indexes
                  provided in missing_fragment_indexes
        :raises: ECDriverError if there is an error during decoding or there
                 are not sufficient fragments to decode
        """
        return self.ec_lib_reference.reconstruct(
            available_fragment_payloads, missing_fragment_indexes)

    def fragments_needed(self, reconstruction_indexes,
                         exclude_indexes=None):
        """
        Determine which fragments are needed to reconstruct some subset of
        missing fragments.

        :param reconstruction_indexes: a list of integers representing the
                                       indexes of the fragments to be
                                       reconstructed.
        :param exclude_indexes: a list of integers representing the
                                indexes of the fragments to be
                                excluded from the reconstruction
                                equations. None is treated as an empty list.
        :returns: a list containing fragment indexes that can be used to
                  reconstruct the missing fragments.
        :raises: ECDriverError if there is an error during decoding or there
                 are not sufficient fragments to decode
        """
        if exclude_indexes is None:
            exclude_indexes = []
        return self.ec_lib_reference.fragments_needed(reconstruction_indexes,
                                                      exclude_indexes)

    def min_parity_fragments_needed(self):
        """
        Return the backend driver's min_parity_fragments_needed() result.
        """
        return self.ec_lib_reference.min_parity_fragments_needed()

    def get_metadata(self, fragment, formatted=0):
        """
        Get opaque metadata for a fragment. The metadata is opaque to the
        client, but meaningful to the underlying library. It is used to verify
        stripes in verify_stripe_metadata().

        :param fragment: a buffer representing a single fragment generated by
                         the encode() function.
        :param formatted: passed to the backend driver unchanged (default 0).
                          NOTE(review): presumably selects a parsed rather
                          than opaque representation -- confirm against the
                          backend driver.
        :returns: an opaque buffer to be passed into verify_stripe_metadata()
        :raises: ECDriverError if there was a problem getting the metadata.
        """
        return self.ec_lib_reference.get_metadata(fragment, formatted)

    def verify_stripe_metadata(self, fragment_metadata_list):
        """
        Verify a subset of fragments generated by encode()

        :param fragment_metadata_list: a list of buffers representing the
                                       metadata from a subset of the fragments
                                       generated by encode().
        :returns: 'None' if the metadata is consistent; otherwise
                  a list of fragment indexes corresponding to inconsistent
                  fragments
        :raises: ECDriverError if there was a problem verifying the metadata
        """
        return self.ec_lib_reference.verify_stripe_metadata(
            fragment_metadata_list)

    def get_segment_info(self, data_len, segment_size):
        """
        Get segmentation info for a given data length and
        segment size.

        Segment info returns a dict with the following keys:

        segment_size: size of the payload to give to encode()
        last_segment_size: size of the payload to give to encode()
        fragment_size: the fragment size returned by encode()
        last_fragment_size: the fragment size returned by encode()
        num_segments: number of segments

        This allows the caller to prepare requests
        when segmenting a data stream to be EC'd.

        Since the data length will rarely be aligned
        to the segment size, the last segment will be
        a different size than the others.

        There are restrictions on the length given to encode(),
        so calling this before encode is highly recommended when
        segmenting a data stream.
        """
        return self.ec_lib_reference.get_segment_info(data_len, segment_size)

    #
    # Map of segment indexes with a list of tuples
    #
    def get_segment_info_byterange(self, ranges, data_len, segment_size):
        """
        Get segmentation info for a byterange request, given a data length and
        segment size.

        This will return a map-of-maps that represents a recipe describing
        the segments and ranges within each segment needed to satisfy a range
        request.

        Assume a range request is given for an object with segment size 3K and
        a 1 MB file:

        Ranges = (0, 1), (1, 12), (10, 1000), (0, segment_size-1),
                 (1, segment_size+1), (segment_size-1, 2*segment_size)

        This will return a map keyed on the ranges, where there is a recipe
        given for each range:

        {
         (0, 1): {0: (0, 1)},
         (10, 1000): {0: (10, 1000)},
         (1, 12): {0: (1, 12)},
         (0, 3071): {0: (0, 3071)},
         (3071, 6144): {0: (3071, 3071), 1: (0, 3071), 2: (0, 0)},
         (1, 3073): {0: (1, 3071), 1: (0, 1)}
        }

        :param ranges: an iterable of (begin, end) offset pairs. Both offsets
                       are inclusive, and each pair is used as a key of the
                       returned map, so it must be hashable (e.g. a tuple).
        :param data_len: total length of the data being segmented.
        :param segment_size: requested segment size; the recipe is computed
                             with the ``segment_size`` value reported by the
                             backend's get_segment_info() for these inputs.
        :returns: a dict mapping each range to a dict of
                  segment index -> (first offset, last offset) within that
                  segment.
        """
        segment_info = self.ec_lib_reference.get_segment_info(
            data_len, segment_size)

        segment_size = segment_info['segment_size']
        last_off_in_segment = segment_size - 1

        recipe = {}

        for r in ranges:
            # Segment index and offset within that segment, for both ends.
            begin_segment, begin_relative_off = divmod(r[0], segment_size)
            end_segment, end_relative_off = divmod(r[1], segment_size)

            if begin_segment == end_segment:
                segment_map = {
                    begin_segment: (begin_relative_off, end_relative_off),
                }
            else:
                # First segment runs to its end, middle segments are taken
                # whole, and the last segment runs from its start.
                segment_map = {
                    begin_segment: (begin_relative_off, last_off_in_segment),
                }
                for middle_segment in range(begin_segment + 1, end_segment):
                    segment_map[middle_segment] = (0, last_off_in_segment)
                segment_map[end_segment] = (0, end_relative_off)

            recipe[r] = segment_map

        return recipe
|
|
|
|
|
|
# PyECLib Exceptions
|
|
|
|
# Generic ECDriverException
class ECDriverError(Exception):
    """Base class for the errors raised by PyECLib.

    The text of the error is kept in ``error_str`` and is what ``str()``
    returns.
    """

    def __init__(self, error):
        # ``error`` may be any object; if converting it to text fails, fall
        # back to a message that names its class.
        try:
            message = str(error)
        except Exception:
            message = ('Error retrieving the error message from %s'
                       % error.__class__.__name__)
        self.error_str = message

    def __str__(self):
        return self.error_str
|
|
|
|
|
|
# More specific exceptions, mapped to liberasurecode error codes
|
|
|
|
class ECBackendNotSupported(ECDriverError):
    """Specified EC backend is not supported by PyECLib/liberasurecode."""
|
|
|
|
|
|
class ECMethodNotImplemented(ECDriverError):
    """Unsupported EC method."""
|
|
|
|
|
|
class ECBackendInitializationError(ECDriverError):
    """liberasurecode backend init error."""
|
|
|
|
|
|
class ECBackendInstanceNotAvailable(ECDriverError):
    """Specified backend instance is invalid/unavailable."""
|
|
|
|
|
|
class ECBackendInstanceInUse(ECDriverError):
    """Specified backend instance is busy."""
|
|
|
|
|
|
class ECInvalidParameter(ECDriverError):
    """Invalid parameter passed to a method."""
|
|
|
|
|
|
class ECInvalidFragmentMetadata(ECDriverError):
    """Invalid or incompatible fragment metadata."""
|
|
|
|
|
|
class ECBadFragmentChecksum(ECDriverError):
    """Fragment checksum verification failed."""
|
|
|
|
|
|
class ECInsufficientFragments(ECDriverError):
    """Insufficient fragments specified for decode or reconstruct
    operation."""
|
|
|
|
|
|
class ECOutOfMemory(ECDriverError):
    """Out of memory."""
|
|
|
|
|
|
# PyECLib helper for "available" EC types
#
# Every EC type name PyECLib knows about, whether or not its backend is usable
# on this system. flat_xor_hd is listed as its two variants (_3 and _4);
# check_backend_available() looks both up under the single flat_xor_hd member
# of PyECLib_EC_Types.
ALL_EC_TYPES = [
    'jerasure_rs_vand',
    'jerasure_rs_cauchy',
    'flat_xor_hd_3',
    'flat_xor_hd_4',
    'isa_l_rs_vand',
    'shss',
    'liberasurecode_rs_vand',
    'isa_l_rs_cauchy',
    'libphazr',
]
|
|
|
|
|
|
def _PyECLibValidECTypes():
    """Return the names in ALL_EC_TYPES for which
    check_backend_available() is true, in ALL_EC_TYPES order."""
    return [name for name in ALL_EC_TYPES if check_backend_available(name)]
|
|
|
|
|
|
# EC type names whose backend is available on this system. Computed once, when
# this module is imported, by probing every entry of ALL_EC_TYPES.
VALID_EC_TYPES = _PyECLibValidECTypes()
|
|
|
|
|
|
def _liberasurecode_version():
    """Return get_liberasurecode_version() formatted as 'major.minor.rev'.

    The integer is unpacked one byte per component: bits 16-23 are the
    major number, bits 8-15 the minor number and bits 0-7 the revision.
    """
    packed = get_liberasurecode_version()
    components = tuple((packed >> shift) & 0xff for shift in (16, 8, 0))
    return '%d.%d.%d' % components
|
|
|
|
|
|
# 'major.minor.rev' string for the liberasurecode version reported by
# pyeclib_c; computed once, when this module is imported.
LIBERASURECODE_VERSION = _liberasurecode_version()
|