pyeclib/pyeclib/ec_iface.py

# Copyright (c) 2013-2014, Kevin Greenan (kmgreen2@gmail.com)
# Copyright (c) 2014, Tushar Gohad (tushar.gohad@intel.com)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.  THIS SOFTWARE IS
# PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
# NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from .enum import Enum
from .enum import unique
from .utils import create_instance
from .utils import positive_int_value
from pyeclib_c import get_liberasurecode_version

import warnings


def check_backend_available(backend_name):
    """
    Report whether the named EC backend is usable on this system.

    :param backend_name: EC type name; any name starting with
                         ``flat_xor_hd`` is looked up as ``flat_xor_hd``
    :returns: False when the name does not match a PyECLib_EC_Types member,
              otherwise whatever pyeclib_c.check_backend_available()
              returns for that member's integer value
    """
    # Imported under an alias so the C-level probe does not shadow this
    # function's own name inside its body.
    from pyeclib_c import check_backend_available as backend_is_available

    # All flat_xor_hd variants share the single flat_xor_hd enum member.
    lookup_name = ('flat_xor_hd' if backend_name.startswith('flat_xor_hd')
                   else backend_name)
    backend = PyECLib_EC_Types.get_by_name(lookup_name)
    if not backend:
        return False
    return backend_is_available(backend.value)


def PyECLibVersion(z, y, x):
    """
    Pack three version components into a single integer.

    :param z: component shifted left by 16 bits (used for the major version)
    :param y: component shifted left by 8 bits (used for the minor version)
    :param x: component added unshifted (used for the revision)
    :returns: ``(z << 16) + (y << 8) + x``
    """
    high_part = z << 16
    middle_part = y << 8
    return high_part + middle_part + x


# PyECLib version: the individual components, plus the same three numbers
# packed into one integer by PyECLibVersion()
# (major << 16, minor << 8, revision unshifted).
PYECLIB_MAJOR = 1
PYECLIB_MINOR = 6
PYECLIB_REV = 1
PYECLIB_VERSION = PyECLibVersion(PYECLIB_MAJOR, PYECLIB_MINOR,
                                 PYECLIB_REV)


# NOTE(review): presumably the upper bounds on k (data) and m (parity);
# nothing in this module enforces them -- confirm where they are used.
PYECLIB_MAX_DATA = 32
PYECLIB_MAX_PARITY = 32


@unique
class PyECLibEnum(Enum):
    """
    Base class for PyECLib enumerations, adding name-based lookup helpers.

    Lookups go through ``__members__`` rather than the metaclass'
    ``__getattr__`` hook: that hook is an implementation detail which newer
    ``enum`` implementations no longer provide, and relying on it makes
    every lookup silently report "not found" there.
    """

    def describe(self):
        """
        Return the members of the enumeration this member belongs to.

        :returns: a list of enum members
        """
        # Enum *members* are not iterable, only the Enum class is, so
        # iterate over the class of this member.
        return list(type(self))

    @classmethod
    def has_enum(cls, name):
        """
        Tell whether ``name`` is the name of a member of this enumeration.

        :param name: candidate member name
        :returns: True if a member with that name exists, False otherwise
        """
        return name in cls.__members__

    @classmethod
    def get_by_name(cls, name):
        """
        Look up a member by name.

        :param name: candidate member name
        :returns: the matching enum member, or None if there is none
        """
        return cls.__members__.get(name)

    @classmethod
    def names(cls):
        """Return the list of member names."""
        return list(cls.__members__)

    @classmethod
    def values(cls):
        """Return the list of members (the enum objects, not their values)."""
        return list(cls.__members__.values())

    def __str__(self):
        return "%s: %d" % (self.name, self.value)


# Erasure Code backends supported as of this PyECLib API rev.
# The integer value of a member is what check_backend_available() passes
# to pyeclib_c.
class PyECLib_EC_Types(PyECLibEnum):
    # Note: member values start at 1 rather than 0. 0 is False in the
    # boolean sense, whereas enum members are expected to evaluate to True.
    jerasure_rs_vand = 1
    jerasure_rs_cauchy = 2
    # ECDriver accepts 'flat_xor_hd', 'flat_xor_hd_3' and 'flat_xor_hd_4'
    # and maps all three onto this single member.
    flat_xor_hd = 3
    isa_l_rs_vand = 4
    shss = 5
    liberasurecode_rs_vand = 6
    isa_l_rs_cauchy = 7
    libphazr = 8


# The output of the erasure (en)coding process is a set of data "fragments".
# Fragment data integrity checks are provided by a checksum embedded in a
# header prepended to each fragment.

# The following Enum defines the schemes supported for fragment checksums.
# The checksum type is "none" unless specified.
class PyECLib_FRAGHDRCHKSUM_Types(PyECLibEnum):
    # Member names are the strings accepted by ECDriver's ``chksum_type``
    # argument.
    # Note: member values start at 1 rather than 0. 0 is False in the
    # boolean sense, whereas enum members are expected to evaluate to True.
    none = 1
    inline_crc32 = 2


# Main ECDriver class
class ECDriver(object):
    '''A driver to encode, decode, and reconstruct erasure-coded data.'''

    def __init__(self, **kwargs):
        '''
        Validate the arguments, instantiate the backend driver named by
        ``library_import_str`` and check that it offers the required methods.

        :param ec_type: name of the erasure coding type to use for this
                        driver: a PyECLib_EC_Types member name, or one of
                        ``flat_xor_hd_3`` / ``flat_xor_hd_4``.
        :param k: number of data fragments to use. Required.
        :param m: number of parity fragments to use. Required.
        :param chksum_type: name of a PyECLib_FRAGHDRCHKSUM_Types member.
                            Left as None when not given.
        :param validate: default: False. Handed to the backend driver as
                         ``int(validate)``.
        :param library_import_str: default: 'pyeclib.core.ECPyECLibDriver'

        You must provide either ``ec_type`` or ``library_import_str``;
        typically you just want to use ``ec_type``. See ALL_EC_TYPES for the
        list of all EC types supported by PyECLib, and VALID_EC_TYPES for the
        list of all EC types currently available on this system.

        Keyword arguments other than the ones listed above are ignored.

        :raises: ECBackendNotSupported if ``ec_type`` is not a known EC type
        :raises: ECDriverError if ``k`` or ``m`` is missing or rejected by
                 positive_int_value(), if neither ``ec_type`` nor
                 ``library_import_str`` is given, if ``chksum_type`` is not
                 a known checksum type, or if the instantiated driver lacks
                 a required method
        '''
        # Placeholders; -1 marks "not set yet" for the integer parameters.
        self.k = -1
        self.m = -1
        self.hd = -1
        self.ec_type = None
        self.chksum_type = None
        self.validate = False

        for required in ('k', 'm'):
            if required not in kwargs:
                raise ECDriverError(
                    "Invalid Argument: %s is required" % required)

        if 'ec_type' not in kwargs and 'library_import_str' not in kwargs:
            raise ECDriverError(
                "Invalid Argument: either ec_type or library_import_str "
                "must be provided")

        # Arguments are handled in kwargs iteration order, so when several
        # of them are invalid the first one encountered is the one reported.
        for (key, value) in kwargs.items():
            if key == "k":
                try:
                    self.k = positive_int_value(value)
                except ValueError:
                    raise ECDriverError(
                        "Invalid number of data fragments (k)")
            elif key == "m":
                try:
                    self.m = positive_int_value(value)
                except ValueError:
                    raise ECDriverError(
                        "Invalid number of parity fragments (m)")
            elif key == "ec_type":
                # The flat_xor_hd variants encode hd in their name; they all
                # map to the single flat_xor_hd enum member. A bare
                # 'flat_xor_hd' is treated like 'flat_xor_hd_3'.
                if value in ["flat_xor_hd", "flat_xor_hd_3", "flat_xor_hd_4"]:
                    if value == "flat_xor_hd" or value == "flat_xor_hd_3":
                        self.hd = 3
                    elif value == "flat_xor_hd_4":
                        self.hd = 4
                    value = "flat_xor_hd"
                elif value == "libphazr":
                    self.hd = 1

                if PyECLib_EC_Types.has_enum(value):
                    self.ec_type = PyECLib_EC_Types.get_by_name(value)
                    if self.ec_type in (PyECLib_EC_Types.jerasure_rs_vand,
                                        PyECLib_EC_Types.jerasure_rs_cauchy):
                        # stacklevel=2 attributes the warning to the code
                        # constructing the ECDriver rather than to this line.
                        warnings.warn('Jerasure support is deprecated and '
                                      'may be removed in a future release',
                                      FutureWarning, stacklevel=2)

                else:
                    raise ECBackendNotSupported(
                        "%s is not a valid EC type for PyECLib!" % value)
            elif key == "chksum_type":
                if PyECLib_FRAGHDRCHKSUM_Types.has_enum(value):
                    self.chksum_type = \
                        PyECLib_FRAGHDRCHKSUM_Types.get_by_name(value)
                else:
                    raise ECDriverError(
                        "%s is not a valid checksum type for PyECLib!" % value)
            elif key == "validate":
                # validate if the ec type is available (runtime check)
                self.validate = value

        # No EC type above fixed hd, so fall back to the parity count.
        if self.hd == -1:
            self.hd = self.m

        self.library_import_str = kwargs.pop('library_import_str',
                                             'pyeclib.core.ECPyECLibDriver')
        #
        # Instantiate EC backend driver
        #
        self.ec_lib_reference = create_instance(
            self.library_import_str,
            k=self.k,
            m=self.m,
            hd=self.hd,
            ec_type=self.ec_type,
            chksum_type=self.chksum_type,
            validate=int(self.validate)
        )

        #
        # Verify that the imported library implements the required functions
        #
        required_methods = [
            'decode',
            'encode',
            'reconstruct',
            'fragments_needed',
            'min_parity_fragments_needed',
            'get_metadata',
            'verify_stripe_metadata',
            'get_segment_info',
        ]

        # Space-separated names of the required methods that are absent or
        # not callable; an empty string means the driver is complete.
        missing_methods = ' '.join(
            method for method in required_methods
            if not callable(getattr(self.ec_lib_reference, method, None)))

        if missing_methods:
            raise ECDriverError(
                "The following required methods are not implemented "
                "in %s: %s" % (self.library_import_str, missing_methods))

    def __repr__(self):
        """
        Return a constructor-like description of this driver.

        When an EC type is set the result names it (flat_xor_hd is shown
        with its hd suffix, e.g. ``flat_xor_hd_4``). A driver may also be
        created from ``library_import_str`` alone, in which case ``ec_type``
        is None; the import string is shown instead so that repr() never
        fails on a valid instance.
        """
        cls_name = type(self).__name__
        if self.ec_type is None:
            return '%s(library_import_str=%r, k=%r, m=%r)' % (
                cls_name,
                getattr(self, 'library_import_str', None),
                self.k,
                self.m)

        type_name = self.ec_type.name
        if type_name == 'flat_xor_hd':
            type_name = 'flat_xor_hd_%s' % self.hd
        return '%s(ec_type=%r, k=%r, m=%r)' % (
            cls_name, type_name, self.k, self.m)

    def encode(self, data_bytes):
        """
        Erasure-encode a buffer by handing it to the backend driver.

        :param data_bytes: the buffer to encode (arbitrary size)
        :returns: the backend's result: a list of buffers, the first k
                  being data fragments and the last m parity fragments
        :raises: ECDriverError if there is an error during encoding
        """
        backend = self.ec_lib_reference
        return backend.encode(data_bytes)

    def decode(self, fragment_payloads, ranges=None,
               force_metadata_checks=False):
        """
        Rebuild the buffer originally given to encode() from a set of
        fragments, delegating to the backend driver.

        :param fragment_payloads: a list of buffers representing a subset of
                                  the list generated by encode()
        :param ranges (optional): a list of byte ranges to return instead of
                                  the entire buffer
        :param force_metadata_checks (optional): validate collective integrity
                                  of the fragments before trying to decode
        :returns: a buffer
        :raises: ECDriverError if there is an error during decoding
        """
        backend_decode = self.ec_lib_reference.decode
        # The backend takes all three arguments positionally.
        return backend_decode(
            fragment_payloads, ranges, force_metadata_checks)

    def reconstruct(self, available_fragment_payloads,
                    missing_fragment_indexes):
        """
        Rebuild missing fragments from the ones that are still available,
        delegating to the backend driver.

        :param available_fragment_payloads: a list of buffers representing
                                            a subset of the list generated
                                            by encode()
        :param missing_fragment_indexes: a list of integers: the indexes of
                                         the fragments to reconstruct
        :returns: a list of buffers (ordered by fragment index) holding the
                  reconstructed payload for each index given in
                  missing_fragment_indexes
        :raises: ECDriverError if there is an error during decoding or there
                 are not sufficient fragments to decode
        """
        backend = self.ec_lib_reference
        return backend.reconstruct(available_fragment_payloads,
                                   missing_fragment_indexes)

    def fragments_needed(self, reconstruction_indexes,
                         exclude_indexes=None):
        """
        Ask the backend which fragments are required to reconstruct a
        given subset of missing fragments.

        :param reconstruction_indexes: a list of integers: the indexes of
                                       the fragments to be reconstructed
        :param exclude_indexes: a list of integers: the indexes of the
                                fragments to leave out of the
                                reconstruction equations; None (the
                                default) is passed on as an empty list
        :returns: a list containing fragment indexes that can be used to
                  reconstruct the missing fragments
        :raises: ECDriverError if there is an error during decoding or there
                 are not sufficient fragments to decode
        """
        excluded = [] if exclude_indexes is None else exclude_indexes
        backend = self.ec_lib_reference
        return backend.fragments_needed(reconstruction_indexes, excluded)

    def min_parity_fragments_needed(self):
        """
        Return the backend driver's min_parity_fragments_needed() result.

        The value is passed through unchanged; its exact meaning is defined
        by the backend driver.
        """
        backend = self.ec_lib_reference
        return backend.min_parity_fragments_needed()

    def get_metadata(self, fragment, formatted=0):
        """
        Fetch the metadata of a fragment from the backend driver.

        The metadata is opaque to the client but meaningful to the
        underlying library; it is what verify_stripe_metadata() consumes.

        :param fragment: a buffer representing a single fragment generated by
                         the encode() function.
        :param formatted: passed to the backend unchanged (default 0).
                          NOTE(review): presumably selects a parsed rather
                          than raw representation -- confirm in the backend.
        :returns: an opaque buffer to be passed into verify_stripe_metadata()
        :raises: ECDriverError if there was a problem getting the metadata.
        """
        backend = self.ec_lib_reference
        return backend.get_metadata(fragment, formatted)

    def verify_stripe_metadata(self, fragment_metadata_list):
        """
        Check the metadata of a subset of the fragments generated by
        encode() for consistency, delegating to the backend driver.

        :param fragment_metadata_list: a list of buffers representing the
                                       metadata from a subset of the fragments
                                       generated by encode().
        :returns: 'None' if the metadata is consistent; otherwise
                  a list of fragment indexes corresponding to inconsistent
                  fragments
        :raises: ECDriverError if there was a problem verifying the metadata
        """
        backend = self.ec_lib_reference
        return backend.verify_stripe_metadata(fragment_metadata_list)

    def get_segment_info(self, data_len, segment_size):
        """
        Ask the backend driver for the segmentation info matching a data
        length and a segment size.

        Segment info is a dict with the following keys:

        segment_size: size of the payload to give to encode()
        last_segment_size: size of the payload to give to encode()
        fragment_size: the fragment size returned by encode()
        last_fragment_size: the fragment size returned by encode()
        num_segments: number of segments

        It lets the caller prepare requests when segmenting a data stream
        that is to be erasure coded. The data length is rarely a multiple
        of the segment size, so the last segment usually differs in size
        from the others.

        encode() restricts the lengths it accepts, so calling this before
        encode() is highly recommended when segmenting a data stream.
        """
        backend = self.ec_lib_reference
        return backend.get_segment_info(data_len, segment_size)

    #
    # For each requested byte range, build a map of segment index to the
    # (start, end) offsets needed within that segment
    #
    def get_segment_info_byterange(self, ranges, data_len, segment_size):
        """
        Get segmentation info for a byterange request, given a data length and
        segment size.

        This will return a map-of-maps that represents a recipe describing
        the segments and ranges within each segment needed to satisfy a range
        request.

        Assume a range request is given for an object with segment size 3K
        (3072 bytes) and a 1 MB file:

        Ranges = (0, 1), (1, 12), (10, 1000), (0, segment_size-1),
                 (1, segment_size+1), (segment_size-1, 2*segment_size)

        This will return a map keyed on the ranges, where there is a recipe
        given for each range:

        {
         (0, 1): {0: (0, 1)},
         (10, 1000): {0: (10, 1000)},
         (1, 12): {0: (1, 12)},
         (0, 3071): {0: (0, 3071)},
         (3071, 6144): {0: (3071, 3071), 1: (0, 3071), 2: (0, 0)},
         (1, 3073): {0: (1, 3071), 1: (0, 1)}
        }

        :param ranges: an iterable of (begin, end) byte offsets; each range
                       is used as a key of the result, so it must be
                       hashable (e.g. a tuple)
        :param data_len: total length of the data, passed to the backend's
                         get_segment_info()
        :param segment_size: requested segment size, passed to the backend's
                             get_segment_info()
        :returns: a dict mapping each range to a dict of
                  ``segment index -> (first offset, last offset)`` relative
                  to the start of that segment
        """
        segment_info = self.ec_lib_reference.get_segment_info(
            data_len, segment_size)

        # Offsets are computed against the segment size reported by the
        # backend, not the one passed in.
        segment_size = segment_info['segment_size']
        last_off_in_segment = segment_size - 1

        recipe = {}

        for r in ranges:
            begin_segment, begin_relative_off = divmod(r[0], segment_size)
            end_segment, end_relative_off = divmod(r[1], segment_size)

            if begin_segment == end_segment:
                # The whole range lives inside a single segment.
                segment_map = {
                    begin_segment: (begin_relative_off, end_relative_off)}
            else:
                # Tail of the first segment, every segment in between in
                # full, then the head of the last segment.
                segment_map = {
                    begin_segment: (begin_relative_off, last_off_in_segment)}
                for middle_segment in range(begin_segment + 1, end_segment):
                    segment_map[middle_segment] = (0, last_off_in_segment)
                segment_map[end_segment] = (0, end_relative_off)

            recipe[r] = segment_map

        return recipe


# PyECLib Exceptions

# Generic ECDriverError: base class of the PyECLib exceptions below
class ECDriverError(Exception):
    """
    Base PyECLib exception carrying a printable error message.

    The message is kept in ``error_str`` and returned by ``str()``.
    """

    def __init__(self, error):
        """
        :param error: any object; its string form becomes the message
        """
        try:
            message = str(error)
        except Exception:
            # str() itself failed: fall back to naming the offending type
            # so that building the exception never raises.
            message = 'Error retrieving the error message from %s' \
                % error.__class__.__name__
        self.error_str = message

    def __str__(self):
        return self.error_str


# More specific exceptions, mapped to liberasurecode error codes

# Specified EC backend is not supported by PyECLib/liberasurecode
class ECBackendNotSupported(ECDriverError):
    """The specified EC backend is not supported.

    ECDriver raises this when ``ec_type`` does not name a known EC type.
    """
    pass


# Unsupported EC method
class ECMethodNotImplemented(ECDriverError):
    """The requested EC method is not supported."""
    pass


# liberasurecode backend init error
class ECBackendInitializationError(ECDriverError):
    """The liberasurecode backend failed to initialize."""
    pass


# Specified backend instance is invalid/unavailable
class ECBackendInstanceNotAvailable(ECDriverError):
    """The specified backend instance is invalid or unavailable."""
    pass


# Specified backend instance is busy
class ECBackendInstanceInUse(ECDriverError):
    """The specified backend instance is busy."""
    pass


# Invalid parameter passed to a method
class ECInvalidParameter(ECDriverError):
    """An invalid parameter was passed to a method."""
    pass


# Invalid or incompatible fragment metadata
class ECInvalidFragmentMetadata(ECDriverError):
    """Fragment metadata is invalid or incompatible."""
    pass


# Fragment checksum verification failed
class ECBadFragmentChecksum(ECDriverError):
    """Verification of a fragment checksum failed."""
    pass


# Insufficient fragments specified for decode or reconstruct operation
class ECInsufficientFragments(ECDriverError):
    """Too few fragments were given to a decode or reconstruct operation."""
    pass


# Out of memory
class ECOutOfMemory(ECDriverError):
    """An out-of-memory condition was reported."""
    pass


# Names of every EC type known to this module, whether or not the matching
# backend is usable on the running system (see VALID_EC_TYPES for that).
# flat_xor_hd is listed as its two hd-specific variants rather than under
# the bare enum member name.
ALL_EC_TYPES = [
    'jerasure_rs_vand',
    'jerasure_rs_cauchy',
    'flat_xor_hd_3',
    'flat_xor_hd_4',
    'isa_l_rs_vand',
    'shss',
    'liberasurecode_rs_vand',
    'isa_l_rs_cauchy',
    'libphazr',
]


def _PyECLibValidECTypes():
    """
    Collect the EC types whose backend is available.

    :returns: the entries of ALL_EC_TYPES, in their original order, for
              which check_backend_available() returns a true value
    """
    return [ec_name for ec_name in ALL_EC_TYPES
            if check_backend_available(ec_name)]


# Subset of ALL_EC_TYPES whose backend check succeeded; computed once, when
# this module is imported.
VALID_EC_TYPES = _PyECLibValidECTypes()


def _liberasurecode_version():
    """
    Format the integer reported by get_liberasurecode_version() as a
    dotted version string.

    :returns: ``'major.minor.rev'``, where the three numbers are the bytes
              at bit offsets 16, 8 and 0 of the reported integer
    """
    packed = get_liberasurecode_version()
    # One byte per component, most significant component first.
    components = tuple((packed >> shift) & 0xff for shift in (16, 8, 0))
    return '%d.%d.%d' % components


# 'major.minor.rev' string built from get_liberasurecode_version(); computed
# once, when this module is imported.
LIBERASURECODE_VERSION = _liberasurecode_version()