Add raid unit

New raid unit adds source for optimized xor and P+Q functions.

Signed-off-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
Greg Tucker 2016-04-26 15:55:12 -07:00
parent fce681adb4
commit d6c5e9620d
24 changed files with 4263 additions and 5 deletions

View File

@ -25,6 +25,7 @@ perf_tests32=
# Include units
include erasure_code/Makefile.am
include raid/Makefile.am
# LIB version info not necessarily the same as package version
LIBISAL_CURRENT=2

View File

@ -27,9 +27,10 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
objs = bin\ec_base.obj bin\ec_highlevel_func.obj bin\ec_multibinary.obj bin\gf_2vect_dot_prod_avx.obj bin\gf_2vect_dot_prod_avx2.obj bin\gf_2vect_dot_prod_avx512.obj bin\gf_2vect_dot_prod_sse.obj bin\gf_2vect_mad_avx.obj bin\gf_2vect_mad_avx2.obj bin\gf_2vect_mad_avx512.obj bin\gf_2vect_mad_sse.obj bin\gf_3vect_dot_prod_avx.obj bin\gf_3vect_dot_prod_avx2.obj bin\gf_3vect_dot_prod_avx512.obj bin\gf_3vect_dot_prod_sse.obj bin\gf_3vect_mad_avx.obj bin\gf_3vect_mad_avx2.obj bin\gf_3vect_mad_avx512.obj bin\gf_3vect_mad_sse.obj bin\gf_4vect_dot_prod_avx.obj bin\gf_4vect_dot_prod_avx2.obj bin\gf_4vect_dot_prod_avx512.obj bin\gf_4vect_dot_prod_sse.obj bin\gf_4vect_mad_avx.obj bin\gf_4vect_mad_avx2.obj bin\gf_4vect_mad_avx512.obj bin\gf_4vect_mad_sse.obj bin\gf_5vect_dot_prod_avx.obj bin\gf_5vect_dot_prod_avx2.obj bin\gf_5vect_dot_prod_sse.obj bin\gf_5vect_mad_avx.obj bin\gf_5vect_mad_avx2.obj bin\gf_5vect_mad_sse.obj bin\gf_6vect_dot_prod_avx.obj bin\gf_6vect_dot_prod_avx2.obj bin\gf_6vect_dot_prod_sse.obj bin\gf_6vect_mad_avx.obj bin\gf_6vect_mad_avx2.obj bin\gf_6vect_mad_sse.obj bin\gf_vect_dot_prod_avx.obj bin\gf_vect_dot_prod_avx2.obj bin\gf_vect_dot_prod_avx512.obj bin\gf_vect_dot_prod_sse.obj bin\gf_vect_mad_avx.obj bin\gf_vect_mad_avx2.obj bin\gf_vect_mad_avx512.obj bin\gf_vect_mad_sse.obj bin\gf_vect_mul_avx.obj bin\gf_vect_mul_sse.obj
INCLUDES = -I./ -Ierasure_code/ -Iinclude/
objs = bin\ec_base.obj bin\ec_highlevel_func.obj bin\ec_multibinary.obj bin\gf_2vect_dot_prod_avx.obj bin\gf_2vect_dot_prod_avx2.obj bin\gf_2vect_dot_prod_avx512.obj bin\gf_2vect_dot_prod_sse.obj bin\gf_2vect_mad_avx.obj bin\gf_2vect_mad_avx2.obj bin\gf_2vect_mad_avx512.obj bin\gf_2vect_mad_sse.obj bin\gf_3vect_dot_prod_avx.obj bin\gf_3vect_dot_prod_avx2.obj bin\gf_3vect_dot_prod_avx512.obj bin\gf_3vect_dot_prod_sse.obj bin\gf_3vect_mad_avx.obj bin\gf_3vect_mad_avx2.obj bin\gf_3vect_mad_avx512.obj bin\gf_3vect_mad_sse.obj bin\gf_4vect_dot_prod_avx.obj bin\gf_4vect_dot_prod_avx2.obj bin\gf_4vect_dot_prod_avx512.obj bin\gf_4vect_dot_prod_sse.obj bin\gf_4vect_mad_avx.obj bin\gf_4vect_mad_avx2.obj bin\gf_4vect_mad_avx512.obj bin\gf_4vect_mad_sse.obj bin\gf_5vect_dot_prod_avx.obj bin\gf_5vect_dot_prod_avx2.obj bin\gf_5vect_dot_prod_sse.obj bin\gf_5vect_mad_avx.obj bin\gf_5vect_mad_avx2.obj bin\gf_5vect_mad_sse.obj bin\gf_6vect_dot_prod_avx.obj bin\gf_6vect_dot_prod_avx2.obj bin\gf_6vect_dot_prod_sse.obj bin\gf_6vect_mad_avx.obj bin\gf_6vect_mad_avx2.obj bin\gf_6vect_mad_sse.obj bin\gf_vect_dot_prod_avx.obj bin\gf_vect_dot_prod_avx2.obj bin\gf_vect_dot_prod_avx512.obj bin\gf_vect_dot_prod_sse.obj bin\gf_vect_mad_avx.obj bin\gf_vect_mad_avx2.obj bin\gf_vect_mad_avx512.obj bin\gf_vect_mad_sse.obj bin\gf_vect_mul_avx.obj bin\gf_vect_mul_sse.obj bin\pq_check_sse.obj bin\pq_gen_avx.obj bin\pq_gen_avx2.obj bin\pq_gen_sse.obj bin\raid_base.obj bin\raid_multibinary.obj bin\xor_check_sse.obj bin\xor_gen_avx.obj bin\xor_gen_sse.obj
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Iinclude/
LINKFLAGS = /nologo
CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D)
AFLAGS = -f win64 $(INCLUDES) $(D)
@ -53,13 +54,24 @@ isa-l.dll: $(objs)
{erasure_code}.asm.obj:
$(AS) $(AFLAGS) -o $@ $?
{raid}.c.obj:
$(CC) $(CFLAGS) /c -Fo$@ $?
{raid}.asm.obj:
$(AS) $(AFLAGS) -o $@ $?
# Examples
ex = xor_example.exe
ex: lib $(ex)
$(ex): $(@B).obj
.obj.exe:
link /out:$@ $(LINKFLAGS) isa-l.lib $?
# Check tests
checks = erasure_code_test.exe erasure_code_update_test.exe gf_inverse_test.exe gf_vect_mul_test.exe
checks = erasure_code_test.exe erasure_code_update_test.exe gf_inverse_test.exe gf_vect_mul_test.exe \
pq_check_test.exe pq_gen_test.exe xor_check_test.exe xor_gen_test.exe
checks: lib $(checks)
$(checks): $(@B).obj
@ -73,7 +85,7 @@ tests: lib $(tests)
$(tests): $(@B).obj
# Performance tests
perfs = erasure_code_base_perf.exe erasure_code_perf.exe erasure_code_sse_perf.exe erasure_code_update_perf.exe gf_2vect_dot_prod_sse_perf.exe gf_3vect_dot_prod_sse_perf.exe gf_4vect_dot_prod_sse_perf.exe gf_5vect_dot_prod_sse_perf.exe gf_6vect_dot_prod_sse_perf.exe gf_vect_dot_prod_1tbl.exe gf_vect_dot_prod_avx_perf.exe gf_vect_dot_prod_perf.exe gf_vect_dot_prod_sse_perf.exe gf_vect_mad_perf.exe gf_vect_mul_avx_perf.exe gf_vect_mul_perf.exe gf_vect_mul_sse_perf.exe
perfs = erasure_code_base_perf.exe erasure_code_perf.exe erasure_code_sse_perf.exe erasure_code_update_perf.exe gf_2vect_dot_prod_sse_perf.exe gf_3vect_dot_prod_sse_perf.exe gf_4vect_dot_prod_sse_perf.exe gf_5vect_dot_prod_sse_perf.exe gf_6vect_dot_prod_sse_perf.exe gf_vect_dot_prod_1tbl.exe gf_vect_dot_prod_avx_perf.exe gf_vect_dot_prod_perf.exe gf_vect_dot_prod_sse_perf.exe gf_vect_mad_perf.exe gf_vect_mul_avx_perf.exe gf_vect_mul_perf.exe gf_vect_mul_sse_perf.exe pq_gen_perf.exe xor_gen_perf.exe
perfs: lib $(perfs)
$(perfs): $(@B).obj

View File

@ -27,7 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
units = erasure_code
units = erasure_code raid
default: lib

302
include/raid.h Normal file
View File

@ -0,0 +1,302 @@
/**********************************************************************
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef _RAID_H_
#define _RAID_H_
/**
* @file raid.h
* @brief Interface to RAID functions - XOR and P+Q calculation.
*
* This file defines the interface to optimized XOR calculation (RAID5) or P+Q
* dual parity (RAID6). Operations are carried out on an array of pointers to
* sources and output arrays.
*/
#ifdef __cplusplus
extern "C" {
#endif
/* Multi-binary functions */
/**
* @brief Generate XOR parity vector from N sources, runs appropriate version.
*
* This function determines what instruction sets are enabled and
* selects the appropriate version at runtime.
*
* @param vects Number of source+dest vectors in array.
* @param len Length of each vector in bytes.
* @param array Array of pointers to source and dest. For XOR the dest is
* the last pointer. ie array[vects-1]. Src and dest
* pointers must be aligned to 32B.
*
* @returns 0 pass, other fail
*/
int xor_gen(int vects, int len, void **array);
/**
* @brief Checks that array has XOR parity sum of 0 across all vectors, runs appropriate version.
*
* This function determines what instruction sets are enabled and
* selects the appropriate version at runtime.
*
* @param vects Number of vectors in array.
* @param len Length of each vector in bytes.
* @param array Array of pointers to vectors. Src and dest pointers
* must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int xor_check(int vects, int len, void **array);
/**
* @brief Generate P+Q parity vectors from N sources, runs appropriate version.
*
* This function determines what instruction sets are enabled and
* selects the appropriate version at runtime.
*
* @param vects Number of source+dest vectors in array.
* @param len Length of each vector in bytes. Must be 32B aligned.
* @param array Array of pointers to source and dest. For P+Q the dest
* is the last two pointers. ie array[vects-2],
* array[vects-1]. P and Q parity vectors are
* written to these last two pointers. Src and dest
* pointers must be aligned to 32B.
*
* @returns 0 pass, other fail
*/
int pq_gen(int vects, int len, void **array);
/**
* @brief Checks that array of N sources, P and Q are consistent across all vectors, runs appropriate version.
*
* This function determines what instruction sets are enabled and
* selects the appropriate version at runtime.
*
* @param vects Number of vectors in array including P&Q.
* @param len Length of each vector in bytes. Must be 16B aligned.
* @param array Array of pointers to source and P, Q. P and Q parity
* are assumed to be the last two pointers in the array.
* All pointers must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int pq_check(int vects, int len, void **array);
/* Arch specific versions */
/**
* @brief Generate XOR parity vector from N sources.
* @requires SSE4.1
*
* @param vects Number of source+dest vectors in array.
* @param len Length of each vector in bytes.
* @param array Array of pointers to source and dest. For XOR the dest is
* the last pointer. ie array[vects-1]. Src and dest pointers
* must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int xor_gen_sse(int vects, int len, void **array);
/**
* @brief Generate XOR parity vector from N sources.
* @requires AVX
*
* @param vects Number of source+dest vectors in array.
* @param len Length of each vector in bytes.
* @param array Array of pointers to source and dest. For XOR the dest is
* the last pointer. ie array[vects-1]. Src and dest pointers
* must be aligned to 32B.
*
* @returns 0 pass, other fail
*/
int xor_gen_avx(int vects, int len, void **array);
/**
* @brief Checks that array has XOR parity sum of 0 across all vectors.
* @requires SSE4.1
*
* @param vects Number of vectors in array.
* @param len Length of each vector in bytes.
* @param array Array of pointers to vectors. Src and dest pointers
* must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int xor_check_sse(int vects, int len, void **array);
/**
* @brief Generate P+Q parity vectors from N sources.
* @requires SSE4.1
*
* @param vects Number of source+dest vectors in array.
* @param len Length of each vector in bytes. Must be 16B aligned.
* @param array Array of pointers to source and dest. For P+Q the dest
* is the last two pointers. ie array[vects-2],
* array[vects-1]. P and Q parity vectors are
* written to these last two pointers. Src and dest
* pointers must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int pq_gen_sse(int vects, int len, void **array);
/**
* @brief Generate P+Q parity vectors from N sources.
* @requires AVX
*
* @param vects Number of source+dest vectors in array.
* @param len Length of each vector in bytes. Must be 16B aligned.
* @param array Array of pointers to source and dest. For P+Q the dest
* is the last two pointers. ie array[vects-2],
* array[vects-1]. P and Q parity vectors are
* written to these last two pointers. Src and dest
* pointers must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int pq_gen_avx(int vects, int len, void **array);
/**
* @brief Generate P+Q parity vectors from N sources.
* @requires AVX2
*
* @param vects Number of source+dest vectors in array.
* @param len Length of each vector in bytes. Must be 32B aligned.
* @param array Array of pointers to source and dest. For P+Q the dest
* is the last two pointers. ie array[vects-2],
* array[vects-1]. P and Q parity vectors are
* written to these last two pointers. Src and dest
* pointers must be aligned to 32B.
*
* @returns 0 pass, other fail
*/
int pq_gen_avx2(int vects, int len, void **array);
/**
* @brief Checks that array of N sources, P and Q are consistent across all vectors.
* @requires SSE4.1
*
* @param vects Number of vectors in array including P&Q.
* @param len Length of each vector in bytes. Must be 16B aligned.
* @param array Array of pointers to source and P, Q. P and Q parity
* are assumed to be the last two pointers in the array.
* All pointers must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int pq_check_sse(int vects, int len, void **array);
/**
* @brief Generate P+Q parity vectors from N sources, runs baseline version.
* @param vects Number of source+dest vectors in array.
* @param len Length of each vector in bytes. Must be 16B aligned.
* @param array Array of pointers to source and dest. For P+Q the dest
* is the last two pointers. ie array[vects-2],
* array[vects-1]. P and Q parity vectors are
* written to these last two pointers. Src and dest pointers
* must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int pq_gen_base(int vects, int len, void **array);
/**
* @brief Generate XOR parity vector from N sources, runs baseline version.
* @param vects Number of source+dest vectors in array.
* @param len Length of each vector in bytes.
* @param array Array of pointers to source and dest. For XOR the dest is
* the last pointer. ie array[vects-1]. Src and dest pointers
* must be aligned to 32B.
*
* @returns 0 pass, other fail
*/
int xor_gen_base(int vects, int len, void **array);
/**
* @brief Checks that array has XOR parity sum of 0 across all vectors, runs baseline version.
*
* @param vects Number of vectors in array.
* @param len Length of each vector in bytes.
* @param array Array of pointers to vectors. Src and dest pointers
* must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int xor_check_base(int vects, int len, void **array);
/**
* @brief Checks that array of N sources, P and Q are consistent across all vectors, runs baseline version.
*
* @param vects Number of vectors in array including P&Q.
* @param len Length of each vector in bytes. Must be 16B aligned.
* @param array Array of pointers to source and P, Q. P and Q parity
* are assumed to be the last two pointers in the array.
* All pointers must be aligned to 16B.
*
* @returns 0 pass, other fail
*/
int pq_check_base(int vects, int len, void **array);
#ifdef __cplusplus
}
#endif
#endif //_RAID_H_

View File

@ -54,3 +54,18 @@ gf_vect_mul @50
ec_encode_data_update @51
gf_vect_dot_prod @52
gf_vect_mad @53
xor_gen @54
xor_check @55
pq_gen @56
pq_check @57
xor_gen_sse @58
xor_gen_avx @59
xor_check_sse @60
pq_gen_sse @61
pq_gen_avx @62
pq_gen_avx2 @63
pq_check_sse @64
pq_gen_base @65
xor_gen_base @66
xor_check_base @67
pq_check_base @68

45
raid/Makefile.am Normal file
View File

@ -0,0 +1,45 @@
########################################################################
# Copyright(c) 2011-2015 Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
# Assembly and C sources the raid unit contributes to the library.
lsrc += \
  raid/xor_gen_sse.asm \
  raid/pq_gen_sse.asm \
  raid/xor_check_sse.asm \
  raid/pq_check_sse.asm \
  raid/pq_gen_avx.asm \
  raid/xor_gen_avx.asm \
  raid/pq_gen_avx2.asm \
  raid/raid_base.c \
  raid/raid_multibinary.asm

# Public header for the unit.
extern_hdrs += include/raid.h

# Headers listed as other (non-library) sources.
other_src += \
  include/test.h \
  include/types.h

# Test, benchmark and example programs, named without extension.
check_tests += \
  raid/xor_gen_test \
  raid/pq_gen_test \
  raid/xor_check_test \
  raid/pq_check_test

perf_tests += \
  raid/xor_gen_perf \
  raid/pq_gen_perf

examples += raid/xor_example

# Source list for the 32-bit build (presumably; note the *_i32.asm variants and
# the absence of the raid/ prefix - confirm against the consuming makefile).
lsrc32 += \
  xor_gen_sse.asm \
  pq_gen_sse_i32.asm \
  xor_check_sse.asm \
  pq_check_sse_i32.asm \
  raid_base.c

277
raid/pq_check_sse.asm Normal file
View File

@ -0,0 +1,277 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Optimized P+Q check of N source vectors using SSE4.1 (ptest)
;;; int pq_check_sse(int vects, int len, void **array)
;;; Checks that the P and Q parity vectors are consistent with the N (vects-2)
;;; sources in array of pointers (**array). Last two pointers are the P and Q
;;; parity vectors respectively. Returns 0 on pass, 1 on fail.
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
%include "reg_sizes.asm"
%ifidn __OUTPUT_FORMAT__, elf64
;;; ELF64 output: map arg0..arg5 onto rdi, rsi, rdx, rcx, r8, r9 and use r11
;;; as scratch. tmp3 aliases arg4 (r8). No prologue/epilogue code is needed
;;; here, so FUNC_SAVE and FUNC_RESTORE expand to nothing.
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp3 arg4
%define return rax
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
;;; Win64 output: arguments arrive in rcx, rdx, r8, r9. xmm6-xmm15 are
;;; callee-saved under this convention, so each one the routine uses
;;; (xmm6-xmm11 and xmm15) is spilled in the prologue and reloaded on exit.
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define tmp r11
%define tmp3 r10
%define return rax
;;; Seven 16-byte spill slots plus 8 bytes of padding.
%define stack_size 7*16 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x
;;; Prologue: reserve the spill area and save the non-volatile xmm registers.
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_xmm128 xmm15, 6*16
end_prolog
%endmacro
;;; Epilogue: reload every register from the slot FUNC_SAVE wrote it to, then
;;; release the spill area. Each offset must match the one used in FUNC_SAVE.
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm8, [rsp + 2*16]
movdqa xmm9, [rsp + 3*16]
movdqa xmm10, [rsp + 4*16]
movdqa xmm11, [rsp + 5*16]
movdqa xmm15, [rsp + 6*16] ; slot 6, same as the save (was 9*16: outside the frame)
add rsp, stack_size
%endmacro
%endif
;;; Symbolic names for the registers used by pq_check_sse.
;;; vec/len are the first two arguments; ptr is a scratch pointer held in arg3.
;;; pos (running byte offset) shares the return register; it is overwritten
;;; with the result code just before returning.
%define vec arg0
%define len arg1
%define ptr arg3
%define pos return
;;; xpN/xqN accumulate P and Q, xtmpN is a temporary and xsN holds source
;;; data; the suffix 1..3 selects one of the three 16-byte lanes of a 48-byte
;;; chunk.
%define xp1 xmm0
%define xq1 xmm1
%define xtmp1 xmm2
%define xs1 xmm3
%define xp2 xmm4
%define xq2 xmm5
%define xtmp2 xmm6
%define xs2 xmm7
%define xp3 xmm8
%define xq3 xmm9
%define xtmp3 xmm10
%define xs3 xmm11
;;; xpoly is loaded once from [poly] at function entry.
%define xpoly xmm15
;;; Use Non-temporal load/stor
;;; Define NO_NT_LDST at assembly time to use ordinary aligned moves instead.
;;; By default loads go through movntdqa (SSE4.1, 16-byte aligned memory
;;; source) and stores through movntdq.
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
default rel
[bits 64]
section .text
align 16
global pq_check_sse:function
;;; pq_check_sse(vects, len, array)
;;; Recomputes P and Q from the source vectors and compares them with the
;;; stored P and Q vectors (the last two entries of array).
;;; Returns 0 when everything matches or len == 0. Returns 1 on a mismatch,
;;; when len is not a multiple of 16, or when vects - 3 <= 0.
;;; Data is handled in 48-byte chunks while a full chunk remains, then in
;;; 16-byte steps.
func(pq_check_sse)
FUNC_SAVE
sub vec, 3 ;Keep as offset to last source
jng return_fail ;Must have at least 2 sources
cmp len, 0 ;Zero length passes trivially
je return_pass
test len, (16-1) ;Check alignment of length
jnz return_fail
mov pos, 0
movdqa xpoly, [poly] ;Load the 16 x 0x1d constant once
cmp len, 48 ;Less than one 48B chunk: 16B loop only
jl loop16
len_aligned_32bytes: ;NOTE: despite the name, chunks below are 48B
sub len, 48 ;Bias len so that pos <= len means a full 48B chunk remains
loop48:
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector (tmp is reassigned below before it is read)
XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
XLDR xp2, [ptr+pos+16] ;Initialize xp2 with P2 src + 16B ahead
XLDR xp3, [ptr+pos+32] ;Initialize xp3 with P3 src + 32B ahead
pxor xq1, xq1 ;q1 = 0
pxor xq2, xq2 ;q2 = 0
pxor xq3, xq3 ;q3 = 0
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
next_vect:
sub tmp, 1 ;Inner loop for each source vector (sets the flags tested by jg below)
mov ptr, [arg2+tmp*8] ; get pointer to next vect
pxor xp1, xs1 ; p1 ^= s1
pxor xp2, xs2 ; p2 ^= s2
pxor xp3, xs3 ; p3 ^= s3
pxor xq1, xs1 ; q1 ^= s1
pxor xq2, xs2 ; q2 ^= s2
pxor xq3, xs3 ; q3 ^= s3
pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
pxor xtmp2, xtmp2 ; xtmp2 = 0
pxor xtmp3, xtmp3 ; xtmp3 = 0
pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
pcmpgtb xtmp3, xq3 ; xtmp3 = mask 0xff or 0x00 if bit7 set
pand xtmp1, xpoly ; xtmp1 = poly or 0x00
pand xtmp2, xpoly ; xtmp2 = poly or 0x00
pand xtmp3, xpoly ; xtmp3 = poly or 0x00
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
XLDR xs3, [ptr+pos+32] ; Get next vector (source data3)
paddb xq1, xq1 ; q1 = q1<<1
paddb xq2, xq2 ; q2 = q2<<1
paddb xq3, xq3 ; q3 = q3<<1
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
pxor xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0 (flags are still those of 'sub tmp, 1')
pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
pxor xp2, xs2 ;p2 ^= s2[0]
pxor xq2, xs2 ;q2 ^= 1 * s2[0]
pxor xp3, xs3 ;p3 ^= s3[0]
pxor xq3, xs3 ;q3 ^= 1 * s3[0]
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
XLDR xtmp1, [tmp+pos] ;load stored Q1
XLDR xtmp2, [tmp+pos+16] ;load stored Q2, 16B ahead
XLDR xtmp3, [tmp+pos+32] ;load stored Q3, 32B ahead
pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
pxor xq2, xtmp2 ;xq2 = q2 calculated ^ q2 saved
pxor xq3, xtmp3 ;xq3 = q3 calculated ^ q3 saved
por xp1, xq1 ;Confirm that all P&Q parity are 0
por xp1, xp2
por xp1, xq2
por xp1, xp3
por xp1, xq3
ptest xp1, xp1 ;ZF set only if every accumulated byte is 0
jnz return_fail
add pos, 48
cmp pos, len
jle loop48 ;Another full 48B chunk remains
;; ------------------------------
;; Do last 16 or 32 Bytes remaining
add len, 48 ;Undo the bias applied before loop48
cmp pos, len
je return_pass ;Nothing left over
loop16:
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector (tmp is reassigned below before it is read)
XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
pxor xq1, xq1 ;q = 0
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
next_vect16:
sub tmp, 1 ;Inner loop for each source vector (sets the flags tested by jg below)
mov ptr, [arg2+tmp*8] ; get pointer to next vect
pxor xq1, xs1 ; q ^= s
pxor xtmp1, xtmp1 ; xtmp = 0
pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
pand xtmp1, xpoly ; xtmp = poly or 0x00
pxor xp1, xs1 ; p ^= s
paddb xq1, xq1 ; q = q<<1
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect16 ; Loop for each vect except 0 (flags are still those of 'sub tmp, 1')
pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
pxor xq1, xs1 ;q ^= 1 * s[0]
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
XLDR xtmp1, [tmp+pos] ;load stored Q1
pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
por xp1, xq1 ;Confirm that all P&Q parity are = 0
ptest xp1, xp1 ;ZF set only if every accumulated byte is 0
jnz return_fail
add pos, 16
cmp pos, len
jl loop16
return_pass:
mov return, 0 ;0 = parity consistent
FUNC_RESTORE
ret
return_fail:
mov return, 1 ;1 = bad arguments or parity mismatch
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
poly:
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
;;; func core, ver, snum
slversion pq_check_sse, 00, 06, 0033

282
raid/pq_check_sse_i32.asm Normal file
View File

@ -0,0 +1,282 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Optimized P+Q check of N source vectors using SSE4.1 (ptest)
;;; int pq_check_sse(int vects, int len, void **array)
;;; Checks that the P and Q parity vectors are consistent with the N (vects-2)
;;; sources in array of pointers (**array). Last two pointers are the P and Q
;;; parity vectors respectively. Returns 0 on pass, 1 on fail.
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
%include "reg_sizes.asm"
%ifidn __OUTPUT_FORMAT__, elf64
;;; ELF64 output: map arg0..arg5 onto rdi, rsi, rdx, rcx, r8, r9 and use r11
;;; as scratch. PS is the pointer size in bytes, used to index the array of
;;; vector pointers. FUNC_SAVE and FUNC_RESTORE expand to nothing.
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define return rax
%define PS 8
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64
;;; Win64 output: arguments arrive in rcx, rdx, r8, r9. xmm6-xmm15 are
;;; callee-saved under this convention. Besides xmm6/xmm7 (xtmp2/xs2), 64-bit
;;; builds of this file keep the polynomial in xmm15 (xpoly), so it must be
;;; spilled and reloaded as well.
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define return rax
%define PS 8
%define tmp r11
;;; Three 16-byte spill slots plus 8 bytes of padding.
%define stack_size 3*16 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x
;;; Prologue: reserve the spill area and save the non-volatile xmm registers.
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm15, 2*16
end_prolog
%endmacro
;;; Epilogue: reload each register from the slot FUNC_SAVE used, then release
;;; the spill area.
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm15, [rsp + 2*16]
add rsp, stack_size
%endmacro
%elifidn __OUTPUT_FORMAT__, elf32
;;; ELF32 output: arguments are read from the stack through ebp. arg(x)
;;; addresses argument x at [ebp+8+PS*x], which is valid once FUNC_SAVE has
;;; pushed ebp and copied esp into it. PS is the pointer size in bytes.
%define arg0 edx
%define arg1 ecx
%define return eax
%define PS 4
%define func(x) x:
%define arg(x) [ebp+8+PS*x]
%define arg2 edi ; must save/restore
%define arg3 esi
%define tmp ebx
;;; Prologue: set up the ebp frame, save esi/edi/ebx, then load the three
;;; stack arguments into their registers.
%macro FUNC_SAVE 0
push ebp
mov ebp, esp
push esi
push edi
push ebx
mov arg0, arg(0)
mov arg1, arg(1)
mov arg2, arg(2)
%endmacro
;;; Epilogue: pop the saved registers in reverse order of the pushes and
;;; tear down the frame.
%macro FUNC_RESTORE 0
pop ebx
pop edi
pop esi
mov esp, ebp ;if has frame pointer?
pop ebp
%endmacro
%endif ; output formats
;;; Symbolic names for the registers used by pq_check_sse.
;;; vec/len are the first two arguments; ptr is a scratch pointer held in arg3.
;;; pos (running byte offset) shares the return register; it is overwritten
;;; with the result code just before returning.
%define vec arg0
%define len arg1
%define ptr arg3
%define pos return
;;; xpN/xqN accumulate P and Q, xtmpN is a temporary and xsN holds source
;;; data; the suffix 1..2 selects one of the two 16-byte lanes of a 32-byte
;;; chunk.
%define xp1 xmm0
%define xq1 xmm1
%define xtmp1 xmm2
%define xs1 xmm3
%define xp2 xmm4
%define xq2 xmm5
%define xtmp2 xmm6
%define xs2 xmm7
;;; 64-bit builds (PS == 8) keep the polynomial in xmm15; 32-bit builds
;;; (PS == 4) use [poly] directly as a memory operand instead.
%ifidn PS,8 ; 64-bit code
default rel
[bits 64]
%define xpoly xmm15
%elifidn PS,4 ; 32-bit code
%define xpoly [poly]
%endif
;;; Use Non-temporal load/stor
;;; Define NO_NT_LDST at assembly time to use ordinary aligned moves instead.
;;; XSTR is defined alongside XLDR, but pq_check_sse below only issues loads.
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
section .text
align 16
global pq_check_sse:function
;;; pq_check_sse(vects, len, array)
;;; Recomputes P and Q from the source vectors and compares them with the
;;; stored P and Q vectors (the last two entries of array).
;;; Returns 0 when everything matches or len == 0. Returns 1 on a mismatch,
;;; when len is not a multiple of 16, or when vects - 3 <= 0.
;;; Data is handled in 32-byte chunks while a full chunk remains, then in
;;; 16-byte steps. PS (pointer size) scales every index into array.
func(pq_check_sse)
FUNC_SAVE
sub vec, 3 ;Keep as offset to last source
jng return_fail ;Must have at least 2 sources
cmp len, 0 ;Zero length passes trivially
je return_pass
test len, (16-1) ;Check alignment of length
jnz return_fail
mov pos, 0
%ifidn PS,8
movdqa xpoly, [poly] ;For 64-bit, load poly into high xmm reg
%endif
cmp len, 32 ;Less than one 32B chunk: 16B loop only
jl loop16
len_aligned_32bytes:
sub len, 32 ;Bias len so that pos <= len means a full 32B chunk remains
loop32:
mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector (tmp is reassigned below before it is read)
XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
XLDR xp2, [ptr+pos+16] ;Initialize xp2 with P2 src + 16B ahead
pxor xq1, xq1 ;q1 = 0
pxor xq2, xq2 ;q2 = 0
mov ptr, [arg2+vec*PS] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
next_vect:
sub tmp, 1 ;Inner loop for each source vector (sets the flags tested by jg below)
mov ptr, [arg2+tmp*PS] ; get pointer to next vect
pxor xp1, xs1 ; p1 ^= s1
pxor xp2, xs2 ; p2 ^= s2
pxor xq1, xs1 ; q1 ^= s1
pxor xq2, xs2 ; q2 ^= s2
pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
pxor xtmp2, xtmp2 ; xtmp2 = 0
pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
pand xtmp1, xpoly ; xtmp1 = poly or 0x00
pand xtmp2, xpoly ; xtmp2 = poly or 0x00
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
paddb xq1, xq1 ; q1 = q1<<1
paddb xq2, xq2 ; q2 = q2<<1
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0 (flags are still those of 'sub tmp, 1')
pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
pxor xp2, xs2 ;p2 ^= s2[0]
pxor xq2, xs2 ;q2 ^= 1 * s2[0]
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
XLDR xtmp1, [tmp+pos] ;load stored Q1
XLDR xtmp2, [tmp+pos+16] ;load stored Q2, 16B ahead
pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
pxor xq2, xtmp2 ;xq2 = q2 calculated ^ q2 saved
por xp1, xq1 ;Confirm that all P&Q parity are 0
por xp1, xp2
por xp1, xq2
ptest xp1, xp1 ;ZF set only if every accumulated byte is 0
jnz return_fail
add pos, 32
cmp pos, len
jle loop32 ;Another full 32B chunk remains
;; ------------------------------
;; Do last 16 Bytes remaining
add len, 32 ;Undo the bias applied before loop32
cmp pos, len
je return_pass ;Nothing left over
loop16:
mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector (tmp is reassigned below before it is read)
XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
pxor xq1, xq1 ;q = 0
mov ptr, [arg2+vec*PS] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
next_vect16:
sub tmp, 1 ;Inner loop for each source vector (sets the flags tested by jg below)
mov ptr, [arg2+tmp*PS] ; get pointer to next vect
pxor xq1, xs1 ; q ^= s
pxor xtmp1, xtmp1 ; xtmp = 0
pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
pand xtmp1, xpoly ; xtmp = poly or 0x00
pxor xp1, xs1 ; p ^= s
paddb xq1, xq1 ; q = q<<1
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect16 ; Loop for each vect except 0 (flags are still those of 'sub tmp, 1')
pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
pxor xq1, xs1 ;q ^= 1 * s[0]
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
XLDR xtmp1, [tmp+pos] ;load stored Q1
pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
por xp1, xq1 ;Confirm that all P&Q parity are = 0
ptest xp1, xp1 ;ZF set only if every accumulated byte is 0
jnz return_fail
add pos, 16
cmp pos, len
jl loop16
return_pass:
mov return, 0 ;0 = parity consistent
FUNC_RESTORE
ret
return_fail:
mov return, 1 ;1 = bad arguments or parity mismatch
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
poly:
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
;;; func core, ver, snum
slversion pq_check_sse, 00, 06, 0033

304
raid/pq_check_test.c Normal file
View File

@ -0,0 +1,304 @@
/**********************************************************************
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include<stdio.h>
#include<stdint.h>
#include<string.h>
#include<stdlib.h>
#include "raid.h"
#include "types.h"
// Number of data (source) vectors; main() allocates two more for P and Q.
#define TEST_SOURCES 16
// Length in bytes of every vector allocated by main().
#define TEST_LEN 1024
// Total bytes over all vectors (sources + P + Q).
#define TEST_MEM ((TEST_SOURCES + 2)*(TEST_LEN))
// Seed handed to srand(); can be overridden from the compiler command line.
#ifndef TEST_SEED
# define TEST_SEED 0x1234
#endif
/*
 * Byte-at-a-time reference generator for P and Q parity.
 *
 * array holds `vects` buffer pointers: entries 0 .. vects-3 are the data
 * sources, entry vects-2 receives P and entry vects-1 receives Q.  For every
 * byte position P is the XOR of all sources, while Q is accumulated from the
 * last source down to source 0, doubling the running value in GF(2^8)
 * (shift left, XOR 0x1d when the top bit was set) before adding each source.
 *
 * Always returns 0.
 */
int ref_multi_pq(int vects, int len, void **array)
{
	unsigned char **vect = (unsigned char **)array;
	int pos, idx;

	for (pos = 0; pos < len; pos++) {
		/* Seed both accumulators with the last data source. */
		unsigned char par = vect[vects - 3][pos];
		unsigned char syn = par;

		for (idx = vects - 4; idx >= 0; idx--) {
			unsigned char data = vect[idx][pos];
			unsigned char dbl = (unsigned char)(syn << 1);

			if (syn & 0x80)
				dbl ^= 0x1d;	/* reduce after the shift overflowed */

			par ^= data;
			syn = data ^ dbl;
		}

		vect[vects - 2][pos] = par;	/* second to last pointer is p */
		vect[vects - 1][pos] = syn;	/* last pointer is q */
	}

	return 0;
}
// Fills buf[0 .. buffer_size-1] with pseudo-random bytes, drawing one rand()
// value per byte (only the low byte of each value is kept).
void rand_buffer(unsigned char *buf, long buffer_size)
{
	unsigned char *cursor = buf;
	long remaining = buffer_size;

	while (remaining > 0) {
		*cursor++ = rand();
		remaining--;
	}
}
// Functional test for pq_check().
//
// Builds TEST_SOURCES data vectors plus P and Q parity vectors, then runs a
// sequence of stages.  Each stage first confirms that pq_check() returns 0 for
// consistent data and then confirms that it returns non-zero once a single
// byte has been altered.  Most stages abort immediately with exit code 1 on
// the first failure; otherwise the accumulated failure count is returned
// (0 means pass).  The allocated buffers are not freed before exit.
int main(int argc, char *argv[])
{
int i, j, k, ret, fail = 0;
// buffs[0 .. TEST_SOURCES-1] are sources, followed by P then Q
void *buffs[TEST_SOURCES + 2];
// c holds the original value of the byte currently being corrupted
char c;
// Offset views into buffs[], used only by the end-of-buffer stage
char *tmp_buf[TEST_SOURCES + 2];
// serr / lerr: source index and byte position of the injected error
int serr, lerr;
printf("Test pq_check_test %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);
srand(TEST_SEED);
// Allocate the arrays
// Every buffer is 16-byte aligned and TEST_LEN bytes long
for (i = 0; i < TEST_SOURCES + 2; i++) {
void *buf;
if (posix_memalign(&buf, 16, TEST_LEN)) {
printf("alloc error: Fail");
return 1;
}
buffs[i] = buf;
}
// Test of all zeros
for (i = 0; i < TEST_SOURCES + 2; i++)
memset(buffs[i], 0, TEST_LEN);
ref_multi_pq(TEST_SOURCES + 2, TEST_LEN, buffs);
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
if (ret != 0) {
fail++;
printf("\nfail zero test %d\n", ret);
}
// A single modified byte in source 0 must be detected
((char *)(buffs[0]))[TEST_LEN - 2] = 0x7; // corrupt buffer
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
if (ret == 0) {
fail++;
printf("\nfail corrupt buffer test %d\n", ret);
}
((char *)(buffs[0]))[TEST_LEN - 2] = 0; // un-corrupt buffer
// Test corrupted buffer any location on all sources
// (j also walks over the P and Q buffers; positions are visited from the
// last byte down to the first)
for (j = 0; j < TEST_SOURCES + 2; j++) {
for (i = TEST_LEN - 1; i >= 0; i--) {
((char *)buffs[j])[i] = 0x5; // corrupt buffer
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
if (ret == 0) {
fail++;
printf("\nfail corrupt zero buffer test j=%d, i=%d\n", j, i);
return 1;
}
((char *)buffs[j])[i] = 0; // un-corrupt buffer
}
putchar('.');
}
// Test rand1
// Random data everywhere, then P and Q are overwritten with reference parity
for (i = 0; i < TEST_SOURCES + 2; i++)
rand_buffer(buffs[i], TEST_LEN);
ref_multi_pq(TEST_SOURCES + 2, TEST_LEN, buffs);
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
if (ret != 0) {
fail++;
printf("fail first rand test %d\n", ret);
}
// Flip the lowest bit of one byte in source 0; the check must now fail
c = ((char *)(buffs[0]))[TEST_LEN - 2];
((char *)(buffs[0]))[TEST_LEN - 2] = c ^ 0x1;
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
if (ret == 0) {
fail++;
printf("\nFail corrupt buffer test, passed when should have failed\n");
}
((char *)(buffs[0]))[TEST_LEN - 2] = c; // un-corrupt buffer
// Test corrupted buffer any location on all sources w/ random data
for (j = 0; j < TEST_SOURCES + 2; j++) {
for (i = TEST_LEN - 1; i >= 0; i--) {
// Check it still passes
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
if (ret != 0) { // should pass
fail++;
printf
("\nFail rand test with un-corrupted buffer j=%d, i=%d\n",
j, i);
return 1;
}
c = ((char *)buffs[j])[i];
((char *)buffs[j])[i] = c ^ 1; // corrupt buffer
ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
if (ret == 0) { // Check it now fails
fail++;
printf("\nfail corrupt buffer test j=%d, i=%d\n", j, i);
return 1;
}
((char *)buffs[j])[i] = c; // un-corrupt buffer
}
putchar('.');
}
// Test various number of sources, full length
// j is the total vector count passed to pq_check (sources + P + Q), so the
// smallest case exercised is 2 sources
for (j = 4; j <= TEST_SOURCES + 2; j++) {
// New random data
for (i = 0; i < j; i++)
rand_buffer(buffs[i], TEST_LEN);
// Generate p,q parity for this number of sources
ref_multi_pq(j, TEST_LEN, buffs);
// Set errors up in each source and len position
for (i = 0; i < j; i++) {
for (k = 0; k < TEST_LEN; k++) {
// See if it still passes
ret = pq_check(j, TEST_LEN, buffs);
if (ret != 0) { // Should pass
printf("\nfail rand fixed len test %d sources\n", j);
fail++;
return 1;
}
c = ((char *)buffs[i])[k];
((char *)buffs[i])[k] = c ^ 1; // corrupt buffer
ret = pq_check(j, TEST_LEN, buffs);
if (ret == 0) { // Should fail
printf
("\nfail rand fixed len test corrupted buffer %d sources\n",
j);
fail++;
return 1;
}
((char *)buffs[i])[k] = c; // un-corrupt buffer
}
}
putchar('.');
}
fflush(0);
// Test various number of sources and len
// k is the vector length under test: 16, 32, ... up to TEST_LEN
k = 16;
while (k <= TEST_LEN) {
char *tmp;
for (j = 4; j <= TEST_SOURCES + 2; j++) {
for (i = 0; i < j; i++)
rand_buffer(buffs[i], k);
// Generate p,q parity for this number of sources
ref_multi_pq(j, k, buffs);
// Inject errors at various source and len positions
for (lerr = 0; lerr < k; lerr++) {
for (serr = 0; serr < j; serr++) {
// See if it still passes
ret = pq_check(j, k, buffs);
if (ret != 0) { // Should pass
printf
("\nfail rand var src, len test %d sources, len=%d\n",
j, k);
fail++;
return 1;
}
tmp = (char *)buffs[serr];
c = tmp[lerr];
((char *)buffs[serr])[lerr] = c ^ 1; // corrupt buffer
ret = pq_check(j, k, buffs);
if (ret == 0) { // Should fail
printf
("\nfail rand var src, len test corrupted buffer "
"%d sources, len=%d, ret=%d\n", j, k,
ret);
fail++;
return 1;
}
((char *)buffs[serr])[lerr] = c; // un-corrupt buffer
}
}
putchar('.');
fflush(0);
}
k += 16;
}
// Test at the end of buffer
// tmp_buf[j] points i bytes into buffs[j] and the tested length is
// TEST_LEN - i, so each checked window ends exactly at the end of its
// allocation.  i advances in steps of 16, which keeps the offset pointers
// 16-byte aligned.
for (i = 0; i < TEST_LEN; i += 16) {
for (j = 0; j < TEST_SOURCES + 2; j++) {
// NOTE(review): this fills the first TEST_LEN - i bytes of buffs[j], while
// the window under test starts at offset i, so part of the window keeps data
// from earlier iterations.  Parity is regenerated below, so the check
// presumably stays valid -- confirm whether filling tmp_buf[j] was intended.
rand_buffer(buffs[j], TEST_LEN - i);
tmp_buf[j] = (char *)buffs[j] + i;
}
// Parity for this stage comes from the library's pq_gen_base() rather than
// from ref_multi_pq()
pq_gen_base(TEST_SOURCES + 2, TEST_LEN - i, (void *)tmp_buf);
// Test good data
ret = pq_check(TEST_SOURCES + 2, TEST_LEN - i, (void *)tmp_buf);
if (ret != 0) {
printf("fail end test - offset: %d, len: %d\n", i, TEST_LEN - i);
fail++;
return 1;
}
// Test bad data
for (serr = 0; serr < TEST_SOURCES + 2; serr++) {
for (lerr = 0; lerr < (TEST_LEN - i); lerr++) {
c = tmp_buf[serr][lerr];
tmp_buf[serr][lerr] = c ^ 1;
ret =
pq_check(TEST_SOURCES + 2, TEST_LEN - i, (void *)tmp_buf);
if (ret == 0) {
printf("fail end test corrupted buffer - "
"offset: %d, len: %d, ret: %d\n", i,
TEST_LEN - i, ret);
fail++;
return 1;
}
tmp_buf[serr][lerr] = c;
}
}
putchar('.');
fflush(0);
}
// Only the non-aborting checks near the top can leave fail > 0 here
if (fail == 0)
printf("Pass\n");
return fail;
}

254
raid/pq_gen_avx.asm Normal file
View File

@ -0,0 +1,254 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Optimized pq of N source vectors using AVX
;;; int pq_gen_avx(int vects, int len, void **array)
;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
;;; (**array). Last two pointers are the P and Q destinations respectively.
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
%include "reg_sizes.asm"
;;; ------------------------------------------------------------------
;;; ELF64 output: arguments arrive in rdi, rsi, rdx, rcx, r8, r9.
;;; Nothing needs saving on this path, so FUNC_SAVE and FUNC_RESTORE are
;;; defined empty and func(x) simply emits the label.
;;; ------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp3 arg4
%define return rax
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
;;; ------------------------------------------------------------------
;;; Win64 output: arguments arrive in rcx, rdx, r8, r9.  The routine uses
;;; xmm6-xmm11, xmm14 and xmm15, so FUNC_SAVE spills those eight registers to
;;; the stack and FUNC_RESTORE reloads them before each ret.
;;; ------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define tmp r11
%define tmp3 r10
%define return rax
;;; Eight 16-byte save slots plus 8.  The extra 8 presumably re-aligns rsp to
;;; 16 bytes after the return address was pushed, which the aligned moves in
;;; FUNC_RESTORE require.
%define stack_size 8*16 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_xmm128 xmm14, 6*16
save_xmm128 xmm15, 7*16
end_prolog
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm8, [rsp + 2*16]
movdqa xmm9, [rsp + 3*16]
movdqa xmm10, [rsp + 4*16]
movdqa xmm11, [rsp + 5*16]
movdqa xmm14, [rsp + 6*16]
movdqa xmm15, [rsp + 7*16]
add rsp, stack_size
%endmacro
%endif
;;; ------------------------------------------------------------------
;;; Symbolic names used by the routine body.
;;;   vec  - vector count on entry, turned into the index of the last source
;;;   len  - byte length of every vector
;;;   ptr  - scratch pointer to the vector currently being accessed
;;;   pos  - byte offset into the vectors (shares rax with `return`)
;;; tmp3 is defined above but not referenced by the routine body.
;;; ------------------------------------------------------------------
%define vec arg0
%define len arg1
%define ptr arg3
%define pos rax
;;; Three independent 16-byte lanes (suffix 1..3), each with a P accumulator,
;;; a Q accumulator, a scratch register and a source-data register.
%define xp1 xmm0
%define xq1 xmm1
%define xtmp1 xmm2
%define xs1 xmm3
%define xp2 xmm4
%define xq2 xmm5
%define xtmp2 xmm6
%define xs2 xmm7
%define xp3 xmm8
%define xq3 xmm9
%define xtmp3 xmm10
%define xs3 xmm11
;;; Constants: all-zero register and the reduction constant loaded from `poly`
%define xzero xmm14
%define xpoly xmm15
;;; Use Non-temporal load/stor
;;; Defining NO_NT_LDST at assembly time selects ordinary aligned moves instead.
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
default rel
[bits 64]
section .text
align 16
;;; ------------------------------------------------------------------
;;; pq_gen_avx
;;;   vec  (arg0) - total number of vectors: sources followed by P and Q
;;;   len  (arg1) - bytes per vector
;;;   arg2        - array of vector pointers
;;; Returns 0 in rax on success (including len == 0, where nothing is
;;; written) and 1 when fewer than 4 vectors are given or len is not a
;;; multiple of 16.
;;;
;;; For each block of data P is the XOR of all sources.  Q is built by walking
;;; the sources from the last one down to source 0: the running value has the
;;; source XORed in and is then doubled per byte (add to itself, XOR 0x1d
;;; where the top bit was set); source 0 is XORed in last without doubling.
;;; ------------------------------------------------------------------
global pq_gen_avx:function
func(pq_gen_avx)
FUNC_SAVE
sub vec, 3 ;vec = index of last source (vects - 3)
jng return_fail ;Must have at least 2 sources
cmp len, 0
je return_pass ;Nothing to do for zero length
test len, (16-1) ;Check alignment of length
jnz return_fail ;Length must be a multiple of 16
mov pos, 0
vmovdqa xpoly, [poly] ;Per-byte reduction constant 0x1d
vpxor xzero, xzero, xzero ;All-zero operand for the blends below
cmp len, 48
jl loop16 ;Fewer than 48 bytes: 16-byte loop only
;;; NOTE: despite its name this label is simply the start of the len >= 48
;;; path; nothing in this routine jumps to it.
len_aligned_32bytes:
sub len, 48 ;Len points to last block
;;; ------------------------------
;;; Main loop: 48 bytes (three 16-byte lanes) per iteration
loop48:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source), lane 1
XLDR xs2, [ptr+pos+16] ;Preload last vector (source), lane 2
XLDR xs3, [ptr+pos+32] ;Preload last vector (source), lane 3
vpxor xp1, xp1, xp1 ;p1 = 0
vpxor xp2, xp2, xp2 ;p2 = 0
vpxor xp3, xp3, xp3 ;p3 = 0
vpxor xq1, xq1, xq1 ;q1 = 0
vpxor xq2, xq2, xq2 ;q2 = 0
vpxor xq3, xq3, xq3 ;q3 = 0
;;; The flags set by `sub tmp, 1` are consumed by the `jg` at the bottom of
;;; this loop; none of the instructions in between modify the flags.
next_vect:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vpxor xq2, xq2, xs2 ; q2 ^= s2
vpxor xq3, xq3, xs3 ; q3 ^= s3
vpxor xp1, xp1, xs1 ; p1 ^= s1
vpxor xp2, xp2, xs2 ; p2 ^= s2
vpxor xp3, xp3, xs3 ; p3 ^= s3
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly where q1 bit7 set, else 0x00
vpblendvb xtmp2, xzero, xpoly, xq2 ; xtmp2 = poly where q2 bit7 set, else 0x00
vpblendvb xtmp3, xzero, xpoly, xq3 ; xtmp3 = poly where q3 bit7 set, else 0x00
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
XLDR xs3, [ptr+pos+32] ; Get next vector (source data3)
vpaddb xq1, xq1, xq1 ; q1 = q1<<1
vpaddb xq2, xq2, xq2 ; q2 = q2<<1
vpaddb xq3, xq3, xq3 ; q3 = q3<<1
vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
vpxor xp1, xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
vpxor xq1, xq1, xs1 ;q1 ^= 1 * s1[0]
vpxor xp2, xp2, xs2 ;p2 ^= s2[0]
vpxor xq2, xq2, xs2 ;q2 ^= 1 * s2[0]
vpxor xp3, xp3, xs3 ;p3 ^= s3[0]
vpxor xq3, xq3, xs3 ;q3 ^= 1 * s3[0]
XSTR [ptr+pos], xp1 ;Write parity P1 vector
XSTR [ptr+pos+16], xp2 ;Write parity P2 vector
XSTR [ptr+pos+32], xp3 ;Write parity P3 vector
XSTR [tmp+pos], xq1 ;Write parity Q1 vector
XSTR [tmp+pos+16], xq2 ;Write parity Q2 vector
XSTR [tmp+pos+32], xq3 ;Write parity Q3 vector
add pos, 48
cmp pos, len ;len was reduced by 48 above
jle loop48 ;Another full 48-byte block fits
;; ------------------------------
;; Do last 16 or 32 Bytes remaining
add len, 48 ;Restore the true length
cmp pos, len
je return_pass ;Length was a multiple of 48
;;; Tail loop: one 16-byte lane per iteration, same algorithm as above
loop16:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
vpxor xp1, xp1, xp1 ;p = 0
vpxor xq1, xq1, xq1 ;q = 0
next_vect16:
sub tmp, 1 ;Inner loop for each source vector (sets flags for jg below)
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
vpxor xp1, xp1, xs1 ; p ^= s
vpaddb xq1, xq1, xq1 ; q = q<<1
vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect16 ; Loop for each vect except 0
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
vpxor xp1, xp1, xs1 ;p ^= s[0] - last source is already loaded
vpxor xq1, xq1, xs1 ;q ^= 1 * s[0]
XSTR [ptr+pos], xp1 ;Write parity P vector
XSTR [tmp+pos], xq1 ;Write parity Q vector
add pos, 16
cmp pos, len
jl loop16
;;; Success exit: rax = 0
return_pass:
mov return, 0
FUNC_RESTORE
ret
;;; Failure exit (bad vector count or length): rax = 1
return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
;;; 16 bytes of 0x1d: XORed into Q bytes whose top bit was set before doubling
poly:
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
;;; func core, ver, snum
;;; (slversion is not defined in this file; presumably from reg_sizes.asm)
slversion pq_gen_avx, 02, 0a, 0039

256
raid/pq_gen_avx2.asm Normal file
View File

@ -0,0 +1,256 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Optimized pq of N source vectors using AVX2
;;; int pq_gen_avx2(int vects, int len, void **array)
;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
;;; (**array). Last two pointers are the P and Q destinations respectively.
;;; Vectors must be aligned to 32 bytes. Length must be 32 byte aligned.
%include "reg_sizes.asm"
;;; ------------------------------------------------------------------
;;; ELF64 output: arguments arrive in rdi, rsi, rdx, rcx, r8, r9.
;;; Nothing needs saving on this path, so FUNC_SAVE and FUNC_RESTORE are
;;; defined empty and func(x) simply emits the label.
;;; ------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp3 arg4
%define return rax
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
;;; ------------------------------------------------------------------
;;; Win64 output: arguments arrive in rcx, rdx, r8, r9.  FUNC_SAVE spills the
;;; full 32-byte ymm6-ymm11, ymm14 and ymm15 to the stack and FUNC_RESTORE
;;; reloads them before each ret.
;;; ------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define tmp r11
%define tmp3 r10
%define return rax
;;; Eight 32-byte save slots plus 8.  The save area is only accessed with
;;; unaligned moves (vmovdqu), so no 32-byte stack alignment is relied upon.
%define stack_size 8*32 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
;; Until a sav_ymm256 is defined
vmovdqu [rsp + 0*32], ymm6
vmovdqu [rsp + 1*32], ymm7
vmovdqu [rsp + 2*32], ymm8
vmovdqu [rsp + 3*32], ymm9
vmovdqu [rsp + 4*32], ymm10
vmovdqu [rsp + 5*32], ymm11
vmovdqu [rsp + 6*32], ymm14
vmovdqu [rsp + 7*32], ymm15
end_prolog
%endmacro
%macro FUNC_RESTORE 0
vmovdqu ymm6, [rsp + 0*32]
vmovdqu ymm7, [rsp + 1*32]
vmovdqu ymm8, [rsp + 2*32]
vmovdqu ymm9, [rsp + 3*32]
vmovdqu ymm10, [rsp + 4*32]
vmovdqu ymm11, [rsp + 5*32]
vmovdqu ymm14, [rsp + 6*32]
vmovdqu ymm15, [rsp + 7*32]
add rsp, stack_size
%endmacro
%endif
;;; ------------------------------------------------------------------
;;; Symbolic names used by the routine body.
;;;   vec  - vector count on entry, turned into the index of the last source
;;;   len  - byte length of every vector
;;;   ptr  - scratch pointer to the vector currently being accessed
;;;   pos  - byte offset into the vectors (shares rax with `return`)
;;; tmp3 is defined above but not referenced by the routine body.
;;; ------------------------------------------------------------------
%define vec arg0
%define len arg1
%define ptr arg3
%define pos rax
;;; Three independent 32-byte lanes (suffix 1..3), each with a P accumulator,
;;; a Q accumulator, a scratch register and a source-data register.
%define xp1 ymm0
%define xq1 ymm1
%define xtmp1 ymm2
%define xs1 ymm3
%define xp2 ymm4
%define xq2 ymm5
%define xtmp2 ymm6
%define xs2 ymm7
%define xp3 ymm8
%define xq3 ymm9
%define xtmp3 ymm10
%define xs3 ymm11
;;; Constants: all-zero register and the reduction constant loaded from `poly`
%define xzero ymm14
%define xpoly ymm15
;;; Use Non-temporal load/stor
;;; Defining NO_NT_LDST at assembly time selects ordinary aligned moves instead.
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
default rel
[bits 64]
section .text
align 16
;;; ------------------------------------------------------------------
;;; pq_gen_avx2
;;;   vec  (arg0) - total number of vectors: sources followed by P and Q
;;;   len  (arg1) - bytes per vector
;;;   arg2        - array of vector pointers
;;; Returns 0 in rax on success (including len == 0, where nothing is
;;; written) and 1 when fewer than 4 vectors are given or len is not a
;;; multiple of 32.
;;;
;;; For each block of data P is the XOR of all sources.  Q is built by walking
;;; the sources from the last one down to source 0: the running value has the
;;; source XORed in and is then doubled per byte (add to itself, XOR 0x1d
;;; where the top bit was set); source 0 is XORed in last without doubling.
;;;
;;; NOTE(review): no vzeroupper is issued before either ret, so the upper
;;; halves of the ymm registers used here may be left non-zero for the
;;; caller -- confirm whether that matters for the project's callers.
;;; ------------------------------------------------------------------
global pq_gen_avx2:function
func(pq_gen_avx2)
FUNC_SAVE
sub vec, 3 ;vec = index of last source (vects - 3)
jng return_fail ;Must have at least 2 sources
cmp len, 0
je return_pass ;Nothing to do for zero length
test len, (32-1) ;Check alignment of length
jnz return_fail ;Length must be a multiple of 32
mov pos, 0
vmovdqa xpoly, [poly] ;Per-byte reduction constant 0x1d
vpxor xzero, xzero, xzero ;All-zero operand for the blends below
cmp len, 96
jl loop32 ;Fewer than 96 bytes: 32-byte loop only
;;; NOTE: despite its name this label is simply the start of the len >= 96
;;; path; nothing in this routine jumps to it.
len_aligned_32bytes:
sub len, 3*32 ;Len points to last block
;;; ------------------------------
;;; Main loop: 96 bytes (three 32-byte lanes) per iteration
loop96:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source), lane 1
XLDR xs2, [ptr+pos+32] ;Preload last vector (source), lane 2
XLDR xs3, [ptr+pos+64] ;Preload last vector (source), lane 3
vpxor xp1, xp1, xp1 ;p1 = 0
vpxor xp2, xp2, xp2 ;p2 = 0
vpxor xp3, xp3, xp3 ;p3 = 0
vpxor xq1, xq1, xq1 ;q1 = 0
vpxor xq2, xq2, xq2 ;q2 = 0
vpxor xq3, xq3, xq3 ;q3 = 0
;;; The flags set by `sub tmp, 1` are consumed by the `jg` at the bottom of
;;; this loop; none of the instructions in between modify the flags.
next_vect:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vpxor xq2, xq2, xs2 ; q2 ^= s2
vpxor xq3, xq3, xs3 ; q3 ^= s3
vpxor xp1, xp1, xs1 ; p1 ^= s1
vpxor xp2, xp2, xs2 ; p2 ^= s2
vpxor xp3, xp3, xs3 ; p3 ^= s3
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly where q1 bit7 set, else 0x00
vpblendvb xtmp2, xzero, xpoly, xq2 ; xtmp2 = poly where q2 bit7 set, else 0x00
vpblendvb xtmp3, xzero, xpoly, xq3 ; xtmp3 = poly where q3 bit7 set, else 0x00
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
XLDR xs2, [ptr+pos+32] ; Get next vector (source data2)
XLDR xs3, [ptr+pos+64] ; Get next vector (source data3)
vpaddb xq1, xq1, xq1 ; q1 = q1<<1
vpaddb xq2, xq2, xq2 ; q2 = q2<<1
vpaddb xq3, xq3, xq3 ; q3 = q3<<1
vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
vpxor xp1, xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
vpxor xq1, xq1, xs1 ;q1 ^= 1 * s1[0]
vpxor xp2, xp2, xs2 ;p2 ^= s2[0]
vpxor xq2, xq2, xs2 ;q2 ^= 1 * s2[0]
vpxor xp3, xp3, xs3 ;p3 ^= s3[0]
vpxor xq3, xq3, xs3 ;q3 ^= 1 * s3[0]
XSTR [ptr+pos], xp1 ;Write parity P1 vector
XSTR [ptr+pos+32], xp2 ;Write parity P2 vector
XSTR [ptr+pos+64], xp3 ;Write parity P3 vector
XSTR [tmp+pos], xq1 ;Write parity Q1 vector
XSTR [tmp+pos+32], xq2 ;Write parity Q2 vector
XSTR [tmp+pos+64], xq3 ;Write parity Q3 vector
add pos, 3*32
cmp pos, len ;len was reduced by 96 above
jle loop96 ;Another full 96-byte block fits
;; ------------------------------
;; Do last 32 or 64 Bytes remaining
add len, 3*32 ;Restore the true length
cmp pos, len
je return_pass ;Length was a multiple of 96
;;; Tail loop: one 32-byte lane per iteration, same algorithm as above
loop32:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
vpxor xp1, xp1, xp1 ;p = 0
vpxor xq1, xq1, xq1 ;q = 0
next_vect32:
sub tmp, 1 ;Inner loop for each source vector (sets flags for jg below)
mov ptr, [arg2+tmp*8] ; get pointer to next vect
vpxor xq1, xq1, xs1 ; q1 ^= s1
vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
vpxor xp1, xp1, xs1 ; p ^= s
vpaddb xq1, xq1, xq1 ; q = q<<1
vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect32 ; Loop for each vect except 0
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
vpxor xp1, xp1, xs1 ;p ^= s[0] - last source is already loaded
vpxor xq1, xq1, xs1 ;q ^= 1 * s[0]
XSTR [ptr+pos], xp1 ;Write parity P vector
XSTR [tmp+pos], xq1 ;Write parity Q vector
add pos, 32
cmp pos, len
jl loop32
;;; Success exit: rax = 0
return_pass:
mov return, 0
FUNC_RESTORE
ret
;;; Failure exit (bad vector count or length): rax = 1
return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 32
;;; 32 bytes of 0x1d: XORed into Q bytes whose top bit was set before doubling
poly:
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
;;; func core, ver, snum
;;; (slversion is not defined in this file; presumably from reg_sizes.asm)
slversion pq_gen_avx2, 04, 03, 0041

97
raid/pq_gen_perf.c Normal file
View File

@ -0,0 +1,97 @@
/**********************************************************************
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include<stdio.h>
#include<stdint.h>
#include<string.h>
#include<stdlib.h>
#include<sys/time.h>
#include "raid.h"
#include "test.h"
// Benchmark configuration.  Exactly one of three set-ups is selected:
//   CACHED_TEST defined  - small working set, many iterations ("_warm")
//   neither defined      - working set larger than the last level cache,
//                          fewer iterations ("_cold")
//   TEST_CUSTOM defined  - TEST_SOURCES and TEST_LEN must be supplied by the
//                          build (e.g. -D options); TEST_LOOPS defaults to
//                          1000 when not supplied ("_cus")
// Expression-valued macros are fully parenthesized so that they expand safely
// inside larger expressions.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN (8*1024)
# define TEST_LOOPS 40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test. Pull from large mem base.
# define TEST_SOURCES 10
# define GT_L3_CACHE (32*1024*1024) /* some number > last level cache */
// Per-vector length, rounded down to a multiple of 64 bytes
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
# define TEST_LOOPS 1000
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS 1000
# endif
# endif
#endif
// Bytes touched per pq_gen() call: all sources plus the P and Q vectors
#define TEST_MEM ((TEST_SOURCES + 2)*(TEST_LEN))
// Throughput benchmark for pq_gen().
//
// Allocates TEST_SOURCES + 2 zero-filled, 32-byte aligned vectors of TEST_LEN
// bytes, performs one untimed pq_gen() call, then times TEST_LOOPS further
// calls and reports the result through perf_print() using the total number of
// bytes processed.  Returns 0 on completion, 1 if an allocation fails.
int main(int argc, char *argv[])
{
	void *buffs[TEST_SOURCES + 2];
	struct perf start, stop;
	int n, iter;

	printf("Test pq_gen_perf %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);

	// Allocate the arrays
	n = 0;
	while (n < TEST_SOURCES + 2) {
		void *mem;

		if (posix_memalign(&mem, 32, TEST_LEN) != 0) {
			printf("alloc error: Fail");
			return 1;
		}
		buffs[n] = mem;
		n++;
	}

	// Setup data
	n = 0;
	while (n < TEST_SOURCES + 2) {
		memset(buffs[n], 0, TEST_LEN);
		n++;
	}

	// Warm up
	pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);

	// Timed section
	perf_start(&start);
	iter = 0;
	while (iter < TEST_LOOPS) {
		pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
		iter++;
	}
	perf_stop(&stop);

	printf("pq_gen" TEST_TYPE_STR ": ");
	// iter equals the number of timed calls made above
	perf_print(stop, start, (long long)TEST_MEM * iter);

	return 0;
}

258
raid/pq_gen_sse.asm Normal file
View File

@ -0,0 +1,258 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Optimized pq of N source vectors using SSE3
;;; int pq_gen_sse(int vects, int len, void **array)
;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
;;; (**array). Last two pointers are the P and Q destinations respectively.
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
%include "reg_sizes.asm"
;;; ------------------------------------------------------------------
;;; ELF64 output: arguments arrive in rdi, rsi, rdx, rcx, r8, r9.
;;; Nothing needs saving on this path, so FUNC_SAVE and FUNC_RESTORE are
;;; defined empty and func(x) simply emits the label.
;;; ------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp3 arg4
%define return rax
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
;;; ------------------------------------------------------------------
;;; Win64 output: arguments arrive in rcx, rdx, r8, r9.  The routine uses
;;; xmm6-xmm11 and xmm15, so FUNC_SAVE spills those seven registers to the
;;; stack and FUNC_RESTORE reloads them before each ret.
;;; ------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define tmp r11
%define tmp3 r10
%define return rax
;;; Seven 16-byte save slots plus 8.  The extra 8 presumably re-aligns rsp to
;;; 16 bytes after the return address was pushed, which the aligned moves in
;;; FUNC_RESTORE require.
%define stack_size 7*16 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_xmm128 xmm15, 6*16
end_prolog
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm8, [rsp + 2*16]
movdqa xmm9, [rsp + 3*16]
movdqa xmm10, [rsp + 4*16]
movdqa xmm11, [rsp + 5*16]
movdqa xmm15, [rsp + 6*16]
add rsp, stack_size
%endmacro
%endif
;;; ------------------------------------------------------------------
;;; Symbolic names used by the routine body.
;;;   vec  - vector count on entry, turned into the index of the last source
;;;   len  - byte length of every vector
;;;   ptr  - scratch pointer to the vector currently being accessed
;;;   pos  - byte offset into the vectors (shares rax with `return`)
;;; tmp3 is defined above but not referenced by the routine body.
;;; ------------------------------------------------------------------
%define vec arg0
%define len arg1
%define ptr arg3
%define pos rax
;;; Three independent 16-byte lanes (suffix 1..3), each with a P accumulator,
;;; a Q accumulator, a scratch register and a source-data register.
%define xp1 xmm0
%define xq1 xmm1
%define xtmp1 xmm2
%define xs1 xmm3
%define xp2 xmm4
%define xq2 xmm5
%define xtmp2 xmm6
%define xs2 xmm7
%define xp3 xmm8
%define xq3 xmm9
%define xtmp3 xmm10
%define xs3 xmm11
;;; Reduction constant loaded from `poly`
%define xpoly xmm15
;;; Use Non-temporal load/stor
;;; Defining NO_NT_LDST at assembly time selects ordinary aligned moves instead.
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
default rel
[bits 64]
section .text
align 16
;;; ------------------------------------------------------------------
;;; pq_gen_sse
;;;   vec  (arg0) - total number of vectors: sources followed by P and Q
;;;   len  (arg1) - bytes per vector
;;;   arg2        - array of vector pointers
;;; Returns 0 in rax on success (including len == 0, where nothing is
;;; written) and 1 when fewer than 4 vectors are given or len is not a
;;; multiple of 16.
;;;
;;; For each block of data P is the XOR of all sources.  Q is built by walking
;;; the sources from the last one down to source 0: the running value has the
;;; source XORed in and is then doubled per byte (add to itself, XOR 0x1d
;;; where the top bit was set); source 0 is XORed in last without doubling.
;;; ------------------------------------------------------------------
global pq_gen_sse:function
func(pq_gen_sse)
FUNC_SAVE
sub vec, 3 ;vec = index of last source (vects - 3)
jng return_fail ;Must have at least 2 sources
cmp len, 0
je return_pass ;Nothing to do for zero length
test len, (16-1) ;Check alignment of length
jnz return_fail ;Length must be a multiple of 16
mov pos, 0
movdqa xpoly, [poly] ;Per-byte reduction constant 0x1d
cmp len, 48
jl loop16 ;Fewer than 48 bytes: 16-byte loop only
;;; NOTE: despite its name this label is simply the start of the len >= 48
;;; path; nothing in this routine jumps to it.
len_aligned_32bytes:
sub len, 48 ;Len points to last block
;;; ------------------------------
;;; Main loop: 48 bytes (three 16-byte lanes) per iteration
loop48:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source), lane 1
XLDR xs2, [ptr+pos+16] ;Preload last vector (source), lane 2
XLDR xs3, [ptr+pos+32] ;Preload last vector (source), lane 3
pxor xp1, xp1 ;p1 = 0
pxor xp2, xp2 ;p2 = 0
pxor xp3, xp3 ;p3 = 0
pxor xq1, xq1 ;q1 = 0
pxor xq2, xq2 ;q2 = 0
pxor xq3, xq3 ;q3 = 0
;;; The flags set by `sub tmp, 1` are consumed by the `jg` at the bottom of
;;; this loop; none of the instructions in between modify the flags.
next_vect:
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*8] ; get pointer to next vect
pxor xq1, xs1 ; q1 ^= s1
pxor xq2, xs2 ; q2 ^= s2
pxor xq3, xs3 ; q3 ^= s3
pxor xp1, xs1 ; p1 ^= s1
pxor xp2, xs2 ; p2 ^= s2
pxor xp3, xs3 ; p3 ^= s3
pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
pxor xtmp2, xtmp2 ; xtmp2 = 0
pxor xtmp3, xtmp3 ; xtmp3 = 0
pcmpgtb xtmp1, xq1 ; xtmp1 = 0xff where 0 > q1 (signed), i.e. bit7 set, else 0x00
pcmpgtb xtmp2, xq2 ; xtmp2 = 0xff where 0 > q2 (signed), i.e. bit7 set, else 0x00
pcmpgtb xtmp3, xq3 ; xtmp3 = 0xff where 0 > q3 (signed), i.e. bit7 set, else 0x00
pand xtmp1, xpoly ; xtmp1 = poly or 0x00
pand xtmp2, xpoly ; xtmp2 = poly or 0x00
pand xtmp3, xpoly ; xtmp3 = poly or 0x00
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
XLDR xs3, [ptr+pos+32] ; Get next vector (source data3)
paddb xq1, xq1 ; q1 = q1<<1
paddb xq2, xq2 ; q2 = q2<<1
paddb xq3, xq3 ; q3 = q3<<1
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
pxor xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
pxor xp2, xs2 ;p2 ^= s2[0]
pxor xq2, xs2 ;q2 ^= 1 * s2[0]
pxor xp3, xs3 ;p3 ^= s3[0]
pxor xq3, xs3 ;q3 ^= 1 * s3[0]
XSTR [ptr+pos], xp1 ;Write parity P1 vector
XSTR [ptr+pos+16], xp2 ;Write parity P2 vector
XSTR [ptr+pos+32], xp3 ;Write parity P3 vector
XSTR [tmp+pos], xq1 ;Write parity Q1 vector
XSTR [tmp+pos+16], xq2 ;Write parity Q2 vector
XSTR [tmp+pos+32], xq3 ;Write parity Q3 vector
add pos, 48
cmp pos, len ;len was reduced by 48 above
jle loop48 ;Another full 48-byte block fits
;; ------------------------------
;; Do last 16 or 32 Bytes remaining
add len, 48 ;Restore the true length
cmp pos, len
je return_pass ;Length was a multiple of 48
;;; Tail loop: one 16-byte lane per iteration, same algorithm as above
loop16:
mov ptr, [arg2+vec*8] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
pxor xp1, xp1 ;p = 0
pxor xq1, xq1 ;q = 0
next_vect16:
sub tmp, 1 ;Inner loop for each source vector (sets flags for jg below)
mov ptr, [arg2+tmp*8] ; get pointer to next vect
pxor xq1, xs1 ; q1 ^= s1
pxor xtmp1, xtmp1 ; xtmp = 0
pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
pand xtmp1, xpoly ; xtmp = poly or 0x00
pxor xp1, xs1 ; p ^= s
paddb xq1, xq1 ; q = q<<1
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect16 ; Loop for each vect except 0
mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
pxor xq1, xs1 ;q ^= 1 * s[0]
XSTR [ptr+pos], xp1 ;Write parity P vector
XSTR [tmp+pos], xq1 ;Write parity Q vector
add pos, 16
cmp pos, len
jl loop16
;;; Success exit: rax = 0
return_pass:
mov return, 0
FUNC_RESTORE
ret
;;; Failure exit (bad vector count or length): rax = 1
return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
;;; 16 bytes of 0x1d: XORed into Q bytes whose top bit was set before doubling
poly:
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
;;; func core, ver, snum
;;; (slversion is not defined in this file; presumably from reg_sizes.asm)
slversion pq_gen_sse, 00, 09, 0032

264
raid/pq_gen_sse_i32.asm Normal file
View File

@ -0,0 +1,264 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Optimized pq of N source vectors using SSE3
;;; int pq_gen_sse(int vects, int len, void **array)
;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
;;; (**array). Last two pointers are the P and Q destinations respectively.
;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
%include "reg_sizes.asm"
;;; Calling-convention glue, selected by the assembler output format.
;;; Every branch provides the argument and scratch register aliases, the
;;; pointer size PS, the func() entry wrapper and FUNC_SAVE/FUNC_RESTORE.
%ifidn __OUTPUT_FORMAT__, elf64
 ;; elf64: arguments arrive in rdi, rsi, rdx; nothing is saved or restored
 %define arg0 rdi
 %define arg1 rsi
 %define arg2 rdx
 %define arg3 rcx
 %define arg4 r8
 %define arg5 r9
 %define tmp r11
 %define return rax
 %define PS 8
 %define func(x) x:
 %define FUNC_SAVE
 %define FUNC_RESTORE

%elifidn __OUTPUT_FORMAT__, win64
 ;; win64: xmm6-xmm15 are callee-saved. The routine uses xmm6/xmm7
 ;; (xtmp2/xs2) and, because PS is 8 here, xmm15 (xpoly), so all three
 ;; must be preserved across the call.
 %define arg0 rcx
 %define arg1 rdx
 %define arg2 r8
 %define arg3 r9
 %define return rax
 %define PS 8
 %define tmp r10
 %define stack_size 3*16 + 8 ; must be an odd multiple of 8
 %define func(x) proc_frame x

 %macro FUNC_SAVE 0
	alloc_stack stack_size
	save_xmm128 xmm6, 0*16
	save_xmm128 xmm7, 1*16
	save_xmm128 xmm15, 2*16
	end_prolog
 %endmacro

 %macro FUNC_RESTORE 0
	movdqa xmm6, [rsp + 0*16]
	movdqa xmm7, [rsp + 1*16]
	movdqa xmm15, [rsp + 2*16]
	add rsp, stack_size
 %endmacro

%elifidn __OUTPUT_FORMAT__, elf32
 ;; elf32: arguments live on the stack and are read through arg(x) once
 ;; ebp has been set up; esi, edi and ebx are pushed before use.
 %define arg0 edx
 %define arg1 ecx
 %define return eax
 %define PS 4
 %define func(x) x:
 %define arg(x) [ebp+8+PS*x]
 %define arg2 edi ; must save/restore
 %define arg3 esi
 %define tmp ebx

 %macro FUNC_SAVE 0
	push ebp
	mov ebp, esp
	push esi
	push edi
	push ebx
	mov arg0, arg(0)
	mov arg1, arg(1)
	mov arg2, arg(2)
 %endmacro

 %macro FUNC_RESTORE 0
	pop ebx			; pops mirror the pushes in FUNC_SAVE
	pop edi
	pop esi
	mov esp, ebp		; standard frame teardown
	pop ebp
 %endmacro

%endif ; output formats
;;; General-purpose register roles inside pq_gen_sse.
;;; pos shares the return register, so it is overwritten with the status
;;; code just before returning.
%define vec arg0
%define len arg1
%define ptr arg3
%define pos return
;;; XMM working set for two 16-byte lanes: P and Q accumulators (xp/xq),
;;; mask temporaries (xtmp) and the current source data (xs).
%define xp1 xmm0
%define xq1 xmm1
%define xtmp1 xmm2
%define xs1 xmm3
%define xp2 xmm4
%define xq2 xmm5
%define xtmp2 xmm6
%define xs2 xmm7
;;; xpoly is a register (xmm15) in 64-bit builds and a memory operand
;;; ([poly]) in 32-bit builds.
%ifidn PS,8 ; 64-bit code
default rel
[bits 64]
%define xpoly xmm15
%elifidn PS,4 ; 32-bit code
%define xpoly [poly]
%endif
;;; Use non-temporal load/store unless NO_NT_LDST is defined
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
;;; pq_gen_sse(vects, len, array)
;;; Writes P (plain xor) and Q parity for the source vectors in array[].
;;; The sources are array[0 .. vects-3]; P goes to array[vects-2] and Q to
;;; array[vects-1].
;;; Returns 0 on success (including len == 0) and 1 when vects-3 <= 0 or
;;; len is not a multiple of 16.
;;; Data is handled 32 bytes per outer iteration, then 16 bytes at a time
;;; for whatever is left.
;;; Q is accumulated from the last source down to source 0 as
;;; q = 2*q ^ s, where the doubling is done per byte: paddb shifts left
;;; and the poly constant is xor-ed into bytes whose top bit was set.
section .text
align 16
global pq_gen_sse:function
func(pq_gen_sse)
FUNC_SAVE
sub vec, 3 ;Keep as offset to last source
jng return_fail ;Must have at least 2 sources
cmp len, 0
je return_pass
test len, (16-1) ;Check alignment of length
jnz return_fail
mov pos, 0
%ifidn PS,8
movdqa xpoly, [poly] ;For 64-bit, load poly into high xmm reg
%endif
cmp len, 32
jl loop16
len_aligned_32bytes:
;; len is lowered by 32 so that "pos <= len" means a full 32-byte chunk remains
sub len, 32 ;Do end of vec first and run backward
loop32:
mov ptr, [arg2+vec*PS] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
pxor xp1, xp1 ;p1 = 0
pxor xq1, xq1 ;q1 = 0
pxor xp2, xp2 ;p2 = 0
pxor xq2, xq2 ;q2 = 0
next_vect:
;; The flags set by this sub are consumed by the jg at the bottom of the
;; loop; the mov and SSE instructions in between leave them untouched.
sub tmp, 1 ;Inner loop for each source vector
mov ptr, [arg2+tmp*PS] ; get pointer to next vect
pxor xq1, xs1 ; q1 ^= s1
pxor xq2, xs2 ; q2 ^= s2
pxor xp1, xs1 ; p1 ^= s1
pxor xp2, xs2 ; p2 ^= s2
pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
pxor xtmp2, xtmp2 ; xtmp2 = 0
pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
pand xtmp1, xpoly ; xtmp1 = poly or 0x00
pand xtmp2, xpoly ; xtmp2 = poly or 0x00
XLDR xs1, [ptr+pos] ; Get next vector (source data1)
XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
paddb xq1, xq1 ; q1 = q1<<1
paddb xq2, xq2 ; q2 = q2<<1
pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
jg next_vect ; Loop for each vect except 0
;; On exit tmp == 0 and xs1/xs2 hold source 0, which is folded in without doubling
mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
pxor xq1, xs1 ;q1 ^= 1 * s1[0]
pxor xp2, xs2 ;p2 ^= s2[0]
pxor xq2, xs2 ;q2 ^= 1 * s2[0]
XSTR [ptr+pos], xp1 ;Write parity P1 vector
XSTR [ptr+pos+16], xp2 ;Write parity P2 vector
XSTR [tmp+pos], xq1 ;Write parity Q1 vector
XSTR [tmp+pos+16], xq2 ;Write parity Q2 vector
add pos, 32
cmp pos, len
jle loop32
;; ------------------------------
;; Do last 16 Bytes remaining
add len, 32 ;Restore the true length
cmp pos, len
je return_pass
;; Same algorithm as loop32, one 16-byte lane at a time
loop16:
mov ptr, [arg2+vec*PS] ;Fetch last source pointer
mov tmp, vec ;Set tmp to point back to last vector
XLDR xs1, [ptr+pos] ;Preload last vector (source)
pxor xp1, xp1 ;p = 0
pxor xq1, xq1 ;q = 0
next_vect16:
sub tmp, 1 ;Inner loop for each source vector (flags used by jg below)
mov ptr, [arg2+tmp*PS] ; get pointer to next vect
pxor xq1, xs1 ; q1 ^= s1
pxor xtmp1, xtmp1 ; xtmp = 0
pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
pand xtmp1, xpoly ; xtmp = poly or 0x00
pxor xp1, xs1 ; p ^= s
paddb xq1, xq1 ; q = q<<1
pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
XLDR xs1, [ptr+pos] ; Get next vector (source data)
jg next_vect16 ; Loop for each vect except 0
mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
pxor xq1, xs1 ;q ^= 1 * s[0]
XSTR [ptr+pos], xp1 ;Write parity P vector
XSTR [tmp+pos], xq1 ;Write parity Q vector
add pos, 16
cmp pos, len
jl loop16
;; pos aliases the return register, so the status is written last
return_pass:
mov return, 0
FUNC_RESTORE
ret
return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
;;; 16-byte constant with 0x1d in every byte. It is reached through xpoly
;;; and xor-ed into the Q bytes whose top bit was set before doubling.
poly:
dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
;;; func core, ver, snum
slversion pq_gen_sse, 00, 08, 0032

194
raid/pq_gen_test.c Normal file
View File

@ -0,0 +1,194 @@
/**********************************************************************
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include<stdio.h>
#include<stdint.h>
#include<string.h>
#include<stdlib.h>
#include<limits.h>
#include "raid.h"
#include "types.h"
#define TEST_SOURCES 16
#define TEST_LEN 1024
#define TEST_MEM ((TEST_SOURCES + 2)*(TEST_LEN))
#ifndef TEST_SEED
# define TEST_SEED 0x1234
#endif
// Fills the first buffer_size bytes of buf with values taken from rand()
void rand_buffer(unsigned char *buf, long buffer_size)
{
	unsigned char *cursor = buf;
	long remaining;

	for (remaining = buffer_size; remaining > 0; remaining--)
		*cursor++ = rand();
}
// Prints len bytes of buf in hex, 16 per row, followed by a newline.
// Always returns 0.
int dump(unsigned char *buf, int len)
{
	int printed = 0;

	while (printed < len) {
		printf(" %2x", buf[printed]);
		printed++;
		if (printed % 16 == 0)
			printf("\n");
	}
	printf("\n");
	return 0;
}
// Exercises pq_gen() against the byte-wise reference checker pq_check_base():
// all-zero input, random input, varying source counts, varying lengths and
// buffers that end exactly at the end of their allocation.
// A non-zero return from pq_gen() is treated as a failure in every stage.
// Returns 0 when every stage passes and non-zero otherwise.
int main(int argc, char *argv[])
{
	int i, j, k, ret, fail = 0;
	void *buffs[TEST_SOURCES + 2];	// Pointers to src and dest
	char *tmp_buf[TEST_SOURCES + 2];

	printf("Test pq_gen_test ");

	srand(TEST_SEED);

	// Allocate the arrays
	for (i = 0; i < TEST_SOURCES + 2; i++) {
		void *buf;
		ret = posix_memalign(&buf, 32, TEST_LEN);
		if (ret) {
			printf("alloc error: Fail");
			return 1;
		}
		buffs[i] = buf;
	}

	// Test of all zeros. The P and Q destinations start out non-zero so
	// that a pq_gen() which writes nothing cannot pass by accident.
	for (i = 0; i < TEST_SOURCES; i++)
		memset(buffs[i], 0, TEST_LEN);
	memset(buffs[TEST_SOURCES], 0xff, TEST_LEN);
	memset(buffs[TEST_SOURCES + 1], 0xff, TEST_LEN);

	ret = pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
	if (ret != 0)
		fail++;

	for (i = 0; i < TEST_LEN; i++) {
		if (((char *)buffs[TEST_SOURCES])[i] != 0)
			fail++;
	}

	for (i = 0; i < TEST_LEN; i++) {
		if (((char *)buffs[TEST_SOURCES + 1])[i] != 0)
			fail++;
	}

	if (fail > 0) {
		printf("fail zero test %d\n", fail);
		return 1;
	} else
		putchar('.');

	// Test rand1
	for (i = 0; i < TEST_SOURCES + 2; i++)
		rand_buffer(buffs[i], TEST_LEN);

	ret = pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
	fail |= (ret != 0);
	fail |= pq_check_base(TEST_SOURCES + 2, TEST_LEN, buffs);

	if (fail > 0) {
		int t;
		printf(" Fail rand test1 fail=%d, ret=%d\n", fail, ret);
		for (t = 0; t < TEST_SOURCES + 2; t++)
			dump(buffs[t], 15);

		printf(" reference function p,q\n");
		pq_gen_base(TEST_SOURCES + 2, TEST_LEN, buffs);
		for (t = TEST_SOURCES; t < TEST_SOURCES + 2; t++)
			dump(buffs[t], 15);

		return 1;
	} else
		putchar('.');

	// Test various number of sources
	for (j = 4; j <= TEST_SOURCES + 2; j++) {
		for (i = 0; i < j; i++)
			rand_buffer(buffs[i], TEST_LEN);

		ret = pq_gen(j, TEST_LEN, buffs);
		fail |= (ret != 0);
		fail |= pq_check_base(j, TEST_LEN, buffs);

		if (fail > 0) {
			printf("fail rand test %d sources\n", j);
			return 1;
		} else
			putchar('.');
	}

	fflush(0);

	// Test various number of sources and len
	k = 0;
	while (k <= TEST_LEN) {
		for (j = 4; j <= TEST_SOURCES + 2; j++) {
			for (i = 0; i < j; i++)
				rand_buffer(buffs[i], k);

			ret = pq_gen(j, k, buffs);
			fail |= (ret != 0);
			fail |= pq_check_base(j, k, buffs);

			if (fail > 0) {
				printf("fail rand test %d sources, len=%d, fail="
				       "%d, ret=%d\n", j, k, fail, ret);
				return 1;
			}
		}
		putchar('.');
		k += 32;
	}

	// Test at the end of buffer: each vector starts k bytes into its
	// allocation, so the last byte processed is the last byte allocated
	k = 0;
	while (k <= TEST_LEN) {
		for (j = 0; j < (TEST_SOURCES + 2); j++) {
			rand_buffer(buffs[j], TEST_LEN - k);
			tmp_buf[j] = (char *)buffs[j] + k;
		}

		ret = pq_gen(TEST_SOURCES + 2, TEST_LEN - k, (void *)tmp_buf);
		fail |= (ret != 0);
		fail |= pq_check_base(TEST_SOURCES + 2, TEST_LEN - k, (void *)tmp_buf);

		if (fail > 0) {
			printf("fail end test - offset: %d, len: %d, fail: %d, "
			       "ret: %d\n", k, TEST_LEN - k, fail, ret);
			return 1;
		}

		putchar('.');
		fflush(0);
		k += 32;
	}

	if (!fail)
		printf(" done: Pass\n");

	return fail;
}

147
raid/raid_base.c Normal file
View File

@ -0,0 +1,147 @@
/**********************************************************************
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <limits.h>
#include <stdint.h>
// Per-byte constants replicated across one machine word, used by
// pq_gen_base() to double every byte of a word at once:
//   notbit0 - clears the bit shifted in from the neighbouring byte
//   bit7    - selects the top bit of every byte
//   gf8poly - the 0x1d reduction constant in every byte
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
# define notbit0 0xfefefefefefefefeULL
# define bit7    0x8080808080808080ULL
# define gf8poly 0x1d1d1d1d1d1d1d1dULL
#else
# define notbit0 0xfefefefeUL
# define bit7    0x80808080UL
# define gf8poly 0x1d1d1d1dUL
#endif

// Reference P+Q parity generation.
//
// array[0 .. vects-3] are the sources, array[vects-2] receives P (the xor of
// all sources) and array[vects-1] receives Q. Q is accumulated from the last
// source down to source 0 as q = 2*q ^ s, with the doubling done on every
// byte of a word in parallel.
//
// The data is processed in units of sizeof(long); bytes beyond the last whole
// unit of len are not written.
//
// Returns 0 on success, 1 if vects < 4 (fewer than 2 sources + 2 dests).
int pq_gen_base(int vects, int len, void **array)
{
	int i, j;
	unsigned long p, q, s;
	unsigned long **src = (unsigned long **)array;
	int blocks = len / sizeof(long);

	if (vects < 4)
		return 1;	// Must have at least 2 src and 2 dest

	for (i = 0; i < blocks; i++) {
		q = p = src[vects - 3][i];

		for (j = vects - 4; j >= 0; j--) {
			p ^= s = src[j][i];
			q = s ^ (((q << 1) & notbit0) ^	// shift each byte
				 ((((q & bit7) << 1) - ((q & bit7) >> 7))	// 0xff mask for bytes with bit 7 set
				  & gf8poly));	// apply poly
		}

		src[vects - 2][i] = p;	// second to last pointer is p
		src[vects - 1][i] = q;	// last pointer is q
	}
	return 0;
}
// Reference P+Q parity check, one byte at a time.
//
// Recomputes P and Q from the sources array[0 .. vects-3] and compares them
// with the stored P in array[vects-2] and Q in array[vects-1].
//
// Returns 0 when every byte matches. On the first mismatch at byte i it
// returns (i | 1) for P or (i | 2) for Q, which is always non-zero.
// Returns 1 if vects < 4 (fewer than 2 sources + 2 dests).
int pq_check_base(int vects, int len, void **array)
{
	int i, j;
	unsigned char p, q, s;
	unsigned char **src = (unsigned char **)array;

	if (vects < 4)
		return 1;	// Must have at least 2 src and 2 dest

	for (i = 0; i < len; i++) {
		q = p = src[vects - 3][i];

		for (j = vects - 4; j >= 0; j--) {
			s = src[j][i];
			p ^= s;

			// q = 2*q ^ s: shift left and fold in 0x1d when bit 7 was set
			q = s ^ ((q << 1) ^ ((q & 0x80) ? 0x1d : 0));
		}

		if (src[vects - 2][i] != p)	// second to last pointer is p
			return i | 1;
		if (src[vects - 1][i] != q)	// last pointer is q
			return i | 2;
	}
	return 0;
}
// Reference xor parity generation, one byte at a time.
// array[0 .. vects-2] are the sources; array[vects-1] receives their xor.
// Always returns 0.
int xor_gen_base(int vects, int len, void **array)
{
	unsigned char **vec = (unsigned char **)array;
	int pos = 0;

	while (pos < len) {
		unsigned char acc = vec[0][pos];
		int v;

		for (v = 1; v < vects - 1; v++)
			acc ^= vec[v][pos];

		vec[vects - 1][pos] = acc;	// last pointer is dest
		pos++;
	}

	return 0;
}
// Reference xor parity check, one byte at a time.
//
// Xors byte i of all vects vectors (sources and parity) and expects zero at
// every position.
//
// Returns 0 when all positions are consistent, len (non-zero) as soon as a
// position is inconsistent, and 1 if vects < 2.
int xor_check_base(int vects, int len, void **array)
{
	int i, j;
	unsigned char parity;
	unsigned char **src = (unsigned char **)array;

	if (vects < 2)
		return 1;	// Must have at least 2 vectors

	for (i = 0; i < len; i++) {
		parity = 0;
		for (j = 0; j < vects; j++)
			parity ^= src[j][i];

		// Reaching here implies len > 0, so len is a non-zero failure code
		if (parity != 0)
			return len;
	}

	return 0;
}
// Version record attached to each base function: serial number, version
// and core fields.
struct slver {
unsigned short snum;
unsigned char ver;
unsigned char core;
};
// One record per base function; initializers are in field order
// { snum, ver, core }.
// NOTE(review): the uninitialized *_slver_<hex> objects appear to exist only
// so that the symbol name itself carries core/ver/snum - confirm.
struct slver pq_gen_base_slver_0001012a;
struct slver pq_gen_base_slver = { 0x012a, 0x01, 0x00 };
struct slver xor_gen_base_slver_0001012b;
struct slver xor_gen_base_slver = { 0x012b, 0x01, 0x00 };
struct slver pq_check_base_slver_0001012c;
struct slver pq_check_base_slver = { 0x012c, 0x01, 0x00 };
struct slver xor_check_base_slver_0001012d;
struct slver xor_check_base_slver = { 0x012d, 0x01, 0x00 };

140
raid/raid_multibinary.asm Normal file
View File

@ -0,0 +1,140 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; WRT_OPT is appended to the function addresses taken in the dispatch
;;; code below: "wrt ..plt" for elf64, nothing for other formats.
%ifidn __OUTPUT_FORMAT__, elf64
%define WRT_OPT wrt ..plt
%else
%define WRT_OPT
%endif
%include "reg_sizes.asm"
%include "multibinary.asm"
default rel
[bits 64]
;;; Implementations the dispatchers can select from
extern pq_gen_base
extern pq_gen_sse
extern pq_gen_avx
extern pq_gen_avx2
extern xor_gen_base
extern xor_gen_sse
extern xor_gen_avx
extern pq_check_base
extern pq_check_sse
extern xor_check_base
extern xor_check_sse
;;; xor_gen and pq_gen use the macros from multibinary.asm.
;;; NOTE(review): xor_gen passes xor_gen_avx twice, presumably because no
;;; separate avx2 variant exists - confirm against multibinary.asm.
mbin_interface xor_gen
mbin_interface pq_gen
mbin_dispatch_init5 xor_gen, xor_gen_base, xor_gen_sse, xor_gen_avx, xor_gen_avx
mbin_dispatch_init5 pq_gen, pq_gen_base, pq_gen_sse, pq_gen_avx, pq_gen_avx2
;;; Dispatch pointers for the hand-written check dispatchers below. They
;;; start out pointing at the *_mbinit stubs, so the first call selects an
;;; implementation and overwrites the pointer.
section .data
xor_check_dispatched:
dq xor_check_mbinit
pq_check_dispatched:
dq pq_check_mbinit
section .text
;;;;
; pq_check multibinary function
;
; pq_check jumps through pq_check_dispatched. On the first call that
; pointer leads to pq_check_mbinit, which runs pq_check_dispatch_init to
; store the chosen implementation and then falls through into pq_check.
;;;;
global pq_check:function
pq_check_mbinit:
call pq_check_dispatch_init
pq_check:
jmp qword [pq_check_dispatched]
; Selects pq_check_sse when CPUID leaf 1 reports the SSE4.1 flag in ecx,
; otherwise pq_check_base. Every register it touches is saved and
; restored, so the caller's argument registers are intact afterwards.
pq_check_dispatch_init:
push rax
push rbx
push rcx
push rdx
push rsi
lea rsi, [pq_check_base WRT_OPT] ; Default
mov eax, 1
cpuid
test ecx, FLAG_CPUID1_ECX_SSE4_1
lea rbx, [pq_check_sse WRT_OPT] ; lea leaves the flags from test intact
cmovne rsi, rbx ; Flag set -> use the SSE version
mov [pq_check_dispatched], rsi
pop rsi
pop rdx
pop rcx
pop rbx
pop rax
ret
;;;;
; xor_check multibinary function
;
; xor_check jumps through xor_check_dispatched. On the first call that
; pointer leads to xor_check_mbinit, which runs xor_check_dispatch_init to
; store the chosen implementation and then falls through into xor_check.
;;;;
global xor_check:function
xor_check_mbinit:
call xor_check_dispatch_init
xor_check:
jmp qword [xor_check_dispatched]
; Selects xor_check_sse when CPUID leaf 1 reports the SSE4.1 flag in ecx,
; otherwise xor_check_base. Every register it touches is saved and
; restored, so the caller's argument registers are intact afterwards.
xor_check_dispatch_init:
push rax
push rbx
push rcx
push rdx
push rsi
lea rsi, [xor_check_base WRT_OPT] ; Default
mov eax, 1
cpuid
test ecx, FLAG_CPUID1_ECX_SSE4_1
lea rbx, [xor_check_sse WRT_OPT] ; lea leaves the flags from test intact
cmovne rsi, rbx ; Flag set -> use the SSE version
mov [xor_check_dispatched], rsi
pop rsi
pop rdx
pop rcx
pop rbx
pop rax
ret
;;; func core, ver, snum
slversion xor_gen, 00, 03, 0126
slversion xor_check, 00, 03, 0127
slversion pq_gen, 00, 03, 0128
slversion pq_check, 00, 03, 0129

285
raid/xor_check_sse.asm Normal file
View File

@ -0,0 +1,285 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Optimized xor parity check of N vectors using SSE
;;; int xor_check_sse(int vects, int len, void **array)
;;; Checks that the xor of all N (vects) vectors in the array of pointers
;;; (**array) is zero at every byte. Returns 0 on pass and 1 on fail.
;;; Vectors must be aligned to 16 bytes. Length can be any value.
%include "reg_sizes.asm"
;;; Calling-convention glue, selected by the assembler output format.
;;; Every branch provides the argument and scratch register aliases, the
;;; pointer size PS, the func() entry wrapper and FUNC_SAVE/FUNC_RESTORE.
;;; tmp2 shares the return register in all three branches.
%ifidn __OUTPUT_FORMAT__, elf64
;; elf64: nothing is saved or restored
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 rax
%define tmp2.b al
%define tmp3 arg4
%define return rax
%define PS 8
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64
;; win64: xmm6 and xmm7 are used by the 128-byte loop and are saved on the
;; stack in the prolog
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define return rax
%define tmp2 rax
%define tmp2.b al
%define PS 8
%define tmp r11
%define tmp3 r10
%define stack_size 2*16 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
end_prolog
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
add rsp, stack_size
%endmacro
%elifidn __OUTPUT_FORMAT__, elf32
;; elf32: arguments live on the stack and are read through arg(x) once ebp
;; has been set up. arg0 (vec) stays in its stack slot instead of a register.
%define arg0 arg(0)
%define arg1 ecx
%define tmp2 eax
%define tmp2.b al
%define tmp3 edx
%define return eax
%define PS 4
%define func(x) x:
%define arg(x) [ebp+8+PS*x]
%define arg2 edi ; must save/restore
%define arg3 esi
%define tmp ebx
%macro FUNC_SAVE 0
push ebp
mov ebp, esp
push esi
push edi
push ebx
mov arg1, arg(1)
mov arg2, arg(2)
%endmacro
%macro FUNC_RESTORE 0
pop ebx ; pops mirror the pushes in FUNC_SAVE
pop edi
pop esi
mov esp, ebp ; standard frame teardown
pop ebp
%endmacro
%endif ; output formats
;;; General-purpose register roles inside xor_check_sse.
;;; pos shares tmp3, which also serves as the byte counter in the
;;; unaligned-length path.
%define vec arg0
%define len arg1
%define ptr arg3
%define pos tmp3
%ifidn PS,8 ; 64-bit code
default rel
[bits 64]
%endif
;;; Use non-temporal load/store unless NO_NT_LDST is defined
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
;;; xor_check_sse(vects, len, array)
;;; Xors together all vects vectors pointed to by array[] and checks that
;;; the result is zero at every position.
;;; Returns 0 on pass (including len == 0) and 1 on fail or when
;;; vects-1 <= 0.
;;; Strategy:
;;;  - len a multiple of 128: 128 bytes per iteration in xmm0-7
;;;  - len a multiple of PS:  the trailing (len mod 128) bytes are checked
;;;                           PS bytes at a time, working backwards
;;;  - otherwise:             trailing bytes are checked one at a time until
;;;                           len is a multiple of 8
;;; The slower paths shrink len and rejoin the faster ones.
section .text
align 16
global xor_check_sse:function
func(xor_check_sse)
FUNC_SAVE
%ifidn PS,8 ;64-bit code
sub vec, 1 ; Keep as offset to last source
%else ;32-bit code
mov tmp, arg(0) ; Update vec length arg to last source
sub tmp, 1
mov arg(0), tmp ; mov leaves the flags from sub for the jng below
%endif
jng return_fail ;Must have at least 2 sources
cmp len, 0
je return_pass
test len, (128-1) ;Check alignment of length
jnz len_not_aligned
len_aligned_128bytes:
;; len is lowered by 128 so that "pos <= len" means a full chunk remains
sub len, 128
mov pos, 0
mov tmp, vec ;Preset to last vector
loop128:
mov tmp2, [arg2+tmp*PS] ;Fetch last pointer in array
sub tmp, 1 ;Next vect
XLDR xmm0, [tmp2+pos] ;Start with end of array in last vector
XLDR xmm1, [tmp2+pos+16] ;Keep xor parity in xmm0-7
XLDR xmm2, [tmp2+pos+(2*16)]
XLDR xmm3, [tmp2+pos+(3*16)]
XLDR xmm4, [tmp2+pos+(4*16)]
XLDR xmm5, [tmp2+pos+(5*16)]
XLDR xmm6, [tmp2+pos+(6*16)]
XLDR xmm7, [tmp2+pos+(7*16)]
next_vect:
mov ptr, [arg2+tmp*PS]
sub tmp, 1 ;Flags from this sub drive the jge below
xorpd xmm0, [ptr+pos] ;Get next vector (source)
xorpd xmm1, [ptr+pos+16]
xorpd xmm2, [ptr+pos+(2*16)]
xorpd xmm3, [ptr+pos+(3*16)]
xorpd xmm4, [ptr+pos+(4*16)]
xorpd xmm5, [ptr+pos+(5*16)]
xorpd xmm6, [ptr+pos+(6*16)]
xorpd xmm7, [ptr+pos+(7*16)]
;;; prefetch [ptr+pos+(8*16)]
jge next_vect ;Loop for each vect
;; End of vects, check that all parity regs = 0
mov tmp, vec ;Back to last vector
por xmm0, xmm1 ;Fold all eight accumulators into xmm0
por xmm0, xmm2
por xmm0, xmm3
por xmm0, xmm4
por xmm0, xmm5
por xmm0, xmm6
por xmm0, xmm7
ptest xmm0, xmm0 ;ZF is set only if every bit is zero
jnz return_fail
add pos, 128
cmp pos, len
jle loop128
return_pass:
FUNC_RESTORE
mov return, 0
ret
;;; Do one byte at a time for no alignment case.
;;; Checks byte len-1 of every vector, then decrements len, until len is a
;;; multiple of 8.
xor_gen_byte:
mov tmp, vec ;Preset to last vector
loop_1byte:
mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
mov tmp2.b, [ptr+len-1] ;Get array n
sub tmp, 1
nextvect_1byte:
mov ptr, [arg2+tmp*PS]
xor tmp2.b, [ptr+len-1]
sub tmp, 1
jge nextvect_1byte
mov tmp, vec ;Back to last vector
cmp tmp2.b, 0
jne return_fail
sub len, 1
test len, (8-1)
jnz loop_1byte
cmp len, 0
je return_pass
test len, (128-1) ;If not 0 and 128-byte aligned
jz len_aligned_128bytes ; then do aligned case. len = y * 128
;; else we are 8-byte aligned so fall through to recheck
;; Unaligned length cases
len_not_aligned:
test len, (PS-1)
jne xor_gen_byte
mov tmp3, len
and tmp3, (128-1) ;Do the unaligned bytes 4-8 at a time
mov tmp, vec ;Preset to last vector
;; Run backwards 8 bytes (4B for 32bit) at a time for (tmp3) bytes
loopN_bytes:
mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
mov tmp2, [ptr+len-PS] ;Get array n
sub tmp, 1
nextvect_Nbytes:
mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
xor tmp2, [ptr+len-PS]
sub tmp, 1
jge nextvect_Nbytes ;Loop for each source
mov tmp, vec ;Back to last vector
cmp tmp2, 0
jne return_fail
sub len, PS
sub tmp3, PS
jg loopN_bytes
cmp len, 128 ;Now len is aligned to 128B
jge len_aligned_128bytes ;We can do the rest aligned
cmp len, 0
je return_pass
;; Any other value of len falls through to the failure exit
return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
;;; func core, ver, snum
slversion xor_check_sse, 00, 03, 0031

280
raid/xor_check_test.c Normal file
View File

@ -0,0 +1,280 @@
/**********************************************************************
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include<stdio.h>
#include<stdint.h>
#include<string.h>
#include<stdlib.h>
#include "raid.h"
#include "types.h"
#define TEST_SOURCES 16
#define TEST_LEN 1024
#define TEST_MEM ((TEST_SOURCES + 1)*(TEST_LEN))
#ifndef TEST_SEED
# define TEST_SEED 0x1234
#endif
// Fills the first buffer_size bytes of buf with values taken from rand()
void rand_buffer(unsigned char *buf, long buffer_size)
{
	long filled = 0;

	while (filled < buffer_size) {
		buf[filled] = rand();
		filled++;
	}
}
// Exercises xor_check() using parity produced by xor_gen_base().
// Every stage verifies that consistent data passes (returns 0) and that a
// single corrupted byte is reported (returns non-zero). Stages cover all-zero
// data, random data, every source count from 3 up, every length from 1 to
// TEST_LEN, and vectors that end exactly at the end of their allocation.
// Returns 0 when everything passes.
int main(int argc, char *argv[])
{
int i, j, k, ret, fail = 0;
void *buffs[TEST_SOURCES + 1];
char c;
int serr, lerr;
char *tmp_buf[TEST_SOURCES + 1];
printf("Test xor_check_test %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);
srand(TEST_SEED);
// Allocate the arrays
for (i = 0; i < TEST_SOURCES + 1; i++) {
void *buf;
if (posix_memalign(&buf, 16, TEST_LEN)) {
printf("alloc error: Fail");
return 1;
}
buffs[i] = buf;
}
// Test of all zeros
for (i = 0; i < TEST_SOURCES + 1; i++)
memset(buffs[i], 0, TEST_LEN);
xor_gen_base(TEST_SOURCES + 1, TEST_LEN, buffs);
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
if (ret != 0) {
fail++;
printf("\nfail zero test %d\n", ret);
}
((char *)(buffs[0]))[TEST_LEN - 2] = 0x7; // corrupt buffer
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
if (ret == 0) {
fail++;
printf("\nfail corrupt buffer test %d\n", ret);
}
((char *)(buffs[0]))[TEST_LEN - 2] = 0; // un-corrupt buffer
// Test corrupted buffer any location on all sources
// (buffers are still all zero here, so any non-zero byte breaks parity)
for (j = 0; j < TEST_SOURCES + 1; j++) {
for (i = TEST_LEN - 1; i >= 0; i--) {
((char *)buffs[j])[i] = 0x5; // corrupt buffer
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
if (ret == 0) {
fail++;
printf("\nfail corrupt buffer test j=%d, i=%d\n", j, i);
return 1;
}
((char *)buffs[j])[i] = 0; // un-corrupt buffer
}
putchar('.');
}
// Test rand1
for (i = 0; i < TEST_SOURCES + 1; i++)
rand_buffer(buffs[i], TEST_LEN);
xor_gen_base(TEST_SOURCES + 1, TEST_LEN, buffs);
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
if (ret != 0) {
fail++;
printf("fail first rand test %d\n", ret);
}
// Flip one bit, expect a failure, then restore the original byte
c = ((char *)(buffs[0]))[TEST_LEN - 2];
((char *)(buffs[0]))[TEST_LEN - 2] = c ^ 0x1;
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
if (ret == 0) {
fail++;
printf("\nFail corrupt buffer test, passed when should have failed\n");
}
((char *)(buffs[0]))[TEST_LEN - 2] = c; // un-corrupt buffer
// Test corrupted buffer any location on all sources w/ random data
for (j = 0; j < TEST_SOURCES + 1; j++) {
for (i = TEST_LEN - 1; i >= 0; i--) {
// Check it still passes
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
if (ret != 0) { // should pass
fail++;
printf
("\nFail rand test with un-corrupted buffer j=%d, i=%d\n",
j, i);
return 1;
}
c = ((char *)buffs[j])[i];
((char *)buffs[j])[i] = c ^ 1; // corrupt buffer
ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
if (ret == 0) { // Check it now fails
fail++;
printf("\nfail corrupt buffer test j=%d, i=%d\n", j, i);
return 1;
}
((char *)buffs[j])[i] = c; // un-corrupt buffer
}
putchar('.');
}
// Test various number of sources, full length
// (j counts the parity vector too, so j = 3 means two sources)
for (j = 3; j <= TEST_SOURCES + 1; j++) {
// New random data
for (i = 0; i < j; i++)
rand_buffer(buffs[i], TEST_LEN);
// Generate xor parity for this number of sources
xor_gen_base(j, TEST_LEN, buffs);
// Set errors up in each source and len position
for (i = 0; i < j; i++) {
for (k = 0; k < TEST_LEN; k++) {
// See if it still passes
ret = xor_check(j, TEST_LEN, buffs);
if (ret != 0) { // Should pass
printf("\nfail rand test %d sources\n", j);
fail++;
return 1;
}
c = ((char *)buffs[i])[k];
((char *)buffs[i])[k] = c ^ 1; // corrupt buffer
ret = xor_check(j, TEST_LEN, buffs);
if (ret == 0) { // Should fail
printf
("\nfail rand test corrupted buffer %d sources\n",
j);
fail++;
return 1;
}
((char *)buffs[i])[k] = c; // un-corrupt buffer
}
}
putchar('.');
}
fflush(0);
// Test various number of sources and len
// (k steps through every length from 1 to TEST_LEN)
k = 1;
while (k <= TEST_LEN) {
for (j = 3; j <= TEST_SOURCES + 1; j++) {
for (i = 0; i < j; i++)
rand_buffer(buffs[i], k);
// Generate xor parity for this number of sources
xor_gen_base(j, k, buffs);
// Inject errors at various source and len positions
// (every 10th byte position, in every vector)
for (lerr = 0; lerr < k; lerr += 10) {
for (serr = 0; serr < j; serr++) {
// See if it still passes
ret = xor_check(j, k, buffs);
if (ret != 0) { // Should pass
printf("\nfail rand test %d sources\n", j);
fail++;
return 1;
}
c = ((char *)buffs[serr])[lerr];
((char *)buffs[serr])[lerr] = c ^ 1; // corrupt buffer
ret = xor_check(j, k, buffs);
if (ret == 0) { // Should fail
printf("\nfail rand test corrupted buffer "
"%d sources, len=%d, ret=%d\n", j, k,
ret);
fail++;
return 1;
}
((char *)buffs[serr])[lerr] = c; // un-corrupt buffer
}
}
}
putchar('.');
fflush(0);
k += 1;
}
// Test at the end of buffer
// (each vector starts i bytes into its allocation and runs to its last byte)
for (i = 0; i < TEST_LEN; i += 32) {
for (j = 0; j < TEST_SOURCES + 1; j++) {
rand_buffer(buffs[j], TEST_LEN - i);
tmp_buf[j] = (char *)buffs[j] + i;
}
xor_gen_base(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
// Test good data
ret = xor_check(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
if (ret != 0) {
printf("fail end test - offset: %d, len: %d\n", i, TEST_LEN - i);
fail++;
return 1;
}
// Test bad data
for (serr = 0; serr < TEST_SOURCES + 1; serr++) {
for (lerr = 0; lerr < (TEST_LEN - i); lerr++) {
c = tmp_buf[serr][lerr];
tmp_buf[serr][lerr] = c ^ 1;
ret =
xor_check(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
if (ret == 0) {
printf("fail end test corrupted buffer - "
"offset: %d, len: %d, ret: %d\n", i,
TEST_LEN - i, ret);
fail++;
return 1;
}
tmp_buf[serr][lerr] = c;
}
}
putchar('.');
fflush(0);
}
if (fail == 0)
printf("Pass\n");
return fail;
}

70
raid/xor_example.c Normal file
View File

@ -0,0 +1,70 @@
/**********************************************************************
Copyright(c) 2011-2013 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "raid.h"
#include "types.h"
#define TEST_SOURCES 16
#define TEST_LEN 16*1024
/*
 * XOR parity example.
 *
 * Allocates TEST_SOURCES + 1 buffers of TEST_LEN bytes (16-byte aligned, as
 * the SSE routines require), fills them with random data and calls
 * xor_gen_sse(), which writes the parity into the last buffer.  The parity
 * is then verified with xor_check_sse(); afterwards a single bit is flipped
 * in one source and the check is repeated, this time expecting a failure.
 *
 * Returns 0 only when the clean check passes AND the corrupted check is
 * rejected; returns 1 on allocation failure or when either check gives the
 * wrong answer, so scripts running the example can detect a problem.
 */
int main(int argc, char *argv[])
{
	int i, j, should_pass, should_fail;
	void *buffs[TEST_SOURCES + 1];

	printf("XOR example\n");
	for (i = 0; i < TEST_SOURCES + 1; i++) {
		void *buf;
		if (posix_memalign(&buf, 16, TEST_LEN)) {
			printf("alloc error: Fail");
			// Release whatever was allocated before the failure
			while (i-- > 0)
				free(buffs[i]);
			return 1;
		}
		buffs[i] = buf;
	}

	printf("Make random data\n");
	for (i = 0; i < TEST_SOURCES + 1; i++)
		for (j = 0; j < TEST_LEN; j++)
			((char *)buffs[i])[j] = rand();

	printf("Generate xor parity\n");
	xor_gen_sse(TEST_SOURCES + 1, TEST_LEN, buffs);

	printf("Check parity: ");
	should_pass = xor_check_sse(TEST_SOURCES + 1, TEST_LEN, buffs);
	printf("%s\n", should_pass == 0 ? "Pass" : "Fail");

	printf("Find corruption: ");
	((char *)buffs[TEST_SOURCES / 2])[TEST_LEN / 2] ^= 1;	// flip one bit
	should_fail = xor_check_sse(TEST_SOURCES + 1, TEST_LEN, buffs);	//recheck
	printf("%s\n", should_fail != 0 ? "Pass" : "Fail");

	for (i = 0; i < TEST_SOURCES + 1; i++)
		free(buffs[i]);

	// Exit status mirrors the two "Pass"/"Fail" lines printed above
	return (should_pass == 0 && should_fail != 0) ? 0 : 1;
}

228
raid/xor_gen_avx.asm Normal file
View File

@ -0,0 +1,228 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Optimized xor of N source vectors using AVX
;;; int xor_gen_avx(int vects, int len, void **array)
;;; Generates xor parity vector from N (vects-1) sources in array of pointers
;;; (**array). Last pointer is the dest.
;;; Vectors must be aligned to 32 bytes. Length can be any value.
%include "reg_sizes.asm"
; Per-output-format register aliases.  Only elf64 and win64 are handled in
; this file; the body below refers to arguments and scratch registers solely
; through these names.
;
; elf64: arg0..arg5 map to rdi, rsi, rdx, rcx, r8, r9.  FUNC_SAVE and
; FUNC_RESTORE are empty for this format.
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp3 arg4
%define func(x) x:
%define return rax
%define FUNC_SAVE
%define FUNC_RESTORE
; win64: arg0..arg3 map to rcx, rdx, r8, r9.  The main loop uses ymm6/ymm7 as
; scratch, and xmm6/xmm7 are non-volatile in the Microsoft x64 convention, so
; FUNC_SAVE spills them to a stack area and FUNC_RESTORE reloads them.
; The spills use the unaligned form (vmovdqu).
; NOTE(review): presumably because the "odd multiple of 8" adjustment only
; yields 16-byte, not 32-byte, stack alignment -- confirm.
; alloc_stack / end_prolog / proc_frame are not defined in this file
; (presumably supplied by reg_sizes.asm or the assembler's win64 support).
%elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define tmp r11
%define tmp3 r10
%define func(x) proc_frame x
%define return rax
%define stack_size 2*32 + 8 ;must be an odd multiple of 8
%macro FUNC_SAVE 0
alloc_stack stack_size
vmovdqu [rsp + 0*32], ymm6
vmovdqu [rsp + 1*32], ymm7
end_prolog
%endmacro
%macro FUNC_RESTORE 0
vmovdqu ymm6, [rsp + 0*32]
vmovdqu ymm7, [rsp + 1*32]
add rsp, stack_size
%endmacro
%endif ;output formats
; Role-based names used by the function body:
;   vec  - arg0, vector count on entry (turned into an index by the body)
;   len  - arg1, byte length
;   ptr  - scratch pointer to the vector currently being read/written
;   tmp2 - rax; doubles as the return register, which is set explicitly on exit
;   pos  - byte offset inside the vectors for the 128-byte loop
;   PS   - pointer size in bytes (8, 64-bit only)
%define vec arg0
%define len arg1
%define ptr arg3
%define tmp2 rax
%define tmp2.b al
%define pos tmp3
%define PS 8
;;; Non-temporal stores (vmovntdq) are used unless NO_NT_LDST is defined, in
;;; which case regular aligned stores (vmovdqa) are used instead.
;;; Loads are regular aligned loads (vmovdqa) in both configurations.
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovdqa
%define XSTR vmovntdq
%endif
default rel
[bits 64]
section .text
align 16
global xor_gen_avx:function
; xor_gen_avx(vects, len, array)
;   vec (arg0)  - number of pointers in array, sources plus one destination
;   len (arg1)  - number of bytes in each vector
;   arg2        - array of pointers; the last entry is the parity destination
; Returns 0 in rax on success, 1 when vects - 2 <= 0.
;
; Strategy: the bulk of the data is processed 128 bytes (4 ymm registers) at
; a time.  When len is not a multiple of 128 the tail is peeled off first,
; working backwards from the end: single bytes until len is a multiple of 8,
; then 8-byte words until len is a multiple of 128.
func(xor_gen_avx)
FUNC_SAVE
sub vec, 2 ;Keep as offset to last source
jng return_fail ;Must have at least 2 sources
cmp len, 0
je return_pass
test len, (128-1) ;Check alignment of length
jnz len_not_aligned
; ---- Main path: len is a non-zero multiple of 128 ----
len_aligned_128bytes:
; Bias len by one block so that "pos <= len" below means another full
; 128-byte block remains.
sub len, 128
mov pos, 0
loop128:
mov tmp, vec ;Back to last vector
mov tmp2, [arg2+vec*PS] ;Fetch last pointer in array
sub tmp, 1 ;Next vect
XLDR ymm0, [tmp2+pos] ;Start with end of array in last vector
XLDR ymm1, [tmp2+pos+32] ;Keep xor parity in ymm0-3
XLDR ymm2, [tmp2+pos+(2*32)]
XLDR ymm3, [tmp2+pos+(3*32)]
; Walk the remaining sources from index vec-1 down to 0.
next_vect:
mov ptr, [arg2+tmp*PS]
sub tmp, 1
XLDR ymm4, [ptr+pos] ;Get next vector (source)
XLDR ymm5, [ptr+pos+32]
XLDR ymm6, [ptr+pos+(2*32)]
XLDR ymm7, [ptr+pos+(3*32)]
vxorpd ymm0, ymm0, ymm4 ;Add to xor parity
vxorpd ymm1, ymm1, ymm5
vxorpd ymm2, ymm2, ymm6
vxorpd ymm3, ymm3, ymm7
; The flags tested here are still those of "sub tmp, 1" above; the vector
; loads and xors in between do not modify the flags.
jge next_vect ;Loop for each source
mov ptr, [arg2+PS+vec*PS] ;Address of parity vector
XSTR [ptr+pos], ymm0 ;Write parity xor vector
XSTR [ptr+pos+(1*32)], ymm1
XSTR [ptr+pos+(2*32)], ymm2
XSTR [ptr+pos+(3*32)], ymm3
add pos, 128
cmp pos, len
jle loop128
return_pass:
FUNC_RESTORE
mov return, 0
ret
;;; Do one byte at a time for no alignment case
; Each pass handles the byte at offset len-1 in every vector and then
; shrinks len by one, until len is a multiple of PS (8).
loop_1byte:
mov tmp, vec ;Back to last vector
mov ptr, [arg2+vec*PS] ;Fetch last pointer in array
mov tmp2.b, [ptr+len-1] ;Get array n
sub tmp, 1
nextvect_1byte:
mov ptr, [arg2+tmp*PS]
xor tmp2.b, [ptr+len-1]
sub tmp, 1
jge nextvect_1byte
mov tmp, vec
add tmp, 1 ;Add back to point to last vec
mov ptr, [arg2+tmp*PS]
mov [ptr+len-1], tmp2.b ;Write parity
sub len, 1
test len, (PS-1)
jnz loop_1byte
cmp len, 0
je return_pass
test len, (128-1) ;If not 0 and 128-byte aligned
jz len_aligned_128bytes ; then do aligned case. len = y * 128
;; else we are 8-byte aligned so fall through to recheck
;; Unaligned length cases
len_not_aligned:
test len, (PS-1)
jne loop_1byte
; len is a multiple of 8 here; tmp3 counts the bytes above the last
; 128-byte boundary that still have to be handled 8 at a time.
mov tmp3, len
and tmp3, (128-1) ;Do the unaligned bytes 8 at a time
;; Run backwards 8 bytes at a time for (tmp3) bytes
loop8_bytes:
mov tmp, vec ;Back to last vector
mov ptr, [arg2+vec*PS] ;Fetch last pointer in array
mov tmp2, [ptr+len-PS] ;Get array n
sub tmp, 1
nextvect_8bytes:
mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
xor tmp2, [ptr+len-PS]
sub tmp, 1
jge nextvect_8bytes ;Loop for each source
mov tmp, vec
add tmp, 1 ;Add back to point to last vec
mov ptr, [arg2+tmp*PS]
mov [ptr+len-PS], tmp2 ;Write parity
sub len, PS
sub tmp3, PS
jg loop8_bytes
cmp len, 128 ;Now len is aligned to 128B
jge len_aligned_128bytes ;We can do the rest aligned
cmp len, 0
je return_pass
; Falls through to the failure exit when len is neither 0 nor >= 128.
return_fail:
FUNC_RESTORE
mov return, 1
ret
endproc_frame
section .data
;;; func core, ver, snum
slversion xor_gen_avx, 02, 05, 0037

98
raid/xor_gen_perf.c Normal file
View File

@ -0,0 +1,98 @@
/**********************************************************************
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include<stdio.h>
#include<stdint.h>
#include<string.h>
#include<stdlib.h>
#include<sys/time.h>
#include "raid.h"
#include "test.h"
//#define CACHED_TEST
#ifdef CACHED_TEST
// Loop many times over same
# define TEST_SOURCES 10
# define TEST_LEN 8*1024
# define TEST_LOOPS 400000
# define TEST_TYPE_STR "_warm"
#else
// Uncached test. Pull from large mem base.
# define TEST_SOURCES 10
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN GT_L3_CACHE / TEST_SOURCES
# define TEST_LOOPS 1000
# define TEST_TYPE_STR "_cold"
#endif
#define TEST_MEM ((TEST_SOURCES + 1)*(TEST_LEN))
/*
 * Benchmark for xor_gen(): builds TEST_SOURCES + 1 zero-filled, 32-byte
 * aligned vectors of TEST_LEN bytes, runs one warm-up call and then times
 * TEST_LOOPS calls, reporting the result through perf_print().
 *
 * Returns 1 on allocation failure, otherwise the value of 'fail' (which is
 * never modified here, so 0).
 */
int main(int argc, char *argv[])
{
	struct perf start, stop;
	void **buffs;
	void *table;
	int n, rc, fail = 0;

	printf("Test xor_gen_perf\n");

	// Pointer table that is handed to xor_gen()
	rc = posix_memalign((void **)&table, 8, sizeof(int *) * (TEST_SOURCES + 6));
	if (rc != 0) {
		printf("alloc error: Fail");
		return 1;
	}
	buffs = table;

	// One aligned buffer per vector (sources and parity)
	n = 0;
	while (n < TEST_SOURCES + 1) {
		void *vect;

		rc = posix_memalign(&vect, 32, TEST_LEN);
		if (rc != 0) {
			printf("alloc error: Fail");
			return 1;
		}
		buffs[n] = vect;
		n++;
	}

	// Start from all-zero data
	n = 0;
	while (n < TEST_SOURCES + 1) {
		memset(buffs[n], 0, TEST_LEN);
		n++;
	}

	// Untimed warm-up pass
	xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);

	// Timed passes; n ends up equal to the number of passes performed
	perf_start(&start);
	n = 0;
	while (n < TEST_LOOPS) {
		xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
		n++;
	}
	perf_stop(&stop);

	printf("xor_gen" TEST_TYPE_STR ": ");
	perf_print(stop, start, (long long)TEST_MEM * n);

	return fail;
}

284
raid/xor_gen_sse.asm Normal file
View File

@ -0,0 +1,284 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Optimized xor of N source vectors using SSE
;;; int xor_gen_sse(int vects, int len, void **array)
;;; Generates xor parity vector from N (vects-1) sources in array of pointers
;;; (**array). Last pointer is the dest.
;;; Vectors must be aligned to 16 bytes. Length can be any value.
%include "reg_sizes.asm"
; Per-output-format register aliases for elf64, win64 and elf32.  The body
; below refers to arguments and scratch registers solely through these names.
; PS is the pointer size in bytes and doubles as the word size of the
; "N bytes at a time" tail loop.
;
; elf64: arg0..arg5 map to rdi, rsi, rdx, rcx, r8, r9.  FUNC_SAVE and
; FUNC_RESTORE are empty for this format.
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 rax
%define tmp2.b al
%define tmp3 arg4
%define return rax
%define PS 8
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
; win64: arg0..arg3 map to rcx, rdx, r8, r9.  The main loop keeps parity in
; xmm0-xmm7; xmm6/xmm7 are non-volatile in the Microsoft x64 convention, so
; FUNC_SAVE spills them and FUNC_RESTORE reloads them with aligned moves.
; alloc_stack / save_xmm128 / end_prolog / proc_frame are not defined in this
; file (presumably supplied by reg_sizes.asm or the assembler's win64 support).
%elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define return rax
%define tmp2 rax
%define tmp2.b al
%define PS 8
%define tmp r11
%define tmp3 r10
%define stack_size 2*16 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
end_prolog
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
add rsp, stack_size
%endmacro
; elf32: arguments live on the stack and are reached through ebp via arg(x).
; arg0 (vec) stays a memory operand; FUNC_SAVE sets up the frame, saves
; esi/edi/ebx and loads len into ecx and the pointer array into edi.
%elifidn __OUTPUT_FORMAT__, elf32
%define arg0 arg(0)
%define arg1 ecx
%define tmp2 eax
%define tmp2.b al
%define tmp3 edx
%define return eax
%define PS 4
%define func(x) x:
%define arg(x) [ebp+8+PS*x]
%define arg2 edi ; must save/restore
%define arg3 esi
%define tmp ebx
%macro FUNC_SAVE 0
push ebp
mov ebp, esp
push esi
push edi
push ebx
mov arg1, arg(1)
mov arg2, arg(2)
%endmacro
%macro FUNC_RESTORE 0
pop ebx
pop edi
pop esi
mov esp, ebp ;if has frame pointer
pop ebp
%endmacro
%endif ; output formats
; Role-based names used by the function body:
;   vec - arg0, vector count on entry (turned into an index by the body)
;   len - arg1, byte length
;   ptr - scratch pointer to the vector currently being read/written
;   pos - byte offset inside the vectors for the 128-byte loop
%define vec arg0
%define len arg1
%define ptr arg3
%define pos tmp3
%ifidn PS,8 ; 64-bit code
default rel
[bits 64]
%endif
;;; Non-temporal loads and stores (movntdqa / movntdq) are used unless
;;; NO_NT_LDST is defined, in which case regular aligned moves (movdqa) are
;;; used for both.
;;; NOTE(review): movntdqa is an SSE4.1 instruction, so the default build of
;;; this "sse" routine needs SSE4.1 -- confirm callers/dispatch account for it.
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
section .text
align 16
global xor_gen_sse:function
; xor_gen_sse(vects, len, array)
;   vec (arg0)  - number of pointers in array, sources plus one destination
;   len (arg1)  - number of bytes in each vector
;   arg2        - array of pointers; the last entry is the parity destination
; Returns 0 on success, 1 when vects - 2 <= 0.
;
; Strategy: the bulk of the data is processed 128 bytes (8 xmm registers) at
; a time.  When len is not a multiple of 128 the tail is peeled off first,
; working backwards from the end: single bytes until len is a multiple of 8,
; then PS-byte words until len is a multiple of 128.
func(xor_gen_sse)
FUNC_SAVE
%ifidn PS,8 ;64-bit code
sub vec, 2 ; Keep as offset to last source
%else ;32-bit code
; vec is a stack slot here, so adjust it through a register.  The store back
; (mov) leaves the flags of the sub intact for the jng below.
mov tmp, arg(0) ; Update vec length arg to last source
sub tmp, 2
mov arg(0), tmp
%endif
jng return_fail ;Must have at least 2 sources
cmp len, 0
je return_pass
test len, (128-1) ;Check alignment of length
jnz len_not_aligned
; ---- Main path: len is a non-zero multiple of 128 ----
len_aligned_128bytes:
; Bias len by one block so that "pos <= len" below means another full
; 128-byte block remains.
sub len, 128
mov pos, 0
mov tmp, vec ;Preset to last vector
loop128:
mov tmp2, [arg2+tmp*PS] ;Fetch last pointer in array
sub tmp, 1 ;Next vect
XLDR xmm0, [tmp2+pos] ;Start with end of array in last vector
XLDR xmm1, [tmp2+pos+16] ;Keep xor parity in xmm0-7
XLDR xmm2, [tmp2+pos+(2*16)]
XLDR xmm3, [tmp2+pos+(3*16)]
XLDR xmm4, [tmp2+pos+(4*16)]
XLDR xmm5, [tmp2+pos+(5*16)]
XLDR xmm6, [tmp2+pos+(6*16)]
XLDR xmm7, [tmp2+pos+(7*16)]
; Walk the remaining sources from index vec-1 down to 0, xor-ing straight
; from memory into the accumulators.
next_vect:
mov ptr, [arg2+tmp*PS]
sub tmp, 1
xorpd xmm0, [ptr+pos] ;Get next vector (source)
xorpd xmm1, [ptr+pos+16]
xorpd xmm2, [ptr+pos+(2*16)]
xorpd xmm3, [ptr+pos+(3*16)]
xorpd xmm4, [ptr+pos+(4*16)]
xorpd xmm5, [ptr+pos+(5*16)]
xorpd xmm6, [ptr+pos+(6*16)]
xorpd xmm7, [ptr+pos+(7*16)]
;;; prefetch [ptr+pos+(8*16)]
; The flags tested here are still those of "sub tmp, 1" above; xorpd does
; not modify the flags.
jge next_vect ;Loop for each vect
mov tmp, vec ;Back to last vector
mov ptr, [arg2+PS+tmp*PS] ;Address of parity vector
XSTR [ptr+pos], xmm0 ;Write parity xor vector
XSTR [ptr+pos+(1*16)], xmm1
XSTR [ptr+pos+(2*16)], xmm2
XSTR [ptr+pos+(3*16)], xmm3
XSTR [ptr+pos+(4*16)], xmm4
XSTR [ptr+pos+(5*16)], xmm5
XSTR [ptr+pos+(6*16)], xmm6
XSTR [ptr+pos+(7*16)], xmm7
add pos, 128
cmp pos, len
jle loop128
return_pass:
mov return, 0
FUNC_RESTORE
ret
;;; Do one byte at a time for no alignment case
; Each pass handles the byte at offset len-1 in every vector and then
; shrinks len by one, until len is a multiple of 8 (in both the 32-bit and
; 64-bit builds -- the mask below is a literal 8-1, not PS-1).
xor_gen_byte:
mov tmp, vec ;Preset to last vector
loop_1byte:
mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
mov tmp2.b, [ptr+len-1] ;Get array n
sub tmp, 1
nextvect_1byte:
mov ptr, [arg2+tmp*PS]
xor tmp2.b, [ptr+len-1]
sub tmp, 1
jge nextvect_1byte
mov tmp, vec ;Back to last vector
mov ptr, [arg2+PS+tmp*PS] ;Get last vec
mov [ptr+len-1], tmp2.b ;Write parity
sub len, 1
test len, (8-1)
jnz loop_1byte
cmp len, 0
je return_pass
test len, (128-1) ;If not 0 and 128-byte aligned
jz len_aligned_128bytes ; then do aligned case. len = y * 128
;; else we are 8-byte aligned so fall through to recheck
;; Unaligned length cases
len_not_aligned:
test len, (PS-1)
jne xor_gen_byte
; len is a multiple of PS here; tmp3 counts the bytes above the last
; 128-byte boundary that still have to be handled PS at a time.
mov tmp3, len
and tmp3, (128-1) ;Do the unaligned bytes 4-8 at a time
mov tmp, vec ;Preset to last vector
;; Run backwards 8 bytes (4B for 32bit) at a time for (tmp3) bytes
loopN_bytes:
mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
mov tmp2, [ptr+len-PS] ;Get array n
sub tmp, 1
nextvect_Nbytes:
mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
xor tmp2, [ptr+len-PS]
sub tmp, 1
jge nextvect_Nbytes ;Loop for each source
mov tmp, vec ;Back to last vector
mov ptr, [arg2+PS+tmp*PS] ;Get last vec
mov [ptr+len-PS], tmp2 ;Write parity
sub len, PS
sub tmp3, PS
jg loopN_bytes
cmp len, 128 ;Now len is aligned to 128B
jge len_aligned_128bytes ;We can do the rest aligned
cmp len, 0
je return_pass
; Falls through to the failure exit when len is neither 0 nor >= 128.
return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
;;; func core, ver, snum
slversion xor_gen_sse, 00, 0c, 0030

165
raid/xor_gen_test.c Normal file
View File

@ -0,0 +1,165 @@
/**********************************************************************
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include<stdio.h>
#include<stdint.h>
#include<string.h>
#include<stdlib.h>
#include "raid.h"
#include "types.h"
#define TEST_SOURCES 16
#define TEST_LEN 1024
#define TEST_MEM ((TEST_SOURCES + 1)*(TEST_LEN))
#ifndef TEST_SEED
# define TEST_SEED 0x1234
#endif
// Generates pseudo-random data
// Fill the first buffer_size bytes of buf with pseudo-random data, taking
// one rand() value (truncated to a byte) per position, in ascending order.
// Nothing is written when buffer_size is zero or negative.
void rand_buffer(unsigned char *buf, long buffer_size)
{
	unsigned char *cursor = buf;
	long remaining;

	for (remaining = buffer_size; remaining > 0; remaining--)
		*cursor++ = rand();
}
/*
 * Functional test for xor_gen(), cross-checked with xor_check_base().
 *
 * Stages: all-zero input, full-size random input, every vector count from
 * 3 to TEST_SOURCES + 1, every length from 0 to TEST_LEN, and finally
 * buffers that start at increasing 32-byte offsets so that they end exactly
 * at the end of the allocation.
 *
 * Returns 1 as soon as a stage fails (or an allocation fails); otherwise
 * returns the accumulated 'fail' value.
 */
int main(int argc, char *argv[])
{
	int idx, nvects, len, offset, rc, fail = 0;
	void *buffs[TEST_SOURCES + 1];
	char *tmp_buf[TEST_SOURCES + 1];
	const char *parity;

	printf("Test xor_gen_test ");
	srand(TEST_SEED);

	// Allocate one 32-byte aligned buffer per vector
	for (idx = 0; idx < TEST_SOURCES + 1; idx++) {
		void *vect;

		rc = posix_memalign(&vect, 32, TEST_LEN);
		if (rc != 0) {
			printf("alloc error: Fail");
			return 1;
		}
		buffs[idx] = vect;
	}

	// Stage 1: all-zero sources must give all-zero parity
	for (idx = 0; idx < TEST_SOURCES + 1; idx++)
		memset(buffs[idx], 0, TEST_LEN);
	xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);

	parity = (char *)buffs[TEST_SOURCES];
	for (idx = 0; idx < TEST_LEN; idx++) {
		if (parity[idx] != 0)
			fail++;
	}
	if (fail > 0) {
		printf("fail zero test");
		return 1;
	}
	putchar('.');

	// Stage 2: random data, all vectors, full length
	for (idx = 0; idx < TEST_SOURCES + 1; idx++)
		rand_buffer(buffs[idx], TEST_LEN);
	xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
	fail |= xor_check_base(TEST_SOURCES + 1, TEST_LEN, buffs);
	if (fail > 0) {
		printf("fail rand test %d\n", fail);
		return 1;
	}
	putchar('.');

	// Stage 3: vary the number of vectors at full length
	for (nvects = 3; nvects <= TEST_SOURCES + 1; nvects++) {
		for (idx = 0; idx < nvects; idx++)
			rand_buffer(buffs[idx], TEST_LEN);
		xor_gen(nvects, TEST_LEN, buffs);
		fail |= xor_check_base(nvects, TEST_LEN, buffs);
		if (fail > 0) {
			printf("fail rand test %d sources\n", nvects);
			return 1;
		}
		putchar('.');
	}
	fflush(0);

	// Stage 4: vary both the number of vectors and the length
	for (len = 0; len <= TEST_LEN; len++) {
		for (nvects = 3; nvects <= TEST_SOURCES + 1; nvects++) {
			for (idx = 0; idx < nvects; idx++)
				rand_buffer(buffs[idx], len);
			xor_gen(nvects, len, buffs);
			fail |= xor_check_base(nvects, len, buffs);
			if (fail > 0) {
				printf("fail rand test %d sources, len=%d, ret=%d\n", nvects, len,
				       fail);
				return 1;
			}
		}
		putchar('.');
	}

	// Stage 5: buffers that run up to the end of the allocation
	for (offset = 0; offset < TEST_LEN; offset += 32) {
		for (idx = 0; idx < TEST_SOURCES + 1; idx++) {
			rand_buffer((unsigned char *)buffs[idx] + offset, TEST_LEN - offset);
			tmp_buf[idx] = (char *)buffs[idx] + offset;
		}
		xor_gen(TEST_SOURCES + 1, TEST_LEN - offset, (void *)tmp_buf);
		fail |= xor_check_base(TEST_SOURCES + 1, TEST_LEN - offset, (void *)tmp_buf);
		if (fail > 0) {
			printf("fail end test - offset: %d, len: %d\n", offset, TEST_LEN - offset);
			return 1;
		}
		putchar('.');
		fflush(0);
	}

	if (fail == 0)
		printf(" done: Pass\n");
	return fail;
}