The raid6 Q syndrome check has been optimised using the vpermxor
instruction. This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.
The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.
Performance benchmarks:
raid6: altivecx4 gen() 18773 MB/s
raid6: altivecx8 gen() 19438 MB/s
raid6: vpermxor4 gen() 25112 MB/s
raid6: vpermxor8 gen() 26279 MB/s
Bugs fixed:
- A small bug in pq.h regarding a missing and mismatched
ifdef statement
- Fixed test/Makefile to correctly build test on ppc
Signed-off-by: Matt Brown
---
mpe I assume you are ok to take this patch, most of the other ppc raid patches
have gone through you.
Changelog:
v2
- added reference to raid6 paper
- shortened asm lines
- removed redundant Makefile line
- fixed test/Makefile bug
---
include/linux/raid/pq.h | 4 ++
lib/raid6/Makefile | 27 -
lib/raid6/algos.c | 4 ++
lib/raid6/altivec.uc| 3 ++
lib/raid6/test/Makefile | 26 +---
lib/raid6/vpermxor.uc | 104
6 files changed, 161 insertions(+), 7 deletions(-)
create mode 100644 lib/raid6/vpermxor.uc
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
extern const struct raid6_calls raid6_avx512x4;
extern const struct raid6_calls raid6_tilegx8;
extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..7775aad 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o
int4.o \
int8.o int16.o int32.o
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o
avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
CFLAGS_neon1.o += $(NEON_FLAGS)
targets += neon1.c
$(obj)/neon1.c: UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
_altivec2,
_altivec4,
_altivec8,
+ _vpermxor1,
+ _vpermxor2,
+ _vpermxor4,
+ _vpermxor8,
#endif
#if defined(CONFIG_TILEGX)
_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
#include
+#ifdef CONFIG_ALTIVEC
+
#include
#ifdef __KERNEL__
# include
# include
+#endif /* __KERNEL__ */
/*
* This is the C data type to use. We use a vector of
diff --git a/lib/raid6/test/Makefile