Hi Hao Chen, On 10/11/21 12:32 AM, HAO CHEN GUI wrote: > Hi, > > Gentle ping this: > > https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579038.html > > Thanks > > On 8/9/2021 下午 2:42, HAO CHEN GUI wrote: >> Hi, >> >> The patch optimized for vec_reve builtin on rs6000. For V2DI and V2DF, it >> is implemented by xxswapd on all targets. For V16QI, V8HI, V4SI and V4SF, it >> is implemented by quadword byte reverse plus halfword/word byte reverse when >> p9_vector is defined. >> >> Bootstrapped and tested on powerpc64le-linux with no regressions. Is this >> okay for trunk? Any recommendations? Thanks a lot. >> >> >> ChangeLog >> >> 2021-09-08 Haochen Gui <guih...@linux.ibm.com> >> >> gcc/ >> * config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K): >> Use xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw >> for v4si or v4sf when p9_vector is defined. >> (altivec_vreve<mode>2 for VEC_64): Defined. Implemented by >> xxswapd. >> >> gcc/testsuite/ >> * gcc.target/powerpc/vec_reve_1.c: New test. >> * gcc.target/powerpc/vec_reve_2.c: Likewise. >> >> >> patch.diff >> >> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md >> index 1351dafbc41..a1698ce85c0 100644 >> --- a/gcc/config/rs6000/altivec.md >> +++ b/gcc/config/rs6000/altivec.md >> @@ -4049,13 +4049,43 @@ (define_expand "altivec_negv4sf2" >> DONE; >> }) >> >> -;; Vector reverse elements >> +;; Vector reverse elements for V16QI V8HI V4SI V4SF >> (define_expand "altivec_vreve<mode>2" >> - [(set (match_operand:VEC_A 0 "register_operand" "=v") >> - (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")] >> + [(set (match_operand:VEC_K 0 "register_operand" "=v") >> + (unspec:VEC_K [(match_operand:VEC_K 1 "register_operand" "v")] >> UNSPEC_VREVEV))] >> "TARGET_ALTIVEC" >> { >> + if (TARGET_P9_VECTOR) >> + { >> + if (<MODE>mode == V16QImode) >> + emit_insn (gen_p9_xxbrq_v16qi (operands[0], operands[1])); >> + else if (<MODE>mode == V8HImode) >> + { >> + rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1], >> + <MODE>mode, 0); >> + rtx temp = gen_reg_rtx (V1TImode); >> + emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1)); >> + rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp, >> + V1TImode, 0); >> + emit_insn (gen_p9_xxbrh_v8hi (operands[0], subreg2)); >> + } >> + else /* V4SI and V4SF. */ >> + { >> + rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1], >> + <MODE>mode, 0); >> + rtx temp = gen_reg_rtx (V1TImode); >> + emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1)); >> + rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp, >> + V1TImode, 0); >> + if (<MODE>mode == V4SImode) >> + emit_insn (gen_p9_xxbrw_v4si (operands[0], subreg2)); >> + else >> + emit_insn (gen_p9_xxbrw_v4sf (operands[0], subreg2)); >> + } >> + DONE; >> + } >> + >> int i, j, size, num_elements; >> rtvec v = rtvec_alloc (16); >> rtx mask = gen_reg_rtx (V16QImode); >> @@ -4074,6 +4104,17 @@ (define_expand "altivec_vreve<mode>2" >> DONE; >> }) >> >> +;; Vector reverse elements for V2DI V2DF >> +(define_expand "altivec_vreve<mode>2" >> + [(set (match_operand:VEC_64 0 "register_operand" "=v") >> + (unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" "v")] >> + UNSPEC_VREVEV))] >> + "TARGET_ALTIVEC" >> +{ >> + emit_insn (gen_xxswapd_<mode> (operands[0], operands[1])); >> + DONE; >> +}) >> + >> ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL, >> ;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell. >> (define_insn "altivec_lvlx" >> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c >> b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c >> new file mode 100644 >> index 00000000000..83a9206758b >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c >> @@ -0,0 +1,16 @@ >> +/* { dg-require-effective-target powerpc_altivec_ok } */ >> +/* { dg-options "-O2 -maltivec" } */ >> + >> +#include <altivec.h> >> + >> +vector double foo1 (vector double a) >> +{ >> + return vec_reve (a); >> +} >> + >> +vector long long foo2 (vector long long a) >> +{ >> + return vec_reve (a); >> +} >> + >> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */ >> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c >> b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c >> new file mode 100644 >> index 00000000000..b6dd33d6d79 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c >> @@ -0,0 +1,28 @@ >> +/* { dg-require-effective-target powerpc_p9vector_ok } */ >> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -maltivec" } */
One nit here -- you don't need -maltivec as it's redundant with -mdejagnu-cpu=power9. Looks fine to me with or without that fixed. Recommend maintainers approve. Thanks for the improvement! Bill >> + >> +#include <altivec.h> >> + >> +vector int foo1 (vector int a) >> +{ >> + return vec_reve (a); >> +} >> + >> +vector float foo2 (vector float a) >> +{ >> + return vec_reve (a); >> +} >> + >> +vector short foo3 (vector short a) >> +{ >> + return vec_reve (a); >> +} >> + >> +vector char foo4 (vector char a) >> +{ >> + return vec_reve (a); >> +} >> + >> +/* { dg-final { scan-assembler-times {\mxxbrq\M} 4 } } */ >> +/* { dg-final { scan-assembler-times {\mxxbrw\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mxxbrh\M} 1 } } */ >>