Hi Hao Chen,

On 10/11/21 12:32 AM, HAO CHEN GUI wrote:
> Hi,
>
>      Gentle ping this:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579038.html
>
> Thanks
>
> On 8/9/2021 下午 2:42, HAO CHEN GUI wrote:
>> Hi,
>>
>>   The patch optimized for vec_reve builtin on rs6000. For V2DI and V2DF, it 
>> is implemented by xxswapd on all targets. For V16QI, V8HI, V4SI and V4SF, it 
>> is implemented by quadword byte reverse plus halfword/word byte reverse when 
>> p9_vector is defined.
>>
>>   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
>> okay for trunk? Any recommendations? Thanks a lot.
>>
>>
>> ChangeLog
>>
>> 2021-09-08 Haochen Gui <guih...@linux.ibm.com>
>>
>> gcc/
>>         * config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K):
>>         Use xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw
>>         for v4si or v4sf when p9_vector is defined.
>>         (altivec_vreve<mode>2 for VEC_64): Defined. Implemented by
>>         xxswapd.
>>
>> gcc/testsuite/
>>         * gcc.target/powerpc/vec_reve_1.c: New test.
>>         * gcc.target/powerpc/vec_reve_2.c: Likewise.
>>
>>
>> patch.diff
>>
>> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
>> index 1351dafbc41..a1698ce85c0 100644
>> --- a/gcc/config/rs6000/altivec.md
>> +++ b/gcc/config/rs6000/altivec.md
>> @@ -4049,13 +4049,43 @@ (define_expand "altivec_negv4sf2"
>>    DONE;
>>  })
>>
>> -;; Vector reverse elements
>> +;; Vector reverse elements for V16QI V8HI V4SI V4SF
>>  (define_expand "altivec_vreve<mode>2"
>> -  [(set (match_operand:VEC_A 0 "register_operand" "=v")
>> -       (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")]
>> +  [(set (match_operand:VEC_K 0 "register_operand" "=v")
>> +       (unspec:VEC_K [(match_operand:VEC_K 1 "register_operand" "v")]
>>                       UNSPEC_VREVEV))]
>>    "TARGET_ALTIVEC"
>>  {
>> +  if (TARGET_P9_VECTOR)
>> +    {
>> +      if (<MODE>mode == V16QImode)
>> +       emit_insn (gen_p9_xxbrq_v16qi (operands[0], operands[1]));
>> +      else if (<MODE>mode == V8HImode)
>> +       {
>> +         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
>> +                                            <MODE>mode, 0);
>> +         rtx temp = gen_reg_rtx (V1TImode);
>> +         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
>> +         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
>> +                                            V1TImode, 0);
>> +         emit_insn (gen_p9_xxbrh_v8hi (operands[0], subreg2));
>> +       }
>> +      else /* V4SI and V4SF.  */
>> +       {
>> +         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
>> +                                            <MODE>mode, 0);
>> +         rtx temp = gen_reg_rtx (V1TImode);
>> +         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
>> +         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
>> +                                            V1TImode, 0);
>> +         if (<MODE>mode == V4SImode)
>> +           emit_insn (gen_p9_xxbrw_v4si (operands[0], subreg2));
>> +         else
>> +           emit_insn (gen_p9_xxbrw_v4sf (operands[0], subreg2));
>> +       }
>> +      DONE;
>> +    }
>> +
>>    int i, j, size, num_elements;
>>    rtvec v = rtvec_alloc (16);
>>    rtx mask = gen_reg_rtx (V16QImode);
>> @@ -4074,6 +4104,17 @@ (define_expand "altivec_vreve<mode>2"
>>    DONE;
>>  })
>>
>> +;; Vector reverse elements for V2DI V2DF
>> +(define_expand "altivec_vreve<mode>2"
>> +  [(set (match_operand:VEC_64 0 "register_operand" "=v")
>> +       (unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" "v")]
>> +                     UNSPEC_VREVEV))]
>> +  "TARGET_ALTIVEC"
>> +{
>> +  emit_insn (gen_xxswapd_<mode> (operands[0], operands[1]));
>> +  DONE;
>> +})
>> +
>>  ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
>>  ;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell.
>>  (define_insn "altivec_lvlx"
>> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c 
>> b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
>> new file mode 100644
>> index 00000000000..83a9206758b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
>> @@ -0,0 +1,16 @@
>> +/* { dg-require-effective-target powerpc_altivec_ok } */
>> +/* { dg-options "-O2 -maltivec" } */
>> +
>> +#include <altivec.h>
>> +
>> +vector double foo1 (vector double a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +vector long long foo2 (vector long long a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c 
>> b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
>> new file mode 100644
>> index 00000000000..b6dd33d6d79
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
>> @@ -0,0 +1,28 @@
>> +/* { dg-require-effective-target powerpc_p9vector_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -maltivec" } */

One nit here -- you don't need -maltivec as it's redundant with 
-mdejagnu-cpu=power9.

Looks fine to me with or without that fixed.  Recommend maintainers approve.

Thanks for the improvement!
Bill
>> +
>> +#include <altivec.h>
>> +
>> +vector int foo1 (vector int a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +vector float foo2 (vector float a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +vector short foo3 (vector short a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +vector char foo4 (vector char a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mxxbrq\M} 4 } } */
>> +/* { dg-final { scan-assembler-times {\mxxbrw\M} 2 } } */
>> +/* { dg-final { scan-assembler-times {\mxxbrh\M} 1 } } */
>>

Reply via email to