Hi Avinash,

On 14/08/25 11:27 am, Avinash Jayakar wrote:
> Hi all,
> 
> Below is a draft of the patch for PR119702. I request you to 
> please review it.
> 
> In vector extensions for rs6000, there is no immediate version
> of left shift. This leads to having 2 instructions for the simple
> case of left shift by one.
>       vspltisw 0,1
>       vsld 2,2,0
> This could have been performed simply with one add instruction
>       vaddudm 2,2,2
> This patch fixes this issue. During the expansion of vashl op
> check if the operand number 2 is a constant and its value is 1,
> if yes then generate plus op, otherwise materialize the result
> of operand 2 into a register and generate ashift op.
> 

There is code in the routine expand_shift_1() that checks whether a left
shift can be implemented as a sequence of ADDs:

  /* Check whether its cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
          > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)

We need to check why this cost check is not taking effect for powerpc.
Can the costs be improved for powerpc to enable replacement of shift by add?

Regards,
Surya


> 
> 2025-08-13  Avinash Jayakar  <avina...@linux.ibm.com>
> 
>       PR target/119702
> gcc:
>       * config/rs6000/vector.md (vashl<mode>3): Generate add when
>         operand 2 is a constant with value 1.
> gcc/testsuite:
>       * gcc.target/powerpc/pr119702-1.c: New test (for
>         checking generation of add for *2, << 1 and x+x).
> ---
>  gcc/config/rs6000/vector.md                   | 24 ++++++++++-
>  gcc/testsuite/gcc.target/powerpc/pr119702-1.c | 40 +++++++++++++++++++
>  2 files changed, 62 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr119702-1.c
> 
> diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
> index f5797387ca7..02e2361e4a3 100644
> --- a/gcc/config/rs6000/vector.md
> +++ b/gcc/config/rs6000/vector.md
> @@ -1391,9 +1391,29 @@
>  (define_expand "vashl<mode>3"
>    [(set (match_operand:VEC_I 0 "vint_operand")
>       (ashift:VEC_I (match_operand:VEC_I 1 "vint_operand")
> -                   (match_operand:VEC_I 2 "vint_operand")))]
> +                   (match_operand:VEC_I 2 "general_operand")))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
> -  "")
> +{
> +  rtx op2 = operands[2];
> +  if (CONSTANT_P(op2)) {
> +    HOST_WIDE_INT shift = INTVAL(const_vector_elt (op2, 0));
> +
> +    if (shift == 1)
> +      {
> +        emit_insn (gen_rtx_SET (operands[0],
> +                                gen_rtx_PLUS (<MODE>mode,
> +                                              operands[1],
> +                                              operands[1])));
> +        DONE;
> +      }
> +  }
> +  operands[2] = copy_to_mode_reg (<MODE>mode, op2);
> +  emit_insn (gen_rtx_SET (operands[0],
> +                          gen_rtx_ASHIFT (<MODE>mode,
> +                                          operands[1],
> +                                          operands[2])));
> +  DONE;
> +})
>  
>  ;; No immediate version of this 128-bit instruction
>  (define_expand "vashl<mode>3"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr119702-1.c 
> b/gcc/testsuite/gcc.target/powerpc/pr119702-1.c
> new file mode 100644
> index 00000000000..2f53a26eaaf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr119702-1.c
> @@ -0,0 +1,40 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-require-effective-target powerpc_vsx } */
> +#include <altivec.h>
> +#define ull unsigned long long 
> +
> +void lshift1(unsigned long long *a) {
> +  a[0] <<= 1;
> +  a[1] <<= 1;
> +}
> +
> +vector ull lshift1_vector(vector ull a) {
> + return a <<= 1;
> +}
> +
> +void add(unsigned long long *a) 
> +{
> +  a[0] += a[0];
> +  a[1] += a[1];
> +}
> +
> +vector ull add_vector(vector ull a) {
> +  return a + a;
> +}
> +
> +void mult2(unsigned long long *a)
> +{
> +  a[0] *= 2;
> +  a[1] *= 2;
> +}
> +
> +vector ull mult2_vector(vector ull a)
> +{
> +  return a*2;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvaddudm?\M} 6 } } */
> +

Reply via email to