Hi Avinash, On 14/08/25 11:27 am, Avinash Jayakar wrote: > Hi all, > > Below is a draft of the patch for PR119702. I request you to > please review it. > > In vector extensions for rs6000, there is no immediate version > of left shift. This leads to having 2 instructions for the simple > case of left shift by one. > vspltisw 0,1 > vsld 2,2,0 > This could have been performed simply with one add instruction > vaddudm 2,2,2 > This patch fixes this issue. During the expansion of vashl op > check if the operand number 2 is a constant and its value is 1, > if yes then generate plus op, otherwise materialize the result > of operand 2 into a register and generate ashift op. >
There is code in the routine expand_shift_1() that checks if the left shift can be implemented as a sequence of ADDs: /* Check whether its cheaper to implement a left shift by a constant bit count by a sequence of additions. */ if (code == LSHIFT_EXPR && CONST_INT_P (op1) && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode) && INTVAL (op1) < MAX_BITS_PER_WORD && (shift_cost (speed, mode, INTVAL (op1)) > INTVAL (op1) * add_cost (speed, mode)) && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) We need to check why this is not working for powerpc. Can the costs be improved for powerpc to enable replacement of shift by add? Regards, Surya > > 2025-08-13 Avinash Jayakar <avina...@linux.ibm.com> > > PR target/119702 > gcc: > * config/rs6000/vector.md (vashl<mode>3): Generate add when > operand 2 is a constant with value 1. > gcc/testsuite: > * gcc.target/powerpc/pr119702-1.c: New test (for > checking generation of add for *2, << 1 and x+x). > --- > gcc/config/rs6000/vector.md | 24 ++++++++++- > gcc/testsuite/gcc.target/powerpc/pr119702-1.c | 40 +++++++++++++++++++ > 2 files changed, 62 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr119702-1.c > > diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md > index f5797387ca7..02e2361e4a3 100644 > --- a/gcc/config/rs6000/vector.md > +++ b/gcc/config/rs6000/vector.md > @@ -1391,9 +1391,29 @@ > (define_expand "vashl<mode>3" > [(set (match_operand:VEC_I 0 "vint_operand") > (ashift:VEC_I (match_operand:VEC_I 1 "vint_operand") > - (match_operand:VEC_I 2 "vint_operand")))] > + (match_operand:VEC_I 2 "general_operand")))] > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > - "") > +{ > + rtx op2 = operands[2]; > + if (CONSTANT_P(op2)) { > + HOST_WIDE_INT shift = INTVAL(const_vector_elt (op2, 0)); > + > + if (shift == 1) > + { > + emit_insn (gen_rtx_SET (operands[0], > + gen_rtx_PLUS (<MODE>mode, > + operands[1], > + operands[1]))); > + DONE; > + } > + } > + 
operands[2] = copy_to_mode_reg (<MODE>mode, op2); > + emit_insn (gen_rtx_SET (operands[0], > + gen_rtx_ASHIFT (<MODE>mode, > + operands[1], > + operands[2]))); > + DONE; > +}) > > ;; No immediate version of this 128-bit instruction > (define_expand "vashl<mode>3" > diff --git a/gcc/testsuite/gcc.target/powerpc/pr119702-1.c > b/gcc/testsuite/gcc.target/powerpc/pr119702-1.c > new file mode 100644 > index 00000000000..2f53a26eaaf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr119702-1.c > @@ -0,0 +1,40 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +/* { dg-require-effective-target lp64 } */ > +/* { dg-require-effective-target powerpc_vsx } */ > +#include <altivec.h> > +#define ull unsigned long long > + > +void lshift1(unsigned long long *a) { > + a[0] <<= 1; > + a[1] <<= 1; > +} > + > +vector ull lshift1_vector(vector ull a) { > + return a <<= 1; > +} > + > +void add(unsigned long long *a) > +{ > + a[0] += a[0]; > + a[1] += a[1]; > +} > + > +vector ull add_vector(vector ull a) { > + return a + a; > +} > + > +void mult2(unsigned long long *a) > +{ > + a[0] *= 2; > + a[1] *= 2; > +} > + > +vector ull mult2_vector(vector ull a) > +{ > + return a*2; > +} > + > +/* { dg-final { scan-assembler-times {\mvaddudm?\M} 6 } } */ > +