On Tue, 14 Oct 2025, Avinash Jayakar wrote:

> Hi,
> 
> This patch implements vector lowering for MULT_EXPR. This is a very simplistic
> version of what has been done in expand_mult/vect_recog_mult_pattern, where we
> use shifts only when multiplying by constants that are an exact power of 2.
> Bootstrapped and regtested on ppc64le with no regression failures.
> 
> This patch does resolve PR122065, but a better way would be to reuse the
> code in tree-vect-patterns.cc:vect_synth_mult_by_constant, which handles a lot
> more cases than just powers of 2. But I see that the way the statements are
> built in pattern recognition (gimple_build_assign, on gimple) differs from
> the way they are built when lowering (gimplify_build2, on trees).
> Are there any suggestions on how to reuse this function?

I think it's not possible to directly re-use this at this point.  But
using the synth-mult machinery in a similar way to how tree-vect-patterns.cc
does should be possible.

Richard.

> Thanks and regards,
> Avinash Jayakar
> 
> Use similar logic for lowering the vector operation for MULT_EXPR as done in
> expand_mult in expmed.cc. 
> Previously, if the source code is written in a vector dialect, for example the
> vector types of altivec.h, the vectorizer would lower the MULT_EXPR to scalar
> variant if the target did not support the vector insn for that type. But 
> better
> code could be generated had it recognized the pattern and transformed it to
> shifts.
> For example, this code
> vector unsigned long long
> lshift1_64_altivec (vector unsigned long long a)
> {
>   return a * (vector unsigned long long) { 4, 4 };
> }
> generates the scalar code in power8/9
>       .cfi_startproc
>       xxpermdi 0,34,34,3
>       mfvsrd 9,34
>       mfvsrd 10,0
>       sldi 9,9,2
>       mtvsrd 0,9
>       sldi 10,10,2
>       mtvsrd 34,10
>       xxpermdi 34,0,34,0
>       blr
>       .long 0
>       .byte 0,0,0,0,0,0,0,0
>       .cfi_endproc
> although it has a vector insn for left shift. With this change now the
> following is generated
>       .cfi_startproc
>       lxvd2x 32,0,3
>       vspltisw 1,2
>       vsld 0,0,1
>       stxvd2x 32,0,3
>       blr
>       .long 0
>       .byte 0,0,0,0,0,0,0,0
>       .cfi_endproc
> 
> 2025-11-14  Avinash Jayakar  <[email protected]>
> 
> gcc/ChangeLog:
>       PR vect/122065
>         * tree-vect-generic.cc (add_rshift): Update name and add code 
> parameter.
>         (add_shift): Update name.
>         (expand_vector_mult): New lowering for MULT_EXPR.
>         (expand_vector_divmod): Use updated function name.
>         (expand_vector_operation): Use updated function name.
> 
> ---
>  gcc/tree-vect-generic.cc | 70 +++++++++++++++++++++++++++++++---------
>  1 file changed, 54 insertions(+), 16 deletions(-)
> 
> diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> index 3c68361870b..6d3572cf22c 100644
> --- a/gcc/tree-vect-generic.cc
> +++ b/gcc/tree-vect-generic.cc
> @@ -460,7 +460,8 @@ expand_vector_comparison (gimple_stmt_iterator *gsi, tree 
> type, tree op0,
>     of OP0 with shift counts in SHIFTCNTS array and return the temporary 
> holding
>     the result if successful, otherwise return NULL_TREE.  */
>  static tree
> -add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
> +add_shift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts,
> +        enum tree_code code)
>  {
>    optab op;
>    unsigned int i, nunits = nunits_for_known_piecewise_op (type);
> @@ -477,26 +478,59 @@ add_rshift (gimple_stmt_iterator *gsi, tree type, tree 
> op0, int *shiftcnts)
>  
>    if (scalar_shift)
>      {
> -      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
> +      op = optab_for_tree_code (code, type, optab_scalar);
>        if (op != unknown_optab
>         && can_implement_p (op, TYPE_MODE (type)))
> -     return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
> +     return gimplify_build2 (gsi, code, type, op0,
>                               build_int_cst (NULL_TREE, shiftcnts[0]));
>      }
>  
> -  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
> +  op = optab_for_tree_code (code, type, optab_vector);
>    if (op != unknown_optab
>        && can_implement_p (op, TYPE_MODE (type)))
>      {
>        tree_vector_builder vec (type, nunits, 1);
>        for (i = 0; i < nunits; i++)
>       vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
> -      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
> +      return gimplify_build2 (gsi, code, type, op0, vec.build ());
>      }
>  
>    return NULL_TREE;
>  }
> +/* Try to expand integer vector multiplication by constant using
> +   shifts, add, mult if native operation not supported.  */
> +static tree
> +expand_vector_mult (gimple_stmt_iterator *gsi, tree type, tree op0,
> +                 tree op1)
> +{
> +  int prec = TYPE_PRECISION (TREE_TYPE (type));
> +  optab op;
> +  unsigned int nunits = nunits_for_known_piecewise_op (type);
> +  int *shifts = XALLOCAVEC (int, nunits * 4);

why * 4?

> +
> +  if (prec > HOST_BITS_PER_WIDE_INT)
> +    return NULL_TREE;

why's this?

> +  op = optab_for_tree_code (LSHIFT_EXPR, type, optab_vector);
> +  if (op == unknown_optab
> +      || !can_implement_p (op, TYPE_MODE (type)))
> +    return NULL_TREE;

This is somewhat redundant - instead (*)

> +
> +  // if all element are same value and a power of 2, then we can use shifts
> +  for (unsigned int i = 0; i < nunits; i++)
> +    {
> +      tree cst = VECTOR_CST_ELT (op1, i);
>  
> +      if ((TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
> +       || !integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1)
> +     return NULL_TREE;
> +
> +      shifts[i] = tree_log2 (cst);
> +      if (shifts[i] != shifts[0])
> +     return NULL_TREE;

The code in add_shift handles both uniform and non-uniform shift
values, so why restrict to a uniform one?

> +    }
> +  tree cur_op = add_shift (gsi, type, op0, shifts, LSHIFT_EXPR);

(*) this will return NULL when the operation isn't supported.

> +  return cur_op;
> +}
>  /* Try to expand integer vector division by constant using
>     widening multiply, shifts and additions.  */
>  static tree
> @@ -705,14 +739,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree 
> type, tree op0,
>           {
>             for (i = 0; i < nunits; i++)
>               shift_temps[i] = prec - 1;
> -           cur_op = add_rshift (gsi, type, op0, shift_temps);
> +           cur_op = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR);
>             if (cur_op != NULL_TREE)
>               {
>                 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
>                                           uns_type, cur_op);
>                 for (i = 0; i < nunits; i++)
>                   shift_temps[i] = prec - shifts[i];
> -               cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
> +               cur_op = add_shift (gsi, uns_type, cur_op, shift_temps, 
> RSHIFT_EXPR);
>                 if (cur_op != NULL_TREE)
>                   addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
>                                             type, cur_op);
> @@ -748,7 +782,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree 
> type, tree op0,
>         if (sign_p == UNSIGNED)
>           {
>             /* q = op0 >> shift;  */
> -           cur_op = add_rshift (gsi, type, op0, shifts);
> +           cur_op = add_shift (gsi, type, op0, shifts, RSHIFT_EXPR);
>             if (cur_op != NULL_TREE)
>               return cur_op;
>           }
> @@ -761,7 +795,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree 
> type, tree op0,
>                 && can_implement_p (op, TYPE_MODE (type)))
>               {
>                 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
> -               cur_op = add_rshift (gsi, type, cur_op, shifts);
> +               cur_op = add_shift (gsi, type, cur_op, shifts, RSHIFT_EXPR);
>                 if (cur_op != NULL_TREE)
>                   return cur_op;
>               }
> @@ -823,7 +857,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree 
> type, tree op0,
>        /* t1 = oprnd0 >> pre_shift;
>        t2 = t1 h* ml;
>        q = t2 >> post_shift;  */
> -      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
> +      cur_op = add_shift (gsi, type, cur_op, pre_shifts, RSHIFT_EXPR);
>        if (cur_op == NULL_TREE)
>       return NULL_TREE;
>        break;
> @@ -860,7 +894,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree 
> type, tree op0,
>        /* t1 = oprnd0 >> pre_shift;
>        t2 = t1 h* ml;
>        q = t2 >> post_shift;  */
> -      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
> +      cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR);
>        break;
>      case 1:
>        /* t1 = oprnd0 h* ml;
> @@ -873,13 +907,13 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree 
> type, tree op0,
>         || !can_implement_p (op, TYPE_MODE (type)))
>       return NULL_TREE;
>        tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
> -      tem = add_rshift (gsi, type, tem, shift_temps);
> +      tem = add_shift (gsi, type, tem, shift_temps, RSHIFT_EXPR);
>        op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
>        if (op == unknown_optab
>         || !can_implement_p (op, TYPE_MODE (type)))
>       return NULL_TREE;
>        tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
> -      cur_op = add_rshift (gsi, type, tem, post_shifts);
> +      cur_op = add_shift (gsi, type, tem, post_shifts, RSHIFT_EXPR);
>        if (cur_op == NULL_TREE)
>       return NULL_TREE;
>        break;
> @@ -902,10 +936,10 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree 
> type, tree op0,
>           return NULL_TREE;
>         cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
>       }
> -      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
> +      cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR);
>        if (cur_op == NULL_TREE)
>       return NULL_TREE;
> -      tem = add_rshift (gsi, type, op0, shift_temps);
> +      tem = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR);
>        if (tem == NULL_TREE)
>       return NULL_TREE;
>        op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
> @@ -1130,6 +1164,7 @@ expand_vector_operation (gimple_stmt_iterator *gsi, 
> tree type, tree compute_type
>  
>        case TRUNC_DIV_EXPR:
>        case TRUNC_MOD_EXPR:
> +      case MULT_EXPR:
>       {
>         tree rhs1 = gimple_assign_rhs1 (assign);
>         tree rhs2 = gimple_assign_rhs2 (assign);
> @@ -1141,7 +1176,10 @@ expand_vector_operation (gimple_stmt_iterator *gsi, 
> tree type, tree compute_type
>             || !VECTOR_MODE_P (TYPE_MODE (type)))
>           break;
>  
> -       ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
> +       if (code == MULT_EXPR)
> +         ret = expand_vector_mult (gsi, type, rhs1, rhs2);
> +       else
> +         ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
>         if (ret != NULL_TREE)
>           return ret;

I think it's better to add a separate MULT_EXPR case, even if this
involves some duplication.

Otherwise looks like a reasonable first step.

Richard.

>         break;
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to