On Tue, 14 Oct 2025, Avinash Jayakar wrote:
> Hi,
>
> This patch implements vector lowering for MULT_EXPR. It is a very simplistic
> version of what is done in expand_mult/vect_recog_mult_pattern: shifts are
> used only when multiplying by constants that are exact powers of 2.
> Bootstrapped and regtested on ppc64le with no regression failures.
>
> This patch does resolve PR122065, but a better way would be to reuse the
> code in tree-vect-patterns.cc:vect_synth_mult_by_constant, which handles
> many more cases than just powers of 2. However, the way statements are
> built in pattern recognition (gimple_build_assign, on gimple statements)
> differs from the way they are built during lowering (gimplify_build2, on
> trees).
> Are there any suggestions on how to reuse this function?
I think it's not possible to directly re-use this at this point.  But
using the synth-mult machinery in a similar way to how tree-vect-patterns.cc
does should be possible.
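For concreteness, a rough and untested sketch of what that could look like
in tree-vect-generic.cc, modelled on vect_synth_mult_by_constant; the helper
name expand_vector_mult_synth is made up, and the sketch leaves out the
negate/add variants, the alg_*_factor/alg_*_t2_m codes, the vector
PLUS/MINUS optab checks and any costing that a real version would need:

/* Sketch, untested: synthesize OP0 * COEFF for vector type TYPE using the
   synth_mult machinery from expmed.cc (choose_mult_variant), analogous to
   vect_synth_mult_by_constant in tree-vect-patterns.cc.  */

static tree
expand_vector_mult_synth (gimple_stmt_iterator *gsi, tree type, tree op0,
                          HOST_WIDE_INT coeff)
{
  machine_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (type));
  unsigned int nunits = nunits_for_known_piecewise_op (type);
  int *cnts = XALLOCAVEC (int, nunits);
  struct algorithm alg;
  enum mult_variant variant;

  /* As in tree-vect-patterns.cc, don't limit the sequence on rtx costs;
     MAX_COST mirrors what the pattern code passes.  */
  if (!choose_mult_variant (mode, coeff, &alg, &variant, MAX_COST))
    return NULL_TREE;
  /* Keep the sketch simple: no final negate/add fixup.  */
  if (variant != basic_variant)
    return NULL_TREE;

  tree accum = alg.op[0] == alg_zero ? build_zero_cst (type) : op0;
  for (int i = 1; i < alg.ops; i++)
    {
      for (unsigned int j = 0; j < nunits; j++)
        cnts[j] = alg.log[i];
      tree shifted;
      switch (alg.op[i])
        {
        case alg_shift:
          /* accum = accum << log.  */
          accum = add_shift (gsi, type, accum, cnts, LSHIFT_EXPR);
          break;
        case alg_add_t_m2:
        case alg_sub_t_m2:
          /* accum = accum +/- (op0 << log).  A real version would also
             have to verify the vector PLUS/MINUS optab, the way
             target_supports_mult_synth_alg does for the pattern.  */
          shifted = add_shift (gsi, type, op0, cnts, LSHIFT_EXPR);
          if (shifted == NULL_TREE)
            return NULL_TREE;
          accum = gimplify_build2 (gsi, alg.op[i] == alg_add_t_m2
                                        ? PLUS_EXPR : MINUS_EXPR,
                                   type, accum, shifted);
          break;
        default:
          /* alg_add_factor and friends are left out of the sketch.  */
          return NULL_TREE;
        }
      if (accum == NULL_TREE)
        return NULL_TREE;
    }
  return accum;
}

The caller would extract COEFF from the (INTEGER_CST) element of the
VECTOR_CST multiplier with tree_to_shwi, after checking that it fits.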
Richard.
> Thanks and regards,
> Avinash Jayakar
>
> Use similar logic for lowering the vector MULT_EXPR as is done in
> expand_mult in expmed.cc.
> Previously, if the source code was written in a vector dialect, for example
> using the vector types of altivec.h, the vector lowering pass would reduce
> the MULT_EXPR to its scalar variant if the target did not support the vector
> insn for that type. But better code could be generated if it had recognized
> the pattern and transformed it into shifts.
> For example, this code
> vector unsigned long long
> lshift1_64_altivec (vector unsigned long long a)
> {
> return a * (vector unsigned long long) { 4, 4 };
> }
> generates the following scalar code on power8/9
> .cfi_startproc
> xxpermdi 0,34,34,3
> mfvsrd 9,34
> mfvsrd 10,0
> sldi 9,9,2
> mtvsrd 0,9
> sldi 10,10,2
> mtvsrd 34,10
> xxpermdi 34,0,34,0
> blr
> .long 0
> .byte 0,0,0,0,0,0,0,0
> .cfi_endproc
> This happens even though the target has a vector insn for left shifts. With
> this change, the following is now generated:
> .cfi_startproc
> lxvd2x 32,0,3
> vspltisw 1,2
> vsld 0,0,1
> stxvd2x 32,0,3
> blr
> .long 0
> .byte 0,0,0,0,0,0,0,0
> .cfi_endproc
>
> 2025-11-14 Avinash Jayakar <[email protected]>
>
> gcc/ChangeLog:
> PR vect/122065
> * tree-vect-generic.cc (add_rshift): Rename to ...
> (add_shift): ... this and add CODE parameter.
> (expand_vector_mult): New function to lower MULT_EXPR.
> (expand_vector_divmod): Call add_shift instead of add_rshift.
> (expand_vector_operation): Handle MULT_EXPR.
>
> ---
> gcc/tree-vect-generic.cc | 70 +++++++++++++++++++++++++++++++---------
> 1 file changed, 54 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> index 3c68361870b..6d3572cf22c 100644
> --- a/gcc/tree-vect-generic.cc
> +++ b/gcc/tree-vect-generic.cc
> @@ -460,7 +460,8 @@ expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
> of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
> the result if successful, otherwise return NULL_TREE. */
> static tree
> -add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
> +add_shift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts,
> + enum tree_code code)
> {
> optab op;
> unsigned int i, nunits = nunits_for_known_piecewise_op (type);
> @@ -477,26 +478,59 @@ add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
>
> if (scalar_shift)
> {
> - op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
> + op = optab_for_tree_code (code, type, optab_scalar);
> if (op != unknown_optab
> && can_implement_p (op, TYPE_MODE (type)))
> - return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
> + return gimplify_build2 (gsi, code, type, op0,
> build_int_cst (NULL_TREE, shiftcnts[0]));
> }
>
> - op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
> + op = optab_for_tree_code (code, type, optab_vector);
> if (op != unknown_optab
> && can_implement_p (op, TYPE_MODE (type)))
> {
> tree_vector_builder vec (type, nunits, 1);
> for (i = 0; i < nunits; i++)
> vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
> - return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
> + return gimplify_build2 (gsi, code, type, op0, vec.build ());
> }
>
> return NULL_TREE;
> }
> +/* Try to expand integer vector multiplication by constant using
> + shifts, adds and mults if the native operation is not supported. */
> +static tree
> +expand_vector_mult (gimple_stmt_iterator *gsi, tree type, tree op0,
> + tree op1)
> +{
> + int prec = TYPE_PRECISION (TREE_TYPE (type));
> + optab op;
> + unsigned int nunits = nunits_for_known_piecewise_op (type);
> + int *shifts = XALLOCAVEC (int, nunits * 4);
why * 4?
> +
> + if (prec > HOST_BITS_PER_WIDE_INT)
> + return NULL_TREE;
why's this?
> + op = optab_for_tree_code (LSHIFT_EXPR, type, optab_vector);
> + if (op == unknown_optab
> + || !can_implement_p (op, TYPE_MODE (type)))
> + return NULL_TREE;
This is somewhat redundant - instead (*)
> +
> + /* If all elements are the same value and a power of 2, we can use shifts.  */
> + for (unsigned int i = 0; i < nunits; i++)
> + {
> + tree cst = VECTOR_CST_ELT (op1, i);
>
> + if ((TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
> + || !integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1)
> + return NULL_TREE;
> +
> + shifts[i] = tree_log2 (cst);
> + if (shifts[i] != shifts[0])
> + return NULL_TREE;
The code in add_shift handles both uniform and non-uniform shift
values, so why restrict to a uniform one?
> + }
> + tree cur_op = add_shift (gsi, type, op0, shifts, LSHIFT_EXPR);
(*) this will return NULL when the operation isn't supported.
> + return cur_op;
> +}
> /* Try to expand integer vector division by constant using
> widening multiply, shifts and additions. */
> static tree
> @@ -705,14 +739,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
> {
> for (i = 0; i < nunits; i++)
> shift_temps[i] = prec - 1;
> - cur_op = add_rshift (gsi, type, op0, shift_temps);
> + cur_op = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR);
> if (cur_op != NULL_TREE)
> {
> cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
> uns_type, cur_op);
> for (i = 0; i < nunits; i++)
> shift_temps[i] = prec - shifts[i];
> - cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
> + cur_op = add_shift (gsi, uns_type, cur_op, shift_temps, RSHIFT_EXPR);
> if (cur_op != NULL_TREE)
> addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
> type, cur_op);
> @@ -748,7 +782,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
> if (sign_p == UNSIGNED)
> {
> /* q = op0 >> shift; */
> - cur_op = add_rshift (gsi, type, op0, shifts);
> + cur_op = add_shift (gsi, type, op0, shifts, RSHIFT_EXPR);
> if (cur_op != NULL_TREE)
> return cur_op;
> }
> @@ -761,7 +795,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
> && can_implement_p (op, TYPE_MODE (type)))
> {
> cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
> - cur_op = add_rshift (gsi, type, cur_op, shifts);
> + cur_op = add_shift (gsi, type, cur_op, shifts, RSHIFT_EXPR);
> if (cur_op != NULL_TREE)
> return cur_op;
> }
> @@ -823,7 +857,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
> /* t1 = oprnd0 >> pre_shift;
> t2 = t1 h* ml;
> q = t2 >> post_shift; */
> - cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
> + cur_op = add_shift (gsi, type, cur_op, pre_shifts, RSHIFT_EXPR);
> if (cur_op == NULL_TREE)
> return NULL_TREE;
> break;
> @@ -860,7 +894,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
> /* t1 = oprnd0 >> pre_shift;
> t2 = t1 h* ml;
> q = t2 >> post_shift; */
> - cur_op = add_rshift (gsi, type, cur_op, post_shifts);
> + cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR);
> break;
> case 1:
> /* t1 = oprnd0 h* ml;
> @@ -873,13 +907,13 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
> || !can_implement_p (op, TYPE_MODE (type)))
> return NULL_TREE;
> tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
> - tem = add_rshift (gsi, type, tem, shift_temps);
> + tem = add_shift (gsi, type, tem, shift_temps, RSHIFT_EXPR);
> op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
> if (op == unknown_optab
> || !can_implement_p (op, TYPE_MODE (type)))
> return NULL_TREE;
> tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
> - cur_op = add_rshift (gsi, type, tem, post_shifts);
> + cur_op = add_shift (gsi, type, tem, post_shifts, RSHIFT_EXPR);
> if (cur_op == NULL_TREE)
> return NULL_TREE;
> break;
> @@ -902,10 +936,10 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
> return NULL_TREE;
> cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
> }
> - cur_op = add_rshift (gsi, type, cur_op, post_shifts);
> + cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR);
> if (cur_op == NULL_TREE)
> return NULL_TREE;
> - tem = add_rshift (gsi, type, op0, shift_temps);
> + tem = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR);
> if (tem == NULL_TREE)
> return NULL_TREE;
> op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
> @@ -1130,6 +1164,7 @@ expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type
>
> case TRUNC_DIV_EXPR:
> case TRUNC_MOD_EXPR:
> + case MULT_EXPR:
> {
> tree rhs1 = gimple_assign_rhs1 (assign);
> tree rhs2 = gimple_assign_rhs2 (assign);
> @@ -1141,7 +1176,10 @@ expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type
> || !VECTOR_MODE_P (TYPE_MODE (type)))
> break;
>
> - ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
> + if (code == MULT_EXPR)
> + ret = expand_vector_mult (gsi, type, rhs1, rhs2);
> + else
> + ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
> if (ret != NULL_TREE)
> return ret;
I think it's better to add a separate MULT_EXPR case, even if this
involves some duplication.
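For illustration only (untested), the separate case could look roughly like
the existing TRUNC_DIV_EXPR/TRUNC_MOD_EXPR one, with the same kind of
guards:

      case MULT_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);
          tree ret;

          /* Only a constant vector multiplier on a vector mode, mirroring
             the guards of the division/modulo case.  */
          if (TREE_CODE (rhs2) != VECTOR_CST
              || !VECTOR_MODE_P (TYPE_MODE (type)))
            break;

          ret = expand_vector_mult (gsi, type, rhs1, rhs2);
          if (ret != NULL_TREE)
            return ret;
          break;
        }

That keeps the divmod case untouched and gives an obvious place for any
further MULT_EXPR-specific handling.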
Otherwise looks like a reasonable first step.
Richard.
> break;
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)