On Fri, 24 Oct 2025, Avinash Jayakar wrote:
> Hi,
>
> I have incorporated the changes suggested in the previous version. For now, a
> bare-bones version of the multiply-by-constant pattern recognition is implemented
> in tree-vect-generic.cc, which should help fix the failing test cases in
> PR119702.
>
> But to fix PR122065, we will need to implement the lowering as done in
> tree-vect-patterns.cc, which I can do in a separate patch.
>
> Bootstrapped and regtested on ppc64le and x86_64. Ok for trunk?
OK.
Thanks,
richard.
> Changes from v1:
> 1. Streamline conditional checks done in expand_vector_mult.
> 2. Use separate case for MULT_EXPR in expand_vector_operation.
>
> Thanks and regards,
> Avinash Jayakar
>
> Use similar logic for lowering the vector operation for MULT_EXPR as done in
> expand_mult in expmed.cc, but in this commit only a bare-bones version of what
> is done in vect_synth_mult_by_constant is implemented, which only works if the
> constant is a positive power of 2.
> Previously, if the source code was written in a vector dialect, for example the
> vector types of altivec.h, the vectorizer would lower the MULT_EXPR to the
> scalar variant if the target did not support the vector insn for that type. But
> better code could be generated had it recognized the pattern and transformed it
> into shifts.
>
> For example, this code
>
> vector unsigned long long
> lshift1_64_altivec (vector unsigned long long a)
> {
> return a * (vector unsigned long long) { 4, 4 };
> }
> generates the scalar code in power8/9
> .cfi_startproc
> xxpermdi 0,34,34,3
> mfvsrd 9,34
> mfvsrd 10,0
> sldi 9,9,2
> mtvsrd 0,9
> sldi 10,10,2
> mtvsrd 34,10
> xxpermdi 34,0,34,0
> blr
> .long 0
> .byte 0,0,0,0,0,0,0,0
> .cfi_endproc
> although it has a vector insn for left shift. With this change now the
> following is generated
> .cfi_startproc
> lxvd2x 32,0,3
> vspltisw 1,2
> vsld 0,0,1
> stxvd2x 32,0,3
> blr
> .long 0
> .byte 0,0,0,0,0,0,0,0
> .cfi_endproc
>
> 2025-11-24 Avinash Jayakar <[email protected]>
>
> gcc/ChangeLog:
> PR vect/122065
> * tree-vect-generic.cc (add_rshift): Rename to ...
> (add_shift): ... this and add CODE parameter.
> (expand_vector_mult): New function lowering MULT_EXPR by a
> power-of-2 constant to shifts.
> (expand_vector_divmod): Adjust callers for the rename.
> (expand_vector_operation): Handle MULT_EXPR.
>
> ---
> gcc/tree-vect-generic.cc | 68 +++++++++++++++++++++++++++++++---------
> 1 file changed, 53 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> index 3c68361870b..a1e73a1f67e 100644
> --- a/gcc/tree-vect-generic.cc
> +++ b/gcc/tree-vect-generic.cc
> @@ -460,7 +460,8 @@ expand_vector_comparison (gimple_stmt_iterator *gsi, tree
> type, tree op0,
> of OP0 with shift counts in SHIFTCNTS array and return the temporary
> holding
> the result if successful, otherwise return NULL_TREE. */
> static tree
> -add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
> +add_shift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts,
> + enum tree_code code)
> {
> optab op;
> unsigned int i, nunits = nunits_for_known_piecewise_op (type);
> @@ -477,26 +478,47 @@ add_rshift (gimple_stmt_iterator *gsi, tree type, tree
> op0, int *shiftcnts)
>
> if (scalar_shift)
> {
> - op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
> + op = optab_for_tree_code (code, type, optab_scalar);
> if (op != unknown_optab
> && can_implement_p (op, TYPE_MODE (type)))
> - return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
> + return gimplify_build2 (gsi, code, type, op0,
> build_int_cst (NULL_TREE, shiftcnts[0]));
> }
>
> - op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
> + op = optab_for_tree_code (code, type, optab_vector);
> if (op != unknown_optab
> && can_implement_p (op, TYPE_MODE (type)))
> {
> tree_vector_builder vec (type, nunits, 1);
> for (i = 0; i < nunits; i++)
> vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
> - return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
> + return gimplify_build2 (gsi, code, type, op0, vec.build ());
> }
>
> return NULL_TREE;
> }
> +/* Try to expand integer vector multiplication by a power-of-2 constant
> + using left shifts. */
> +static tree
> +expand_vector_mult (gimple_stmt_iterator *gsi, tree type, tree op0,
> + tree op1)
> +{
> + unsigned int nunits = nunits_for_known_piecewise_op (type);
> + int *shifts = XALLOCAVEC (int, nunits);
> +
> + // if every element is a positive power of 2, then we can use shifts
> + for (unsigned int i = 0; i < nunits; i++)
> + {
> + tree cst = VECTOR_CST_ELT (op1, i);
> + if ((TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
> + || !integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1)
> + return NULL_TREE;
>
> + shifts[i] = tree_log2 (cst);
> + }
> + tree cur_op = add_shift (gsi, type, op0, shifts, LSHIFT_EXPR);
> + return cur_op;
> +}
> /* Try to expand integer vector division by constant using
> widening multiply, shifts and additions. */
> static tree
> @@ -705,14 +727,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
> type, tree op0,
> {
> for (i = 0; i < nunits; i++)
> shift_temps[i] = prec - 1;
> - cur_op = add_rshift (gsi, type, op0, shift_temps);
> + cur_op = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR);
> if (cur_op != NULL_TREE)
> {
> cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
> uns_type, cur_op);
> for (i = 0; i < nunits; i++)
> shift_temps[i] = prec - shifts[i];
> - cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
> + cur_op = add_shift (gsi, uns_type, cur_op, shift_temps,
> RSHIFT_EXPR);
> if (cur_op != NULL_TREE)
> addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
> type, cur_op);
> @@ -748,7 +770,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
> type, tree op0,
> if (sign_p == UNSIGNED)
> {
> /* q = op0 >> shift; */
> - cur_op = add_rshift (gsi, type, op0, shifts);
> + cur_op = add_shift (gsi, type, op0, shifts, RSHIFT_EXPR);
> if (cur_op != NULL_TREE)
> return cur_op;
> }
> @@ -761,7 +783,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
> type, tree op0,
> && can_implement_p (op, TYPE_MODE (type)))
> {
> cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
> - cur_op = add_rshift (gsi, type, cur_op, shifts);
> + cur_op = add_shift (gsi, type, cur_op, shifts, RSHIFT_EXPR);
> if (cur_op != NULL_TREE)
> return cur_op;
> }
> @@ -823,7 +845,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
> type, tree op0,
> /* t1 = oprnd0 >> pre_shift;
> t2 = t1 h* ml;
> q = t2 >> post_shift; */
> - cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
> + cur_op = add_shift (gsi, type, cur_op, pre_shifts, RSHIFT_EXPR);
> if (cur_op == NULL_TREE)
> return NULL_TREE;
> break;
> @@ -860,7 +882,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
> type, tree op0,
> /* t1 = oprnd0 >> pre_shift;
> t2 = t1 h* ml;
> q = t2 >> post_shift; */
> - cur_op = add_rshift (gsi, type, cur_op, post_shifts);
> + cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR);
> break;
> case 1:
> /* t1 = oprnd0 h* ml;
> @@ -873,13 +895,13 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
> type, tree op0,
> || !can_implement_p (op, TYPE_MODE (type)))
> return NULL_TREE;
> tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
> - tem = add_rshift (gsi, type, tem, shift_temps);
> + tem = add_shift (gsi, type, tem, shift_temps, RSHIFT_EXPR);
> op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
> if (op == unknown_optab
> || !can_implement_p (op, TYPE_MODE (type)))
> return NULL_TREE;
> tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
> - cur_op = add_rshift (gsi, type, tem, post_shifts);
> + cur_op = add_shift (gsi, type, tem, post_shifts, RSHIFT_EXPR);
> if (cur_op == NULL_TREE)
> return NULL_TREE;
> break;
> @@ -902,10 +924,10 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
> type, tree op0,
> return NULL_TREE;
> cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
> }
> - cur_op = add_rshift (gsi, type, cur_op, post_shifts);
> + cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR);
> if (cur_op == NULL_TREE)
> return NULL_TREE;
> - tem = add_rshift (gsi, type, op0, shift_temps);
> + tem = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR);
> if (tem == NULL_TREE)
> return NULL_TREE;
> op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
> @@ -1146,7 +1168,23 @@ expand_vector_operation (gimple_stmt_iterator *gsi,
> tree type, tree compute_type
> return ret;
> break;
> }
> + case MULT_EXPR:
> + {
> + tree rhs1 = gimple_assign_rhs1 (assign);
> + tree rhs2 = gimple_assign_rhs2 (assign);
> + tree ret;
> +
> + if (!optimize
> + || !VECTOR_INTEGER_TYPE_P (type)
> + || TREE_CODE (rhs2) != VECTOR_CST
> + || !VECTOR_MODE_P (TYPE_MODE (type)))
> + break;
>
> + ret = expand_vector_mult (gsi, type, rhs1, rhs2);
> + if (ret != NULL_TREE)
> + return ret;
> + break;
> + }
> default:
> break;
> }
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)