https://gcc.gnu.org/g:abbed7806672d0d6cfd6169d084d031d6e737fe1
commit r16-4675-gabbed7806672d0d6cfd6169d084d031d6e737fe1 Author: Avinash Jayakar <[email protected]> Date: Tue Oct 28 09:49:21 2025 +0530 vect: Add vector lowering for MULT_EXPR for constant pow2 multiplication. Lower the vector MULT_EXPR using logic similar to what expand_mult in expmed.cc does. This commit implements only a bare-bones version of what vect_synth_mult_by_constant does: it works only if the constant is a positive power of 2. Previously, if the source code was written in a vector dialect, for example using the vector types of altivec.h, the vectorizer would lower the MULT_EXPR to its scalar variant if the target did not support the vector insn for that type. Better code could be generated had it recognized the pattern and transformed it into shifts. For example, this code: vector unsigned long long lshift1_64_altivec (vector unsigned long long a) { return a * (vector unsigned long long) { 4, 4 }; } generates the following scalar code on power8/9: .cfi_startproc xxpermdi 0,34,34,3 mfvsrd 9,34 mfvsrd 10,0 sldi 9,9,2 mtvsrd 0,9 sldi 10,10,2 mtvsrd 34,10 xxpermdi 34,0,34,0 blr .long 0 .byte 0,0,0,0,0,0,0,0 .cfi_endproc although the target has a vector insn for left shift. With this change, the following is generated instead: .cfi_startproc lxvd2x 32,0,3 vspltisw 1,2 vsld 0,0,1 stxvd2x 32,0,3 blr .long 0 .byte 0,0,0,0,0,0,0,0 .cfi_endproc 2025-11-28 Avinash Jayakar <[email protected]> gcc/ChangeLog: PR tree-optimization/122065 * tree-vect-generic.cc (add_rshift): Update name and add code parameter. (add_shift): Update name. (expand_vector_mult): New lowering for MULT_EXPR. (expand_vector_divmod): Use updated function name. (expand_vector_operation): Handle MULT_EXPR by calling expand_vector_mult. 
Diff: --- gcc/tree-vect-generic.cc | 69 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index 3c68361870bf..b8e6a7168ff4 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -460,7 +460,8 @@ expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0, of OP0 with shift counts in SHIFTCNTS array and return the temporary holding the result if successful, otherwise return NULL_TREE. */ static tree -add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts) +add_shift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts, + enum tree_code code) { optab op; unsigned int i, nunits = nunits_for_known_piecewise_op (type); @@ -477,26 +478,47 @@ add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts) if (scalar_shift) { - op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar); + op = optab_for_tree_code (code, type, optab_scalar); if (op != unknown_optab && can_implement_p (op, TYPE_MODE (type))) - return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, + return gimplify_build2 (gsi, code, type, op0, build_int_cst (NULL_TREE, shiftcnts[0])); } - op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector); + op = optab_for_tree_code (code, type, optab_vector); if (op != unknown_optab && can_implement_p (op, TYPE_MODE (type))) { tree_vector_builder vec (type, nunits, 1); for (i = 0; i < nunits; i++) vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i])); - return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ()); + return gimplify_build2 (gsi, code, type, op0, vec.build ()); } return NULL_TREE; } +/* Try to expand integer vector multiplication by constant of power 2 using + left shifts. 
*/ +static tree +expand_vector_mult (gimple_stmt_iterator *gsi, tree type, tree op0, + tree op1) +{ + unsigned int nunits = nunits_for_known_piecewise_op (type); + int *shifts = XALLOCAVEC (int, nunits); + + // if all element are same value and a power of 2, then we can use shifts + for (unsigned int i = 0; i < nunits; i++) + { + tree cst = VECTOR_CST_ELT (op1, i); + if ((TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst)) + || !integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1) + return NULL_TREE; + shifts[i] = tree_log2 (cst); + } + tree cur_op = add_shift (gsi, type, op0, shifts, LSHIFT_EXPR); + return cur_op; +} /* Try to expand integer vector division by constant using widening multiply, shifts and additions. */ static tree @@ -705,14 +727,15 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, { for (i = 0; i < nunits; i++) shift_temps[i] = prec - 1; - cur_op = add_rshift (gsi, type, op0, shift_temps); + cur_op = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR); if (cur_op != NULL_TREE) { cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, uns_type, cur_op); for (i = 0; i < nunits; i++) shift_temps[i] = prec - shifts[i]; - cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps); + cur_op = add_shift (gsi, uns_type, cur_op, shift_temps, + RSHIFT_EXPR); if (cur_op != NULL_TREE) addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, cur_op); @@ -748,7 +771,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, if (sign_p == UNSIGNED) { /* q = op0 >> shift; */ - cur_op = add_rshift (gsi, type, op0, shifts); + cur_op = add_shift (gsi, type, op0, shifts, RSHIFT_EXPR); if (cur_op != NULL_TREE) return cur_op; } @@ -761,7 +784,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, && can_implement_p (op, TYPE_MODE (type))) { cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend); - cur_op = add_rshift (gsi, type, cur_op, shifts); + cur_op = add_shift (gsi, type, cur_op, shifts, RSHIFT_EXPR); if 
(cur_op != NULL_TREE) return cur_op; } @@ -823,7 +846,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, /* t1 = oprnd0 >> pre_shift; t2 = t1 h* ml; q = t2 >> post_shift; */ - cur_op = add_rshift (gsi, type, cur_op, pre_shifts); + cur_op = add_shift (gsi, type, cur_op, pre_shifts, RSHIFT_EXPR); if (cur_op == NULL_TREE) return NULL_TREE; break; @@ -860,7 +883,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, /* t1 = oprnd0 >> pre_shift; t2 = t1 h* ml; q = t2 >> post_shift; */ - cur_op = add_rshift (gsi, type, cur_op, post_shifts); + cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR); break; case 1: /* t1 = oprnd0 h* ml; @@ -873,13 +896,13 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, || !can_implement_p (op, TYPE_MODE (type))) return NULL_TREE; tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op); - tem = add_rshift (gsi, type, tem, shift_temps); + tem = add_shift (gsi, type, tem, shift_temps, RSHIFT_EXPR); op = optab_for_tree_code (PLUS_EXPR, type, optab_default); if (op == unknown_optab || !can_implement_p (op, TYPE_MODE (type))) return NULL_TREE; tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem); - cur_op = add_rshift (gsi, type, tem, post_shifts); + cur_op = add_shift (gsi, type, tem, post_shifts, RSHIFT_EXPR); if (cur_op == NULL_TREE) return NULL_TREE; break; @@ -902,10 +925,10 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, return NULL_TREE; cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0); } - cur_op = add_rshift (gsi, type, cur_op, post_shifts); + cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR); if (cur_op == NULL_TREE) return NULL_TREE; - tem = add_rshift (gsi, type, op0, shift_temps); + tem = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR); if (tem == NULL_TREE) return NULL_TREE; op = optab_for_tree_code (MINUS_EXPR, type, optab_default); @@ -1146,7 +1169,23 @@ expand_vector_operation 
(gimple_stmt_iterator *gsi, tree type, tree compute_type return ret; break; } + case MULT_EXPR: + { + tree rhs1 = gimple_assign_rhs1 (assign); + tree rhs2 = gimple_assign_rhs2 (assign); + tree ret; + + if (!optimize + || !VECTOR_INTEGER_TYPE_P (type) + || TREE_CODE (rhs2) != VECTOR_CST + || !VECTOR_MODE_P (TYPE_MODE (type))) + break; + ret = expand_vector_mult (gsi, type, rhs1, rhs2); + if (ret != NULL_TREE) + return ret; + break; + } default: break; }
