> -----Original Message-----
> From: Richard Biener <[email protected]>
> Sent: 26 January 2026 11:31
> To: [email protected]
> Cc: RISC-V <[email protected]>; Tamar Christina
> <[email protected]>
> Subject: [PATCH][v2] tree-optimization/122474 - adjust check for
> .VEC_SHL_INSERT
>
> The r16-4558-g1b387bd8978577 change added a check that does not
> match what the commit message says. The following fixes this,
> trying to more closely match up what we later do during transform
> and what we check here. As cleanup this also makes sure that
> we compute the neutral value for the scalar type and consistently
> when it depends on the initial value, recording it in a new
> VECT_REDUC_INFO_NEUTRAL_OP. This avoids issues with comparing
> the neutral and initial value when it is bool. This also
> refactors vect_transform_cycle_phi a bit to remove dead code
> and make the flow easier to follow.
Nice! I was wondering how to deal with the mismatch between scalar
Boolean and vector boolean neutral ops. COND_EXPR is a nice trick :)
Thanks for the fix!
Tamar
>
> Bootstrap and regtest running on x86_64-unknown-linux-gnu and
> aarch64-linux-gnu.
>
> PR tree-optimization/122474
> * tree-vect-loop.cc (vectorizable_reduction): Adjust condition
> guarding the check for .VEC_SHL_INSERT.
>
> * gcc.target/aarch64/sve2/pr123053.c: New testcase.
> * gcc.target/riscv/rvv/pr122474.c: Likewise.
> ---
> .../gcc.target/aarch64/sve2/pr123053.c | 25 +++++
> gcc/testsuite/gcc.target/riscv/rvv/pr122474.c | 15 +++
> gcc/tree-vect-loop.cc | 93 ++++++++++---------
> gcc/tree-vectorizer.h | 4 +
> 4 files changed, 91 insertions(+), 46 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
> new file mode 100644
> index 00000000000..9f2fc438a62
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -march=armv9-a" } */
> +
> +#include <stdint.h>
> +
> +struct
> +{
> + int32_t f3;
> + int64_t f4;
> +} g_137, g_138;
> +struct
> +{
> + int8_t f0;
> +} g_174;
> +int32_t g_67;
> +extern uint32_t g_179[];
> +uint16_t func_71()
> +{
> + for (; g_174.f0; g_174.f0 -= 1)
> + {
> + g_137.f3 = 0;
> + for (; g_137.f3 <= 4; g_137.f3 += 1)
> + for (g_67 = 3; g_67; g_67 -= 1) g_179[g_67] || (g_138.f4 = 0);
> + }
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
> b/gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
> new file mode 100644
> index 00000000000..314b0f93d71
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvl256b -O3 -fwrapv -w" } */
> +
> +_Bool a;
> +void b(unsigned c[][8][8][8][8][8]) {
> + for (int d = 1; d; d += 3)
> + for (int e = 0; e < 11; e += 2)
> + for (short f = 0; f < 1; f = 30482)
> + for (short g = 0; g < 014; g++)
> + a = ({
> + int h = a;
> + int i = c[2][f][d][d][d][d] ^ c[g][g][1][2][g][g];
> + i ? h : i;
> + });
> +}
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index aa59cd1a39d..cbde8010e6d 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -4967,7 +4967,16 @@ get_initial_defs_for_reduction (loop_vec_info
> loop_vinfo,
> if (neutral_op
> && !useless_type_conversion_p (vector_elt_type,
> TREE_TYPE (neutral_op)))
> - neutral_op = gimple_convert (&ctor_seq, vector_elt_type, neutral_op);
> + {
> + if (VECTOR_BOOLEAN_TYPE_P (vector_type))
> + neutral_op = gimple_build (&ctor_seq, COND_EXPR,
> + vector_elt_type,
> + neutral_op,
> + build_all_ones_cst (vector_elt_type),
> + build_zero_cst (vector_elt_type));
> + else
> + neutral_op = gimple_convert (&ctor_seq, vector_elt_type,
> neutral_op);
> + }
> for (j = 0; j < nunits * number_of_vectors; ++j)
> {
> tree op;
> @@ -5159,9 +5168,7 @@ vect_find_reusable_accumulator (loop_vec_info
> loop_vinfo,
> initialize the accumulator with a neutral value instead. */
> if (!operand_equal_p (initial_value, main_adjustment))
> return false;
> - code_helper code = VECT_REDUC_INFO_CODE (reduc_info);
> - initial_values[0] = neutral_op_for_reduction (TREE_TYPE
> (initial_value),
> - code, initial_value);
> + initial_values[0] = VECT_REDUC_INFO_NEUTRAL_OP (reduc_info);
> }
> VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info) =
> main_adjustment;
> VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).truncate (0);
> @@ -7612,8 +7619,10 @@ vectorizable_reduction (loop_vec_info
> loop_vinfo,
> tree initial_value = NULL_TREE;
> if (reduc_chain)
> initial_value = vect_phi_initial_value (reduc_def_phi);
> - neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out),
> + neutral_op = neutral_op_for_reduction (TREE_TYPE
> + (gimple_phi_result (reduc_def_phi)),
> orig_code, initial_value);
> + VECT_REDUC_INFO_NEUTRAL_OP (reduc_info) = neutral_op;
>
> if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
> {
> @@ -7653,15 +7662,19 @@ vectorizable_reduction (loop_vec_info
> loop_vinfo,
> /* For double reductions, and for SLP reductions with a neutral value,
> we construct a variable-length initial vector by loading a vector
> full of the neutral value and then shift-and-inserting the start
> - values into the low-numbered elements. */
> + values into the low-numbered elements. This is however not needed
> + when neutral and initial value are equal or we can handle the
> + initial value via adjustment in the epilogue. */
> if ((double_reduc || neutral_op)
> && !nunits_out.is_constant ()
> - && (SLP_TREE_LANES (slp_node) != 1 && !reduc_chain)
> - && (!neutral_op
> - || !operand_equal_p (neutral_op,
> - vect_phi_initial_value (reduc_def_phi)))
> + && reduction_type != INTEGER_INDUC_COND_REDUCTION
> + && !((SLP_TREE_LANES (slp_node) == 1 || reduc_chain)
> + && neutral_op
> + && (!double_reduc
> + || operand_equal_p (neutral_op,
> + vect_phi_initial_value (reduc_def_phi))))
> && !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT,
> - vectype_out,
> OPTIMIZE_FOR_SPEED))
> + vectype_out,
> OPTIMIZE_FOR_BOTH))
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -8332,7 +8345,6 @@ vect_transform_cycle_phi (loop_vec_info
> loop_vinfo,
> vectype_out);
>
> /* Get the loop-entry arguments. */
> - tree vec_initial_def = NULL_TREE;
> auto_vec<tree> vec_initial_defs;
> vec_initial_defs.reserve (vec_num);
> /* Optimize: if initial_def is for REDUC_MAX smaller than the base
> @@ -8381,40 +8393,29 @@ vect_transform_cycle_phi (loop_vec_info
> loop_vinfo,
> gphi *this_phi = as_a<gphi *> (stmts[i]->stmt);
> initial_values.quick_push (vect_phi_initial_value (this_phi));
> }
> - if (vec_num == 1)
> - vect_find_reusable_accumulator (loop_vinfo, reduc_info,
> vectype_out);
> - if (!initial_values.is_empty ())
> - {
> - tree initial_value
> - = (num_phis == 1 ? initial_values[0] : NULL_TREE);
> - code_helper code = VECT_REDUC_INFO_CODE (reduc_info);
> - tree neutral_op
> - = neutral_op_for_reduction (TREE_TYPE (vectype_out),
> - code, initial_value);
> - /* Try to simplify the vector initialization by applying an
> - adjustment after the reduction has been performed. This
> - can also break a critical path but on the other hand
> - requires to keep the initial value live across the loop. */
> - if (neutral_op
> - && initial_values.length () == 1
> - && !VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)
> - && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
> - && !operand_equal_p (neutral_op, initial_values[0]))
> - {
> - VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info)
> - = initial_values[0];
> - initial_values[0] = neutral_op;
> - }
> - get_initial_defs_for_reduction (loop_vinfo, reduc_info, vectype_out,
> - &vec_initial_defs, vec_num,
> - stmts.length (), neutral_op);
> - }
> - }
> -
> - if (vec_initial_def)
> - {
> - vec_initial_defs.create (1);
> - vec_initial_defs.quick_push (vec_initial_def);
> + tree neutral_op = VECT_REDUC_INFO_NEUTRAL_OP (reduc_info);
> + if (vec_num == 1
> + && vect_find_reusable_accumulator (loop_vinfo,
> + reduc_info, vectype_out))
> + ;
> + /* Try to simplify the vector initialization by applying an
> + adjustment after the reduction has been performed. This
> + can also break a critical path but on the other hand
> + requires to keep the initial value live across the loop. */
> + else if (neutral_op
> + && initial_values.length () == 1
> + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
> + && !operand_equal_p (neutral_op, initial_values[0]))
> + {
> + VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info)
> + = initial_values[0];
> + initial_values[0] = neutral_op;
> + }
> + if (!VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)
> + || loop_vinfo->main_loop_edge)
> + get_initial_defs_for_reduction (loop_vinfo, reduc_info, vectype_out,
> + &vec_initial_defs, vec_num,
> + stmts.length (), neutral_op);
> }
>
> if (reduc_info)
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 7d66efee0f5..7bc141a78c0 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -876,6 +876,9 @@ public:
> when we reduce a mask. */
> tree reduc_vectype_for_mask;
>
> + /* The neutral operand to use, if any. */
> + tree neutral_op;
> +
> /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */
> tree induc_cond_initial_val;
>
> @@ -912,6 +915,7 @@ typedef class vect_reduc_info_s *vect_reduc_info;
> #define VECT_REDUC_INFO_VECTYPE_FOR_MASK(I) ((I)-
> >reduc_vectype_for_mask)
> #define VECT_REDUC_INFO_FORCE_SINGLE_CYCLE(I) ((I)->force_single_cycle)
> #define VECT_REDUC_INFO_RESULT_POS(I) ((I)->reduc_result_pos)
> +#define VECT_REDUC_INFO_NEUTRAL_OP(I) ((I)->neutral_op)
>
> /* Information about a reduction accumulator from the main loop that could
> conceivably be reused as the input to a reduction in an epilogue loop. */
> --
> 2.51.0