> -----Original Message-----
> From: Richard Biener <[email protected]>
> Sent: 26 January 2026 11:31
> To: [email protected]
> Cc: RISC-V <[email protected]>; Tamar Christina
> <[email protected]>
> Subject: [PATCH][v2] tree-optimization/122474 - adjust check for
> .VEC_SHL_INSERT
> 
> The r16-4558-g1b387bd8978577 change added a check that does not
> match what the commit message says.  The following fixes this,
> trying to more closely match up what we later do during transform
> and what we check here.  As cleanup this also makes sure that
> we compute the neutral value for the scalar type and consistently
> when it depends on the initial value, recording it in a new
> VECT_REDUC_INFO_NEUTRAL_OP.  This avoids issues with comparing
> the neutral and initial value when it is bool.  This also
> refactors vect_transform_cycle_phi a bit to remove dead code
> and make the flow easier to follow.

Nice! I was wondering how to deal with the mismatch between scalar
Boolean and vector boolean neutral ops. COND_EXPR is a nice trick :)

Thanks for the fix!
Tamar

> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu and
> aarch64-linux-gnu.
> 
>       PR tree-optimization/122474
>       * tree-vect-loop.cc (vectorizable_reduction): Adjust condition
>       guarding the check for .VEC_SHL_INSERT.
> 
>       * gcc.target/aarch64/sve2/pr123053.c: New testcase.
>       * gcc.target/riscv/rvv/pr122474.c: Likewise.
> ---
>  .../gcc.target/aarch64/sve2/pr123053.c        | 25 +++++
>  gcc/testsuite/gcc.target/riscv/rvv/pr122474.c | 15 +++
>  gcc/tree-vect-loop.cc                         | 93 ++++++++++---------
>  gcc/tree-vectorizer.h                         |  4 +
>  4 files changed, 91 insertions(+), 46 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
> 
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
> new file mode 100644
> index 00000000000..9f2fc438a62
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -march=armv9-a" } */
> +
> +#include <stdint.h>
> +
> +struct
> +{
> +    int32_t f3;
> +    int64_t f4;
> +} g_137, g_138;
> +struct
> +{
> +    int8_t f0;
> +} g_174;
> +int32_t g_67;
> +extern uint32_t g_179[];
> +uint16_t func_71()
> +{
> +    for (; g_174.f0; g_174.f0 -= 1)
> +    {
> +        g_137.f3 = 0;
> +        for (; g_137.f3 <= 4; g_137.f3 += 1)
> +            for (g_67 = 3; g_67; g_67 -= 1) g_179[g_67] || (g_138.f4 = 0);
> +    }
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
> b/gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
> new file mode 100644
> index 00000000000..314b0f93d71
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvl256b -O3 -fwrapv -w" } */
> +
> +_Bool a;
> +void b(unsigned c[][8][8][8][8][8]) {
> +  for (int d = 1; d; d += 3)
> +    for (int e = 0; e < 11; e += 2)
> +      for (short f = 0; f < 1; f = 30482)
> +        for (short g = 0; g < 014; g++)
> +          a = ({
> +            int h = a;
> +            int i = c[2][f][d][d][d][d] ^ c[g][g][1][2][g][g];
> +            i ? h : i;
> +          });
> +}
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index aa59cd1a39d..cbde8010e6d 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -4967,7 +4967,16 @@ get_initial_defs_for_reduction (loop_vec_info
> loop_vinfo,
>    if (neutral_op
>        && !useless_type_conversion_p (vector_elt_type,
>                                    TREE_TYPE (neutral_op)))
> -    neutral_op = gimple_convert (&ctor_seq, vector_elt_type, neutral_op);
> +    {
> +      if (VECTOR_BOOLEAN_TYPE_P (vector_type))
> +     neutral_op = gimple_build (&ctor_seq, COND_EXPR,
> +                                vector_elt_type,
> +                                neutral_op,
> +                                build_all_ones_cst (vector_elt_type),
> +                                build_zero_cst (vector_elt_type));
> +      else
> +     neutral_op = gimple_convert (&ctor_seq, vector_elt_type,
> neutral_op);
> +    }
>    for (j = 0; j < nunits * number_of_vectors; ++j)
>      {
>        tree op;
> @@ -5159,9 +5168,7 @@ vect_find_reusable_accumulator (loop_vec_info
> loop_vinfo,
>        initialize the accumulator with a neutral value instead.  */
>        if (!operand_equal_p (initial_value, main_adjustment))
>       return false;
> -      code_helper code = VECT_REDUC_INFO_CODE (reduc_info);
> -      initial_values[0] = neutral_op_for_reduction (TREE_TYPE 
> (initial_value),
> -                                                 code, initial_value);
> +      initial_values[0] = VECT_REDUC_INFO_NEUTRAL_OP (reduc_info);
>      }
>    VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info) =
> main_adjustment;
>    VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).truncate (0);
> @@ -7612,8 +7619,10 @@ vectorizable_reduction (loop_vec_info
> loop_vinfo,
>    tree initial_value = NULL_TREE;
>    if (reduc_chain)
>      initial_value = vect_phi_initial_value (reduc_def_phi);
> -  neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out),
> +  neutral_op = neutral_op_for_reduction (TREE_TYPE
> +                                        (gimple_phi_result (reduc_def_phi)),
>                                        orig_code, initial_value);
> +  VECT_REDUC_INFO_NEUTRAL_OP (reduc_info) = neutral_op;
> 
>    if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
>      {
> @@ -7653,15 +7662,19 @@ vectorizable_reduction (loop_vec_info
> loop_vinfo,
>    /* For double reductions, and for SLP reductions with a neutral value,
>       we construct a variable-length initial vector by loading a vector
>       full of the neutral value and then shift-and-inserting the start
> -     values into the low-numbered elements.  */
> +     values into the low-numbered elements.  This is however not needed
> +     when neutral and initial value are equal or we can handle the
> +     initial value via adjustment in the epilogue.  */
>    if ((double_reduc || neutral_op)
>        && !nunits_out.is_constant ()
> -      && (SLP_TREE_LANES (slp_node) != 1 && !reduc_chain)
> -      && (!neutral_op
> -       || !operand_equal_p (neutral_op,
> -                            vect_phi_initial_value (reduc_def_phi)))
> +      && reduction_type != INTEGER_INDUC_COND_REDUCTION
> +      && !((SLP_TREE_LANES (slp_node) == 1 || reduc_chain)
> +        && neutral_op
> +        && (!double_reduc
> +            || operand_equal_p (neutral_op,
> +                                vect_phi_initial_value (reduc_def_phi))))
>        && !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT,
> -                                       vectype_out,
> OPTIMIZE_FOR_SPEED))
> +                                       vectype_out,
> OPTIMIZE_FOR_BOTH))
>      {
>        if (dump_enabled_p ())
>       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -8332,7 +8345,6 @@ vect_transform_cycle_phi (loop_vec_info
> loop_vinfo,
>                                              vectype_out);
> 
>    /* Get the loop-entry arguments.  */
> -  tree vec_initial_def = NULL_TREE;
>    auto_vec<tree> vec_initial_defs;
>    vec_initial_defs.reserve (vec_num);
>    /* Optimize: if initial_def is for REDUC_MAX smaller than the base
> @@ -8381,40 +8393,29 @@ vect_transform_cycle_phi (loop_vec_info
> loop_vinfo,
>         gphi *this_phi = as_a<gphi *> (stmts[i]->stmt);
>         initial_values.quick_push (vect_phi_initial_value (this_phi));
>       }
> -      if (vec_num == 1)
> -     vect_find_reusable_accumulator (loop_vinfo, reduc_info,
> vectype_out);
> -      if (!initial_values.is_empty ())
> -     {
> -       tree initial_value
> -         = (num_phis == 1 ? initial_values[0] : NULL_TREE);
> -       code_helper code = VECT_REDUC_INFO_CODE (reduc_info);
> -       tree neutral_op
> -         = neutral_op_for_reduction (TREE_TYPE (vectype_out),
> -                                     code, initial_value);
> -       /* Try to simplify the vector initialization by applying an
> -          adjustment after the reduction has been performed.  This
> -          can also break a critical path but on the other hand
> -          requires to keep the initial value live across the loop.  */
> -       if (neutral_op
> -           && initial_values.length () == 1
> -           && !VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)
> -           && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
> -           && !operand_equal_p (neutral_op, initial_values[0]))
> -         {
> -           VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info)
> -             = initial_values[0];
> -           initial_values[0] = neutral_op;
> -         }
> -       get_initial_defs_for_reduction (loop_vinfo, reduc_info, vectype_out,
> -                                       &vec_initial_defs, vec_num,
> -                                       stmts.length (), neutral_op);
> -     }
> -    }
> -
> -  if (vec_initial_def)
> -    {
> -      vec_initial_defs.create (1);
> -      vec_initial_defs.quick_push (vec_initial_def);
> +      tree neutral_op = VECT_REDUC_INFO_NEUTRAL_OP (reduc_info);
> +      if (vec_num == 1
> +       && vect_find_reusable_accumulator (loop_vinfo,
> +                                          reduc_info, vectype_out))
> +     ;
> +      /* Try to simplify the vector initialization by applying an
> +      adjustment after the reduction has been performed.  This
> +      can also break a critical path but on the other hand
> +      requires to keep the initial value live across the loop.  */
> +      else if (neutral_op
> +            && initial_values.length () == 1
> +            && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
> +            && !operand_equal_p (neutral_op, initial_values[0]))
> +     {
> +       VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info)
> +         = initial_values[0];
> +       initial_values[0] = neutral_op;
> +     }
> +      if (!VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)
> +       || loop_vinfo->main_loop_edge)
> +     get_initial_defs_for_reduction (loop_vinfo, reduc_info, vectype_out,
> +                                     &vec_initial_defs, vec_num,
> +                                     stmts.length (), neutral_op);
>      }
> 
>    if (reduc_info)
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 7d66efee0f5..7bc141a78c0 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -876,6 +876,9 @@ public:
>       when we reduce a mask.  */
>    tree reduc_vectype_for_mask;
> 
> +  /* The neutral operand to use, if any.  */
> +  tree neutral_op;
> +
>    /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used.  */
>    tree induc_cond_initial_val;
> 
> @@ -912,6 +915,7 @@ typedef class vect_reduc_info_s *vect_reduc_info;
>  #define VECT_REDUC_INFO_VECTYPE_FOR_MASK(I) ((I)-
> >reduc_vectype_for_mask)
>  #define VECT_REDUC_INFO_FORCE_SINGLE_CYCLE(I) ((I)->force_single_cycle)
>  #define VECT_REDUC_INFO_RESULT_POS(I) ((I)->reduc_result_pos)
> +#define VECT_REDUC_INFO_NEUTRAL_OP(I) ((I)->neutral_op)
> 
>  /* Information about a reduction accumulator from the main loop that could
>     conceivably be reused as the input to a reduction in an epilogue loop.  */
> --
> 2.51.0

Reply via email to