On Tue, 21 Nov 2023, Robin Dapp wrote:

> Hi,
> 
> in PR112406 Tamar found another problem with COND_OP reductions.
> I wrongly assumed that the reduction variable will always remain in
> operand 1, just as we create the COND_OP in ifcvt.  But of course,
> addition being commutative, we are free to swap operand 1 and 2 and
> can end up with e.g.
> 
>  _ifc__60 = .COND_ADD (_2, _6, MADPictureC1_lsm.10_25, 
> MADPictureC1_lsm.10_25);
> 
> which does not pass the asserts I put in place.
> 
> This patch removes this restriction and allows the reduction index to be
> 2 as well.
> 
> Bootstrapped and regtested on aarch64 and regtested on riscv.  x86 is
> still running.

LGTM.

Thanks,
Richard.

> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>       PR middle-end/112406
> 
>       * tree-vect-loop.cc (vectorize_fold_left_reduction): Allow
>       reduction index != 1.
>       (vect_transform_reduction): Handle reduction index != 1.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/aarch64/pr112406-2.c: New test.
> ---
>  gcc/testsuite/gcc.target/aarch64/pr112406-2.c | 20 +++++++++++++++++++
>  gcc/tree-vect-loop.cc                         | 15 +++++++++-----
>  2 files changed, 30 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr112406-2.c
> 
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr112406-2.c 
> b/gcc/testsuite/gcc.target/aarch64/pr112406-2.c
> new file mode 100644
> index 00000000000..bb6e9cf7c70
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr112406-2.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-options "-march=armv8-a+sve -Ofast" } */
> +
> +double MADPictureC1;
> +extern int PictureRejected[];
> +int PictureMAD_0, MADModelEstimator_n_windowSize_i, 
> MADModelEstimator_n_windowSize_oneSampleQ;
> +
> +void MADModelEstimator_n_windowSize() {
> +  int estimateX2 = 0;
> +  for (; MADModelEstimator_n_windowSize_i; 
> MADModelEstimator_n_windowSize_i++) {
> +    if (MADModelEstimator_n_windowSize_oneSampleQ &&
> +        !PictureRejected[MADModelEstimator_n_windowSize_i])
> +      estimateX2 = 1;
> +    if (!PictureRejected[MADModelEstimator_n_windowSize_i])
> +      MADPictureC1 += PictureMAD_0;
> +  }
> +  if (estimateX2)
> +    for (;;)
> +      ;
> +}
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 58679e91c0a..044eacddf7e 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -7070,7 +7070,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
>      op0 = ops[1 - reduc_index];
>    else
>      {
> -      op0 = ops[2];
> +      op0 = ops[2 + (1 - reduc_index)];
>        opmask = ops[0];
>        gcc_assert (!slp_node);
>      }
> @@ -8455,7 +8455,9 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
>        gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
>                 || code == IFN_COND_MUL || code == IFN_COND_AND
>                 || code == IFN_COND_IOR || code == IFN_COND_XOR);
> -      gcc_assert (op.num_ops == 4 && (op.ops[1] == op.ops[3]));
> +      gcc_assert (op.num_ops == 4
> +               && (op.ops[reduc_index]
> +                   == op.ops[internal_fn_else_index ((internal_fn) code)]));
>      }
>  
>    bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
> @@ -8498,12 +8500,15 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
>      {
>        /* For a conditional operation pass the truth type as mask
>        vectype.  */
> -      gcc_assert (single_defuse_cycle && reduc_index == 1);
> +      gcc_assert (single_defuse_cycle
> +               && (reduc_index == 1 || reduc_index == 2));
>        vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies,
>                        op.ops[0], &vec_oprnds0,
>                        truth_type_for (vectype_in),
> -                      NULL_TREE, &vec_oprnds1, NULL_TREE,
> -                      op.ops[2], &vec_oprnds2, NULL_TREE);
> +                      reduc_index == 1 ? NULL_TREE : op.ops[1],
> +                      &vec_oprnds1, NULL_TREE,
> +                      reduc_index == 2 ? NULL_TREE : op.ops[2],
> +                      &vec_oprnds2, NULL_TREE);
>      }
>  
>    /* For single def-use cycles get one copy of the vectorized reduction
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to