On Fri, 3 Oct 2025, Tamar Christina wrote:
> This patch changes the widen_[us]sum optabs into a convert optabs such that
> targets can specify more than one conversion.
>
> Following this patch are patches rewriting all targets using this change.
>
> While working on this I noticed that the pattern misses some cases it
> could handle if it made multiple attempts, e.g. if the promotion is from
> qi to si and the target doesn't support that, it should try hi -> si.
>
> But I'm leaving that for now.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 with no issues.
>
> Ok for master?
OK.
I'll note we might want to document that this, the dot_prod and
the sad patterns work on integer vector modes only, and
copy the part of the docs from dot_prod that specifies which
of the vector output lanes the accumulation happens on (in case
this is now fully consistent on all targets).
I do wonder whether it makes sense to differentiate between vector
and non-vector modes in optabs.def and gen*, but that's a much
larger task.
Thanks,
Richard.
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR middle-end/122069
> * doc/md.texi (widen_ssum@var{n}@var{m}3, widen_usum@var{n}@var{m}3):
> Update docs.
> * optabs.cc (expand_widen_pattern_expr): Add WIDEN_SUM_EXPR as widening.
> * optabs.def (ssum_widen_optab, usum_widen_optab): Convert from direct
> to a conversion optab.
> * tree-vect-patterns.cc (vect_recog_widen_sum_pattern): Change
> vect_supportable_direct_optab_p into vect_supportable_conv_optab_p.
>
> gcc/testsuite/ChangeLog:
>
> PR middle-end/122069
> * gcc.dg/vect/slp-reduc-3.c: vect_widen_sum_hi_to_si_pattern targets now
> pass.
>
> ---
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index
> 44e1149bea89b18903061713e8319d834b76adbf..97d21b90a650e5e5fad5cd72b01f30983ca4ab43
> 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5847,15 +5847,15 @@ equal or wider than the mode of the absolute
> difference. The result is placed
> in operand 0, which is of the same mode as operand 3.
> @var{m} is the mode of operand 1 and operand 2.
>
> -@cindex @code{widen_ssum@var{m}3} instruction pattern
> -@cindex @code{widen_usum@var{m}3} instruction pattern
> -@item @samp{widen_ssum@var{m}3}
> -@itemx @samp{widen_usum@var{m}3}
> +@cindex @code{widen_ssum@var{n}@var{m}3} instruction pattern
> +@cindex @code{widen_usum@var{n}@var{m}3} instruction pattern
> +@item @samp{widen_ssum@var{n}@var{m}3}
> +@itemx @samp{widen_usum@var{n}@var{m}3}
> Operands 0 and 2 are of the same mode, which is wider than the mode of
> operand 1. Add operand 1 to operand 2 and place the widened result in
> operand 0. (This is used express accumulation of elements into an accumulator
> of a wider mode.)
> -@var{m} is the mode of operand 1.
> +@var{m} is the mode of operand 1 and @var{n} is the mode of operand 0.
>
> @cindex @code{smulhs@var{m}3} instruction pattern
> @cindex @code{umulhs@var{m}3} instruction pattern
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index
> 5c9450f61450fa4425d08339a1c2b5f7f5e654ec..0865fc2e19aeb2b3056c8634334d6c1644a3cc96
> 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -322,6 +322,10 @@ expand_widen_pattern_expr (const_sepops ops, rtx op0,
> rtx op1, rtx wide_op,
> icode = find_widening_optab_handler (widen_pattern_optab,
> TYPE_MODE (TREE_TYPE (ops->op2)),
> tmode0);
> + else if (ops->code == WIDEN_SUM_EXPR)
> + icode = find_widening_optab_handler (widen_pattern_optab,
> + TYPE_MODE (TREE_TYPE (ops->op1)),
> + tmode0);
> else
> icode = optab_handler (widen_pattern_optab, tmode0);
> gcc_assert (icode != CODE_FOR_nothing);
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index
> 790e43f08f476c8025dc2797f9ecaffe5b66acc5..e2ffb2b6423893b5dd757af1ed3f342ce8c9f76a
> 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -85,6 +85,8 @@ OPTAB_CD(smsub_widen_optab, "msub$b$a4")
> OPTAB_CD(umsub_widen_optab, "umsub$b$a4")
> OPTAB_CD(ssmsub_widen_optab, "ssmsub$b$a4")
> OPTAB_CD(usmsub_widen_optab, "usmsub$a$b4")
> +OPTAB_CD(ssum_widen_optab, "widen_ssum$I$a$b3")
> +OPTAB_CD(usum_widen_optab, "widen_usum$I$a$b3")
> OPTAB_CD(crc_optab, "crc$a$b4")
> OPTAB_CD(crc_rev_optab, "crc_rev$a$b4")
> OPTAB_CD(vec_load_lanes_optab, "vec_load_lanes$a$b")
> @@ -415,8 +417,6 @@ OPTAB_D (savg_floor_optab, "avg$a3_floor")
> OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil")
> -OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
> -OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
> OPTAB_D (usad_optab, "usad$I$a")
> OPTAB_D (ssad_optab, "ssad$I$a")
> OPTAB_D (smulhs_optab, "smulhs$a3")
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
> b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
> index
> 614d8ad17ca1629af9f43cedec3cbed197d9a582..b8aff98990b202eae2a7c367457113aa1b811eda
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
> @@ -60,6 +60,6 @@ int main (void)
> /* The initialization loop in main also gets vectorized. */
> /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected"
> 1 "vect" { xfail *-*-* } } } */
> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target
> { vect_short_mult && { vect_widen_sum_hi_to_si && vect_unpack } } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"
> { xfail { vect_widen_sum_hi_to_si_pattern || { ! { vect_short_mult && {
> vect_widen_sum_hi_to_si && vect_unpack } } } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"
> { xfail { ! { vect_short_mult && { vect_widen_sum_hi_to_si && vect_unpack }
> } } } } } */
> /* Check we can elide permutes if SLP vectorizing the reduction. */
> /* { dg-final { scan-tree-dump-times " = VEC_PERM_EXPR" 0 "vect" { xfail { {
> { vect_widen_sum_hi_to_si_pattern || { ! vect_unpack } } && { !
> vect_load_lanes } } && { vect_short_mult && { vect_widen_sum_hi_to_si &&
> vect_unpack } } } } } } */
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index
> 782327235db16384c2d71186911802daf7a15ebc..38695647f602792909c486ae52a3fbf8cc28b39e
> 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -2544,8 +2544,8 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
>
> vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
>
> - if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
> - unprom0.type, type_out))
> + if (!vect_supportable_conv_optab_p (vinfo, type, WIDEN_SUM_EXPR,
> + unprom0.type, type_out))
> return NULL;
>
> var = vect_recog_temp_ssa_var (type, NULL);
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)