Add a new SSA pass (pass_widen_accum) that widens narrow integer
loop accumulators (e.g. short, char) to int-width, eliminating
per-iteration sign-/zero-extension truncations.
The pass is gated on -ftree-widen-accum, enabled at -O2 and above.
gcc/ChangeLog:
* common.opt (ftree-widen-accum): New flag.
* opts.cc (default_options_table): Enable at -O2+.
* params.opt (max-widen-accum-chain-depth): New param.
* tree-ssa-loop-widen-accum.cc: New file.
* Makefile.in (OBJS): Add tree-ssa-loop-widen-accum.o.
* passes.def (pass_widen_accumulator): Schedule after phiopt,
before widening_mul which needs LCSSA.
* timevar.def (TV_TREE_WIDEN_ACCUM): New timevar.
* tree-pass.h (make_pass_widen_accumulator): Declare.
* doc/invoke.texi (-ftree-widen-accum): Document new flag.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/widen-accum-1.c: New test.
* gcc.dg/tree-ssa/widen-accum-2.c: New test.
* gcc.dg/tree-ssa/widen-accum-3.c: New test.
* gcc.dg/tree-ssa/widen-accum-4.c: New test.
* gcc.dg/tree-ssa/widen-accum-5.c: New test.
* gcc.dg/tree-ssa/widen-accum-6.c: New test.
* gcc.dg/tree-ssa/widen-accum-7.c: New test.
* gcc.dg/tree-ssa/widen-accum-8.c: New test.
* gcc.target/riscv/widen-accum-1.c: New test.
* gcc.target/aarch64/widen-accum-1.c: New test.
---
gcc/Makefile.in | 1 +
gcc/common.opt | 4 +
gcc/doc/invoke.texi | 14 +-
gcc/opts.cc | 1 +
gcc/params.opt | 4 +
gcc/passes.def | 1 +
gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c | 20 +
gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c | 19 +
gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c | 26 +
gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c | 20 +
gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c | 33 +
gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c | 15 +
gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c | 15 +
gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c | 15 +
.../gcc.target/aarch64/widen-accum-1.c | 15 +
.../gcc.target/riscv/widen-accum-1.c | 15 +
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
gcc/tree-ssa-loop-widen-accum.cc | 713 ++++++++++++++++++
19 files changed, 932 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/widen-accum-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/widen-accum-1.c
create mode 100644 gcc/tree-ssa-loop-widen-accum.cc
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 9e8da255186a..c94a8a0a5940 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1799,6 +1799,7 @@ OBJS = \
tree-ssa-loop-prefetch.o \
tree-ssa-loop-split.o \
tree-ssa-loop-unswitch.o \
+ tree-ssa-loop-widen-accum.o \
tree-ssa-loop.o \
tree-ssa-math-opts.o \
tree-ssa-operands.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 88b79bbf8f56..bd3e2707ab4c 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3380,6 +3380,10 @@ ftree-vrp
Common Var(flag_tree_vrp) Init(0) Optimization
Perform Value Range Propagation on trees.
+ftree-widen-accum
+Common Var(flag_tree_widen_accum) Optimization
+Widen narrow-type loop accumulators to avoid per-iteration truncations.
+
fsplit-paths
Common Var(flag_split_paths) Init(0) Optimization
Split paths leading to loop backedges.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fe20ae66c00b..d38333b59c4d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -688,7 +688,7 @@ Objective-C and Objective-C++ Dialects}.
-ftree-parallelize-loops[=@var{n}] -ftree-pre -ftree-partial-pre -ftree-pta
-ftree-reassoc -ftree-scev-cprop -ftree-sink -ftree-slsr -ftree-sra
-ftree-switch-conversion -ftree-tail-merge
--ftree-ter -ftree-vectorize -ftree-vrp -ftrivial-auto-var-init
+-ftree-ter -ftree-vectorize -ftree-vrp -ftree-widen-accum -ftrivial-auto-var-init
-funconstrained-commons -funit-at-a-time -funroll-all-loops
-funroll-loops -funsafe-math-optimizations -funswitch-loops
-fipa-ra -fvariable-expansion-in-unroller -fvect-cost-model -fvpt
@@ -13463,6 +13463,7 @@ also turns on the following optimization flags:
-ftree-slp-vectorize
-ftree-switch-conversion -ftree-tail-merge
-ftree-vrp
+-ftree-widen-accum
-fvect-cost-model=very-cheap}
Please note the warning under @option{-fgcse} about
@@ -15322,6 +15323,17 @@ enabled by default at @option{-O2} and higher.  Null pointer check
elimination is only done if @option{-fdelete-null-pointer-checks} is
enabled.
+@opindex ftree-widen-accum
+@opindex fno-tree-widen-accum
+@item -ftree-widen-accum
+Widen narrow-type loop accumulators (e.g.@: @code{short}, @code{char})
+to @code{int} width, eliminating per-iteration sign- or zero-extension
+truncations. Since two's-complement addition is associative modulo
+@math{2^N}, the truncation can be deferred to loop exits without
+changing the observable result. This is enabled by default at
+@option{-O2} and higher and is not applied when @option{-ftrapv} is
+in effect.
+
@opindex fsplit-paths
@opindex fno-split-paths
@item -fsplit-paths
diff --git a/gcc/opts.cc b/gcc/opts.cc
index 6658b6acd378..2faab06879cd 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -673,6 +673,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_ftree_switch_conversion, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_vrp, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_ftree_widen_accum, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL,
VECT_COST_MODEL_VERY_CHEAP },
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
diff --git a/gcc/params.opt b/gcc/params.opt
index 4420189e9822..305ed70a1256 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -846,6 +846,10 @@ Maximum size of loc list for which reverse ops should be added.
Common Joined UInteger Var(param_max_vartrack_size) Init(50000000) Param Optimization
Maximum size of var tracking hash tables.
+-param=max-widen-accum-chain-depth=
+Common Joined UInteger Var(param_max_widen_accum_chain_depth) Init(50) IntegerRange(1, 200) Param Optimization
+Maximum recursion depth when analyzing or transforming accumulator chains in the widen_accum pass.
+
-param=max-find-base-term-values=
Common Joined UInteger Var(param_max_find_base_term_values) Init(200) Param Optimization
Maximum number of VALUEs handled during a single find_base_term call.
diff --git a/gcc/passes.def b/gcc/passes.def
index cdddb87302f6..3469dfd2e512 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -371,6 +371,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_forwprop, /*full_walk=*/false, /*last=*/true);
NEXT_PASS (pass_sink_code, true /* unsplit edges */);
NEXT_PASS (pass_phiopt, false /* early_p */);
+ NEXT_PASS (pass_widen_accumulator); /* Before widening_mul which needs LCSSA. */
NEXT_PASS (pass_optimize_widening_mul);
NEXT_PASS (pass_store_merging);
/* If DCE is not run before checking for uninitialized uses,
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c
new file mode 100644
index 000000000000..bfadad0201fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+typedef short ee_s16;
+
+ee_s16 __attribute__((noipa))
+test_widen (int N, int *A, int val)
+{
+ ee_s16 ret = 0;
+ for (int i = 0; i < N; i++)
+ {
+ if (A[i] > val)
+ ret += 10;
+ else
+ ret += (A[i] > 0) ? 1 : 0;
+ }
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c
new file mode 100644
index 000000000000..5b3cecbec078
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+typedef short ee_s16;
+
+ee_s16 __attribute__((noipa))
+test_no_widen (int N, int *A, ee_s16 limit)
+{
+ ee_s16 ret = 0;
+ for (int i = 0; i < N; i++)
+ {
+ ret += A[i];
+ if (ret > limit) /* comparison of narrow accumulator -- blocks widening */
+ ret = 0;
+ }
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-not "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c
new file mode 100644
index 000000000000..24bbaa7f11cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+typedef short ee_s16;
+
+/* Multi-exit loop: the early return carries an intermediate
+ accumulator value, not the back-edge value. The pass should
+ still widen successfully. */
+
+ee_s16 __attribute__((noipa))
+test_multi_exit (int N, int *A, int val, int sentinel)
+{
+ ee_s16 ret = 0;
+ for (int i = 0; i < N; i++)
+ {
+ if (A[i] == sentinel)
+ return ret; /* early exit with current accumulator value */
+ if (A[i] > val)
+ ret += 10;
+ else
+ ret += (A[i] > 0) ? 1 : 0;
+ }
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c
new file mode 100644
index 000000000000..48f1d739b5b8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+typedef short ee_s16;
+
+/* Accumulator with int-typed addend: GIMPLE keeps the explicit
+ widen/compute/truncate chain (_1 = (int)ret; _2 = _1 + A[i];
+ ret = (short)_2) because A[i] is int. The pass must look
+ through the int->short truncation in resolve_wide_back_arg. */
+
+ee_s16 __attribute__((noipa))
+test_int_addend (int N, int *A)
+{
+ ee_s16 ret = 0;
+ for (int i = 0; i < N; i++)
+ ret += A[i];
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c
new file mode 100644
index 000000000000..b22c6732d4c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* Verify that widening preserves wraparound semantics. */
+
+short __attribute__((noipa))
+sum_short (int N, int *A)
+{
+ short ret = 0;
+ for (int i = 0; i < N; i++)
+ ret += A[i];
+ return ret;
+}
+
+int
+main (void)
+{
+ int A[100];
+ for (int i = 0; i < 100; i++)
+ A[i] = 400;
+
+ /* 100 * 400 = 40000 which wraps past short (max 32767).
+ Compute expected result via int, then truncate. */
+ int ref = 0;
+ for (int i = 0; i < 100; i++)
+ ref += 400;
+ short expected = (short) ref;
+
+ if (sum_short (100, A) != expected)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c
new file mode 100644
index 000000000000..ce1b527c3062
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+/* signed char accumulator. */
+
+signed char __attribute__((noipa))
+sum_char (int N, int *A)
+{
+ signed char ret = 0;
+ for (int i = 0; i < N; i++)
+ ret += A[i];
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c
new file mode 100644
index 000000000000..292f0812f178
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+/* unsigned short accumulator. */
+
+unsigned short __attribute__((noipa))
+sum_ushort (int N, int *A)
+{
+ unsigned short ret = 0;
+ for (int i = 0; i < N; i++)
+ ret += A[i];
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c
new file mode 100644
index 000000000000..8c4e5fc2f9b8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+/* MINUS_EXPR accumulator. */
+
+short __attribute__((noipa))
+sub_short (int N, int *A)
+{
+ short ret = 0;
+ for (int i = 0; i < N; i++)
+ ret -= A[i];
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/widen-accum-1.c b/gcc/testsuite/gcc.target/aarch64/widen-accum-1.c
new file mode 100644
index 000000000000..b5c3841fb832
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/widen-accum-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Os" "-Oz" "-Og" } } */
+
+short __attribute__((noipa))
+sum_short (int N, int *A)
+{
+ short ret = 0;
+ for (int i = 0; i < N; i++)
+ ret += A[i];
+ return ret;
+}
+
+/* After widening, the loop should not need sign-extension. */
+/* { dg-final { scan-assembler-not "sxth" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/widen-accum-1.c b/gcc/testsuite/gcc.target/riscv/widen-accum-1.c
new file mode 100644
index 000000000000..66693abecab7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/widen-accum-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc -mabi=lp64d" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Os" "-Oz" "-Og" } } */
+
+short __attribute__((noipa))
+sum_short (int N, int *A)
+{
+ short ret = 0;
+ for (int i = 0; i < N; i++)
+ ret += A[i];
+ return ret;
+}
+
+/* After widening, the loop should not need sign-extension. */
+/* { dg-final { scan-assembler-not "sext\\.h" } } */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 3824caa01bc2..78db42f94e8b 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -227,6 +227,7 @@ DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering")
DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals")
DEFTIMEVAR (TV_TREE_SINCOS , "gimple CSE sin/cos")
DEFTIMEVAR (TV_TREE_POW , "gimple expand pow")
+DEFTIMEVAR (TV_TREE_WIDEN_ACCUM , "gimple widen accumulator")
DEFTIMEVAR (TV_TREE_WIDEN_MUL , "gimple widening/fma detection")
DEFTIMEVAR (TV_TRANS_MEM , "transactional memory")
DEFTIMEVAR (TV_TREE_STRLEN , "tree strlen optimization")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index b3c97658a8fe..96fc35b2a76c 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -456,6 +456,7 @@ extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_pow (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_widen_accumulator (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_return (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_noreturn (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop-widen-accum.cc b/gcc/tree-ssa-loop-widen-accum.cc
new file mode 100644
index 000000000000..4f41ed1bedaa
--- /dev/null
+++ b/gcc/tree-ssa-loop-widen-accum.cc
@@ -0,0 +1,713 @@
+/* Widen narrow-type loop accumulators to int.
+ Copyright (C) 2026 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Narrow-type loop accumulators (e.g. short) are truncated every
+ iteration, producing redundant sign/zero-extensions on targets such
+ as RISC-V. Since two's-complement addition is associative mod 2^N,
+ the truncation can be deferred to loop exits.
+
+ This pass finds header PHIs whose type is narrower than int and
+ whose in-loop uses are limited to additive operations and
+ same-width casts. It creates a widened (int-typed) copy of the
+ accumulator chain and inserts a single narrowing cast at each loop
+ exit. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "gimple-pretty-print.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "tree-cfg.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop.h"
+#include "cfgloop.h"
+#include "tree-dfa.h"
+#include "tree-ssa.h"
+#include "tree-phinodes.h"
+#include "tree-into-ssa.h"
+
+/* Return true if CODE is an additive operation or a type conversion
+ -- the set of operations that the accumulator chain is allowed to
+ contain. Shared between verify_chain_to_phi_p (backward walk) and
+ validate_uses_in_loop_p (forward walk) to keep them in sync. */
+
+static inline bool
+is_additive_or_cast_p (enum tree_code code)
+{
+ return CONVERT_EXPR_CODE_P (code)
+ || code == PLUS_EXPR
+ || code == MINUS_EXPR;
+}
+
+/* Return true if STMT is a CONVERT_EXPR/NOP_EXPR whose input has
+ precision <= NARROW_PREC and whose output has precision >= input.
+ This matches widening casts and same-width sign-changes. */
+
+static bool
+is_widening_or_nop_cast_p (gimple *stmt, unsigned narrow_prec)
+{
+ if (!is_gimple_assign (stmt))
+ return false;
+ if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
+ return false;
+ tree rhs = gimple_assign_rhs1 (stmt);
+ tree lhs = gimple_assign_lhs (stmt);
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
+ return false;
+ return (TYPE_PRECISION (TREE_TYPE (rhs)) <= narrow_prec
+ && TYPE_PRECISION (TREE_TYPE (lhs))
+ >= TYPE_PRECISION (TREE_TYPE (rhs)));
+}
+
+/* Walk backward from NAME through CONVERT_EXPR / PLUS_EXPR / MINUS_EXPR
+ and merge-PHIs, verifying the chain reaches PHI_RESULT within LOOP.
+ UNDO collects nodes added to VISITED so that a failed speculative
+ walk (trying op0 before op1) can be rolled back cheaply. */
+
+static bool
+verify_chain_to_phi_p (tree name, tree phi_result, class loop *loop,
+ hash_set<tree> &visited,
+ auto_vec<tree> &undo, int depth = 0)
+{
+ /* Iteratively follow CONVERT_EXPR chains to avoid recursion on
+ the common linear-cast case; recurse for PHIs and PLUS/MINUS.
+ The depth parameter bounds total recursion across all frames. */
+ for (;;)
+ {
+ if (depth > param_max_widen_accum_chain_depth)
+ return false;
+
+ if (name == phi_result)
+ return true;
+
+ if (TREE_CODE (name) != SSA_NAME)
+ return false;
+
+ /* Already verified on a convergent path -- ok. In-loop merge
+ PHIs cannot form cycles without going through the header
+ PHI, so a previously visited node is safe. */
+ if (visited.add (name))
+ return true;
+ undo.safe_push (name);
+
+ gimple *def = SSA_NAME_DEF_STMT (name);
+ if (!def)
+ return false;
+
+ basic_block bb = gimple_bb (def);
+ if (!bb || !flow_bb_inside_loop_p (loop, bb))
+ return false;
+
+ /* Merge-PHI inside the loop (not header): each in-loop
+ argument must trace back to phi_result. */
+ if (gimple_code (def) == GIMPLE_PHI)
+ {
+ if (bb == loop->header)
+ return false;
+ gphi *phi = as_a<gphi *> (def);
+ for (unsigned i = 0; i < gimple_phi_num_args (phi); i++)
+ {
+ edge e = gimple_phi_arg_edge (phi, i);
+ if (!flow_bb_inside_loop_p (loop, e->src))
+ continue;
+ tree arg = gimple_phi_arg_def (phi, i);
+ if (!verify_chain_to_phi_p (arg, phi_result, loop, visited,
+ undo, depth + 1))
+ return false;
+ }
+ return true;
+ }
+
+ if (!is_gimple_assign (def))
+ return false;
+
+ enum tree_code code = gimple_assign_rhs_code (def);
+ if (!is_additive_or_cast_p (code))
+ return false;
+
+ if (CONVERT_EXPR_CODE_P (code))
+ {
+ name = gimple_assign_rhs1 (def);
+ depth++;
+ continue;
+ }
+
+ /* PLUS_EXPR / MINUS_EXPR: one operand is the accumulator chain,
+ the other is a non-accumulator addend. Try op0 first; on
+ failure, roll back any nodes it added to VISITED before
+ trying op1 -- a polluted set could cause a false-positive
+ convergent-path hit. */
+ tree op0 = gimple_assign_rhs1 (def);
+ tree op1 = gimple_assign_rhs2 (def);
+ unsigned mark = undo.length ();
+ if (verify_chain_to_phi_p (op0, phi_result, loop, visited,
+ undo, depth + 1))
+ return true;
+ /* Roll back nodes added by the failed op0 walk. */
+ while (undo.length () > mark)
+ visited.remove (undo.pop ());
+ return verify_chain_to_phi_p (op1, phi_result, loop, visited,
+ undo, depth + 1);
+ }
+}
+
+/* Validate that all in-loop uses of NAME are safe for widening:
+ PHIs, casts, or additive ops. Out-of-loop uses are fine since
+ they will get the exit-narrowed value. */
+
+static bool
+validate_uses_in_loop_p (tree name, class loop *loop,
+ hash_set<tree> &visited, int depth = 0)
+{
+ if (depth > param_max_widen_accum_chain_depth)
+ return false;
+
+ if (visited.add (name))
+ return true;
+
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, name)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+
+ if (is_gimple_debug (use_stmt))
+ continue;
+
+ basic_block bb = gimple_bb (use_stmt);
+ if (!bb || !flow_bb_inside_loop_p (loop, bb))
+ continue; /* Out-of-loop use -- ok. */
+
+ if (gimple_code (use_stmt) == GIMPLE_PHI)
+ {
+ /* For in-loop merge PHIs, recurse on the result. */
+ if (bb != loop->header)
+ {
+ tree phi_res = gimple_phi_result (use_stmt);
+ if (!validate_uses_in_loop_p (phi_res, loop, visited,
+ depth + 1))
+ return false;
+ }
+ continue;
+ }
+
+ if (!is_gimple_assign (use_stmt))
+ return false;
+
+ enum tree_code code = gimple_assign_rhs_code (use_stmt);
+ if (!is_additive_or_cast_p (code))
+ return false;
+
+ tree lhs = gimple_assign_lhs (use_stmt);
+ if (!validate_uses_in_loop_p (lhs, loop, visited, depth + 1))
+ return false;
+ }
+ return true;
+}
+
+/* Extract the preheader (init) and latch (back-edge) arguments from
+ a two-argument header PHI. Returns true on success. */
+
+static bool
+get_phi_args (gphi *header_phi, class loop *loop,
+ tree *init_arg_out, tree *back_arg_out,
+ location_t *init_loc_out = NULL,
+ location_t *back_loc_out = NULL)
+{
+ if (gimple_phi_num_args (header_phi) != 2)
+ return false;
+
+ edge pre_edge = loop_preheader_edge (loop);
+ edge latch_edge = loop_latch_edge (loop);
+
+ *init_arg_out = NULL_TREE;
+ *back_arg_out = NULL_TREE;
+
+ for (unsigned i = 0; i < 2; i++)
+ {
+ edge e = gimple_phi_arg_edge (header_phi, i);
+ if (e == pre_edge)
+ {
+ *init_arg_out = gimple_phi_arg_def (header_phi, i);
+ if (init_loc_out)
+ *init_loc_out = gimple_phi_arg_location (header_phi, i);
+ }
+ else if (e == latch_edge)
+ {
+ *back_arg_out = gimple_phi_arg_def (header_phi, i);
+ if (back_loc_out)
+ *back_loc_out = gimple_phi_arg_location (header_phi, i);
+ }
+ }
+
+ return *init_arg_out && *back_arg_out;
+}
+
+/* Top-level analysis for a candidate header PHI.
+ Returns true if the PHI is a narrow accumulator that can be
+ widened. The caller has already verified the PHI result is a
+ narrow integral type. */
+
+static bool
+analyze_candidate (gphi *header_phi, class loop *loop)
+{
+ tree phi_result = gimple_phi_result (header_phi);
+
+ tree init_arg, back_arg;
+ if (!get_phi_args (header_phi, loop, &init_arg, &back_arg))
+ return false;
+
+ /* Verify the back-edge argument chains back to the phi_result
+ through additive ops, casts, and merge-PHIs. */
+ hash_set<tree> chain_visited;
+ auto_vec<tree> undo;
+ if (!verify_chain_to_phi_p (back_arg, phi_result, loop, chain_visited,
+ undo))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " reject: back-edge chain does not reach PHI\n");
+ return false;
+ }
+
+ /* Validate that all in-loop uses of the phi_result are safe. */
+ hash_set<tree> use_visited;
+ if (!validate_uses_in_loop_p (phi_result, loop, use_visited))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " reject: unsafe in-loop uses\n");
+ return false;
+ }
+
+ return true;
+}
+
+/* If OPERAND already has a wide mapping in NARROW_TO_WIDE, return it.
+ If OPERAND is an INTEGER_CST, return fold_convert to WIDE_TYPE.
+ Otherwise, insert a widening NOP_EXPR before GSI and return the
+ new wide SSA name. */
+
+static tree
+get_or_widen (tree operand, tree wide_type,
+ hash_map<tree, tree> &narrow_to_wide,
+ gimple_stmt_iterator &gsi)
+{
+ tree *mapped = narrow_to_wide.get (operand);
+ if (mapped)
+ return *mapped;
+
+ if (TREE_CODE (operand) == INTEGER_CST)
+ return fold_convert (wide_type, operand);
+
+ /* Insert a widening cast. */
+ tree wide_name = make_ssa_name (wide_type);
+ gassign *widen_stmt = gimple_build_assign (wide_name, NOP_EXPR, operand);
+ gsi_insert_before (&gsi, widen_stmt, GSI_SAME_STMT);
+ return wide_name;
+}
+
+/* Recursively resolve the wide-type version of the back-edge PHI
+ argument. Handles:
+ - Names already in narrow_to_wide (direct lookup)
+ - Type conversions of any width (look through to source)
+ - Merge-PHIs inside the loop (create wide merge-PHIs) */
+
+static tree
+resolve_wide_back_arg (tree back_arg, tree wide_type,
+ hash_map<tree, tree> &narrow_to_wide,
+ class loop *loop, int depth = 0)
+{
+ if (depth > param_max_widen_accum_chain_depth)
+ return NULL_TREE;
+
+ if (TREE_CODE (back_arg) != SSA_NAME)
+ return NULL_TREE;
+
+ tree *mapped = narrow_to_wide.get (back_arg);
+ if (mapped)
+ return *mapped;
+
+ gimple *def = SSA_NAME_DEF_STMT (back_arg);
+ if (!def)
+ return NULL_TREE;
+
+ /* Any type conversion (same-width sign-change, narrowing truncation,
+ or widening cast): look through to the source operand. This
+ handles the int->short truncation that the worklist skips, as well
+ as same-width casts like signed short <-> unsigned short. The
+ recursion will eventually reach a name in narrow_to_wide. */
+ if (is_gimple_assign (def)
+ && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
+ {
+ tree src = gimple_assign_rhs1 (def);
+ return resolve_wide_back_arg (src, wide_type,
+ narrow_to_wide, loop, depth + 1);
+ }
+
+ /* Merge-PHI inside the loop. */
+ if (gimple_code (def) == GIMPLE_PHI)
+ {
+ gphi *merge_phi = as_a<gphi *> (def);
+ basic_block bb = gimple_bb (merge_phi);
+ if (!bb || !flow_bb_inside_loop_p (loop, bb) || bb == loop->header)
+ return NULL_TREE;
+
+ tree wide_phi_result = make_ssa_name (wide_type);
+ gphi *new_phi = create_phi_node (wide_phi_result, bb);
+
+ /* Map before recursion to break cycles. */
+ narrow_to_wide.put (back_arg, wide_phi_result);
+
+ for (unsigned i = 0; i < gimple_phi_num_args (merge_phi); i++)
+ {
+ tree arg = gimple_phi_arg_def (merge_phi, i);
+ edge e = gimple_phi_arg_edge (merge_phi, i);
+ tree wide_arg = resolve_wide_back_arg (arg, wide_type,
+ narrow_to_wide, loop,
+ depth + 1);
+ if (!wide_arg)
+ {
+ /* Fall back to widening the narrow arg on this edge. */
+ wide_arg = make_ssa_name (wide_type);
+ gassign *cast_stmt
+ = gimple_build_assign (wide_arg, NOP_EXPR, arg);
+ gsi_insert_on_edge (e, cast_stmt);
+ }
+ add_phi_arg (new_phi, wide_arg, e, UNKNOWN_LOCATION);
+ }
+ return wide_phi_result;
+ }
+
+ return NULL_TREE;
+}
+
+/* Perform the widening transformation on HEADER_PHI.
+ Returns true on success. */
+
+static bool
+widen_accumulator (gphi *header_phi, class loop *loop)
+{
+ tree phi_result = gimple_phi_result (header_phi);
+ tree narrow_type = TREE_TYPE (phi_result);
+ unsigned narrow_prec = TYPE_PRECISION (narrow_type);
+ /* Use unsigned to ensure the wide addition wraps on overflow,
+ matching the wrapping semantics of the original narrow-type
+ arithmetic. Using signed int would introduce UB that did not
+ exist in the original program. */
+ tree wide_type = unsigned_type_node;
+
+ /* Ensure we actually widen. */
+ if (TYPE_PRECISION (wide_type) <= narrow_prec)
+ return false;
+
+ tree init_arg, back_arg;
+ location_t init_loc, back_loc;
+ if (!get_phi_args (header_phi, loop, &init_arg, &back_arg,
+ &init_loc, &back_loc))
+ return false;
+
+ edge pre_edge = loop_preheader_edge (loop);
+ edge latch_edge = loop_latch_edge (loop);
+
+ /* 1. Create widened init on the preheader edge. */
+ tree wide_init = make_ssa_name (wide_type);
+ gassign *init_cast = gimple_build_assign (wide_init, NOP_EXPR, init_arg);
+ gsi_insert_on_edge (pre_edge, init_cast);
+
+ /* 2. Create the new wide PHI in the loop header.
+ Add the preheader arg now; the back-edge arg is added later. */
+ tree wide_phi_result = make_ssa_name (wide_type);
+ gphi *wide_phi = create_phi_node (wide_phi_result, loop->header);
+ add_phi_arg (wide_phi, wide_init, pre_edge, init_loc);
+
+ /* 3. Map old phi_result -> wide_phi_result. */
+ hash_map<tree, tree> narrow_to_wide;
+ narrow_to_wide.put (phi_result, wide_phi_result);
+
+ /* 4. Worklist-driven widening of the in-loop chain.
+ The original narrow statements are left in place; they become
+ dead once exit PHIs are patched below and are removed by DCE. */
+ auto_vec<tree> worklist;
+ worklist.safe_push (phi_result);
+
+ while (!worklist.is_empty ())
+ {
+ tree old_name = worklist.pop ();
+ tree *wide_name_p = narrow_to_wide.get (old_name);
+ if (!wide_name_p)
+ continue;
+ tree wide_name = *wide_name_p;
+
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ /* Collect uses first to avoid issues with iteration.
+ Use a hash set to deduplicate -- a statement may appear
+ more than once if it uses old_name in multiple operands. */
+ hash_set<gimple *> seen_stmts;
+ auto_vec<gimple *> use_stmts;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, old_name)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (is_gimple_debug (use_stmt))
+ continue;
+ basic_block bb = gimple_bb (use_stmt);
+ if (!bb || !flow_bb_inside_loop_p (loop, bb))
+ continue;
+ if (!seen_stmts.add (use_stmt))
+ use_stmts.safe_push (use_stmt);
+ }
+
+ unsigned i;
+ gimple *use_stmt;
+ FOR_EACH_VEC_ELT (use_stmts, i, use_stmt)
+ {
+ /* Skip merge PHIs -- handled by resolve_wide_back_arg. */
+ if (gimple_code (use_stmt) == GIMPLE_PHI)
+ continue;
+
+ if (!is_gimple_assign (use_stmt))
+ continue;
+
+ enum tree_code code = gimple_assign_rhs_code (use_stmt);
+ tree lhs = gimple_assign_lhs (use_stmt);
+
+ if (CONVERT_EXPR_CODE_P (code))
+ {
+ /* Widening or same-width cast: map lhs -> wide_name.
+ This absorbs same-width sign-changes (e.g. short <->
+ unsigned short) by mapping them to the same wide SSA. */
+ if (is_widening_or_nop_cast_p (use_stmt, narrow_prec))
+ {
+ if (!narrow_to_wide.get (lhs))
+ {
+ narrow_to_wide.put (lhs, wide_name);
+ worklist.safe_push (lhs);
+ }
+ continue;
+ }
+ /* Any other cast (truly narrowing from wider type, etc.)
+ -- skip; the value won't be widened. */
+ continue;
+ }
+
+ if (code == PLUS_EXPR || code == MINUS_EXPR)
+ {
+ if (narrow_to_wide.get (lhs))
+ continue; /* Already widened. */
+
+ tree op0 = gimple_assign_rhs1 (use_stmt);
+ tree op1 = gimple_assign_rhs2 (use_stmt);
+
+ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+
+ tree w0 = get_or_widen (op0, wide_type, narrow_to_wide, gsi);
+ tree w1 = get_or_widen (op1, wide_type, narrow_to_wide, gsi);
+
+ tree wide_lhs = make_ssa_name (wide_type);
+ gassign *wide_stmt
+ = gimple_build_assign (wide_lhs, code, w0, w1);
+ gsi_insert_after (&gsi, wide_stmt, GSI_NEW_STMT);
+
+ narrow_to_wide.put (lhs, wide_lhs);
+ worklist.safe_push (lhs);
+ continue;
+ }
+ }
+ }
+
+ /* 5. Wire the back-edge argument. */
+ tree wide_back = resolve_wide_back_arg (back_arg, wide_type,
+ narrow_to_wide, loop);
+ /* Analysis guarantees the back-edge chain reaches the header PHI
+ through additive ops, casts, and merge-PHIs -- all of which
+ resolve_wide_back_arg handles. A NULL here means a bug. */
+ gcc_assert (wide_back);
+ add_phi_arg (wide_phi, wide_back, latch_edge, back_loc);
+
+ /* Ensure back_arg is in the map so exit PHI replacement finds it.
+ resolve_wide_back_arg maps intermediate nodes (merge-PHIs, cast
+ sources) but not back_arg itself. */
+ if (!narrow_to_wide.get (back_arg))
+ narrow_to_wide.put (back_arg, wide_back);
+
+ /* 6. Insert narrowing casts at loop exits.
+ For each exit edge, scan its destination's PHIs for arguments
+ that reference any name in the widened accumulator chain. For
+ each match, insert a narrowing cast of the corresponding wide
+ value on that edge. This handles both the back-edge value and
+ intermediate accumulator values at early exits. */
+ auto_vec<edge> exits = get_loop_exit_edges (loop);
+ unsigned j;
+ edge ex;
+ FOR_EACH_VEC_ELT (exits, j, ex)
+ {
+ if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
+ continue;
+
+ for (gphi_iterator psi = gsi_start_phis (ex->dest);
+ !gsi_end_p (psi); gsi_next (&psi))
+ {
+ gphi *exit_phi = psi.phi ();
+ for (unsigned k = 0; k < gimple_phi_num_args (exit_phi); k++)
+ {
+ if (gimple_phi_arg_edge (exit_phi, k) != ex)
+ continue;
+ tree arg = gimple_phi_arg_def (exit_phi, k);
+ tree arg_type = TREE_TYPE (arg);
+ /* Only replace args that carry a narrow-type value.
+ The narrow_to_wide map may also contain int-typed
+ intermediates (from widening casts in the accumulator
+ chain); replacing those with a narrow_type cast would
+ create a type mismatch in the exit PHI. */
+ if (INTEGRAL_TYPE_P (arg_type)
+ && TYPE_PRECISION (arg_type) > narrow_prec)
+ continue;
+ tree *wide_p = narrow_to_wide.get (arg);
+ if (!wide_p)
+ continue;
+
+ tree narrow_exit = make_ssa_name (narrow_type);
+ gassign *exit_cast
+ = gimple_build_assign (narrow_exit, NOP_EXPR, *wide_p);
+ gsi_insert_on_edge (ex, exit_cast);
+ SET_PHI_ARG_DEF (exit_phi, k, narrow_exit);
+ }
+ }
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Accumulator widened successfully: ");
+ print_generic_expr (dump_file, phi_result, TDF_SLIM);
+ fprintf (dump_file, " in loop %d\n", loop->num);
+ }
+
+ return true;
+}
+
+namespace {
+
+/* Pass descriptor.  Requires CFG and SSA form (the pass walks loops and
+   rewrites SSA names); provides/destroys nothing.  NOTE(review): the
+   unconditional TODO_cleanup_cfg presumably tidies blocks created by
+   committing the exit-edge cast insertions -- confirm it is still needed
+   when the pass makes no changes.  */
+const pass_data pass_data_widen_accumulator =
+{
+  GIMPLE_PASS, /* type */
+  "widen_accum", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_TREE_WIDEN_ACCUM, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_cleanup_cfg, /* todo_flags_finish */
+};
+
+/* GIMPLE pass wrapper for narrow-accumulator widening.  */
+class pass_widen_accumulator : public gimple_opt_pass
+{
+public:
+  pass_widen_accumulator (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_widen_accumulator, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  unsigned int execute (function *) final override;
+
+  bool gate (function *) final override
+  {
+    /* Run only when explicitly enabled.  Skipped under -ftrapv:
+       presumably because widening would change which additions are
+       subject to trapping-overflow checks -- confirm.  */
+    if (!flag_tree_widen_accum)
+      return false;
+    return !flag_trapv;
+  }
+};
+
+/* Pass execution.  Walk every loop from innermost to outermost and, for
+   each integer header PHI narrower than int, check whether it is a
+   widenable accumulator (analyze_candidate) and if so rewrite it to use
+   an int-width accumulator (widen_accumulator).  Returns 0; IL
+   maintenance (committing edge insertions, restoring loop-closed SSA)
+   is performed here directly rather than via TODO flags.  */
+unsigned int
+pass_widen_accumulator::execute (function *fun)
+{
+  bool changed = false;
+
+  loop_optimizer_init (LOOPS_NORMAL);
+
+  /* number_of_loops counts the artificial outermost loop 0, so a
+     function with no real loops reports 1.  */
+  if (number_of_loops (fun) <= 1)
+    {
+      loop_optimizer_finalize ();
+      return 0;
+    }
+
+  /* Use FUN (the function the pass manager handed us) rather than the
+     global cfun for consistency with the checks above.  */
+  for (auto loop : loops_list (fun, LI_FROM_INNERMOST))
+    {
+      /* NOTE: widen_accumulator creates a wide PHI (its latch argument
+	 is wired via add_phi_arg); if that PHI lands in this header,
+	 the precision test below skips it when the iterator reaches
+	 it, so iterating while transforming is safe.  */
+      for (gphi_iterator gsi = gsi_start_phis (loop->header);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gphi *phi = gsi.phi ();
+	  tree result = gimple_phi_result (phi);
+
+	  /* Only real (non-virtual) integral values are candidates.  */
+	  if (virtual_operand_p (result))
+	    continue;
+
+	  if (!INTEGRAL_TYPE_P (TREE_TYPE (result)))
+	    continue;
+
+	  /* Already int-width or wider: nothing to gain.  */
+	  if (TYPE_PRECISION (TREE_TYPE (result))
+	      >= TYPE_PRECISION (integer_type_node))
+	    continue;
+
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file,
+		       "\nExamining narrow PHI (prec=%u) in loop %d:\n",
+		       TYPE_PRECISION (TREE_TYPE (result)), loop->num);
+	      print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
+	    }
+
+	  if (analyze_candidate (phi, loop))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		{
+		  fprintf (dump_file,
+			   "\nCandidate accumulator PHI in loop %d:\n",
+			   loop->num);
+		  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
+		}
+	      if (widen_accumulator (phi, loop))
+		changed = true;
+	    }
+	}
+    }
+
+  if (changed)
+    {
+      /* Materialize the narrowing casts queued on loop-exit edges.  */
+      gsi_commit_edge_inserts ();
+      /* Rewrite into loop-closed SSA so that subsequent passes that
+	 expect LCSSA form (e.g. pass_optimize_widening_mul) see
+	 correct PHI nodes at loop exits for the new wide names.  */
+      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+    }
+
+  loop_optimizer_finalize ();
+
+  return 0;
+}
+
+} // anonymous namespace
+
+/* Factory entry point used by the pass manager; the declaration lives
+   in tree-pass.h and the pass is scheduled from passes.def.  */
+gimple_opt_pass *
+make_pass_widen_accumulator (gcc::context *ctxt)
+{
+  return new pass_widen_accumulator (ctxt);
+}
--
2.34.1