Add a new SSA pass (pass_widen_accum) that widens narrow integer
loop accumulators (e.g. short, char) to int-width, eliminating
per-iteration sign-/zero-extension truncations.

The pass is gated on -ftree-widen-accum, enabled at -O2 and above.

gcc/ChangeLog:

        * common.opt (ftree-widen-accum): New flag.
        * opts.cc (default_options_table): Enable at -O2+.
        * params.opt (max-widen-accum-chain-depth): New param.
        * tree-ssa-loop-widen-accum.cc: New file.
        * Makefile.in (OBJS): Add tree-ssa-loop-widen-accum.o.
        * passes.def (pass_widen_accumulator): Schedule after phiopt,
        before widening_mul which needs LCSSA.
        * timevar.def (TV_TREE_WIDEN_ACCUM): New timevar.
        * tree-pass.h (make_pass_widen_accumulator): Declare.
        * doc/invoke.texi (-ftree-widen-accum): Document new flag.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/widen-accum-1.c: New test.
        * gcc.dg/tree-ssa/widen-accum-2.c: New test.
        * gcc.dg/tree-ssa/widen-accum-3.c: New test.
        * gcc.dg/tree-ssa/widen-accum-4.c: New test.
        * gcc.dg/tree-ssa/widen-accum-5.c: New test.
        * gcc.dg/tree-ssa/widen-accum-6.c: New test.
        * gcc.dg/tree-ssa/widen-accum-7.c: New test.
        * gcc.dg/tree-ssa/widen-accum-8.c: New test.
        * gcc.target/riscv/widen-accum-1.c: New test.
        * gcc.target/aarch64/widen-accum-1.c: New test.
---
 gcc/Makefile.in                               |   1 +
 gcc/common.opt                                |   4 +
 gcc/doc/invoke.texi                           |  14 +-
 gcc/opts.cc                                   |   1 +
 gcc/params.opt                                |   4 +
 gcc/passes.def                                |   1 +
 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c |  20 +
 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c |  19 +
 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c |  26 +
 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c |  20 +
 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c |  33 +
 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c |  15 +
 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c |  15 +
 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c |  15 +
 .../gcc.target/aarch64/widen-accum-1.c        |  15 +
 .../gcc.target/riscv/widen-accum-1.c          |  15 +
 gcc/timevar.def                               |   1 +
 gcc/tree-pass.h                               |   1 +
 gcc/tree-ssa-loop-widen-accum.cc              | 713 ++++++++++++++++++
 19 files changed, 932 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/widen-accum-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/widen-accum-1.c
 create mode 100644 gcc/tree-ssa-loop-widen-accum.cc

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 9e8da255186a..c94a8a0a5940 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1799,6 +1799,7 @@ OBJS = \
        tree-ssa-loop-prefetch.o \
        tree-ssa-loop-split.o \
        tree-ssa-loop-unswitch.o \
+       tree-ssa-loop-widen-accum.o \
        tree-ssa-loop.o \
        tree-ssa-math-opts.o \
        tree-ssa-operands.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 88b79bbf8f56..bd3e2707ab4c 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3380,6 +3380,10 @@ ftree-vrp
 Common Var(flag_tree_vrp) Init(0) Optimization
 Perform Value Range Propagation on trees.
 
+ftree-widen-accum
+Common Var(flag_tree_widen_accum) Optimization
+Widen narrow-type loop accumulators to avoid per-iteration truncations.
+
 fsplit-paths
 Common Var(flag_split_paths) Init(0) Optimization
 Split paths leading to loop backedges.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fe20ae66c00b..d38333b59c4d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -688,7 +688,7 @@ Objective-C and Objective-C++ Dialects}.
 -ftree-parallelize-loops[=@var{n}]  -ftree-pre  -ftree-partial-pre  -ftree-pta
 -ftree-reassoc  -ftree-scev-cprop  -ftree-sink  -ftree-slsr  -ftree-sra
 -ftree-switch-conversion  -ftree-tail-merge
--ftree-ter  -ftree-vectorize  -ftree-vrp  -ftrivial-auto-var-init
+-ftree-ter  -ftree-vectorize  -ftree-vrp  -ftree-widen-accum  -ftrivial-auto-var-init
 -funconstrained-commons  -funit-at-a-time  -funroll-all-loops
 -funroll-loops  -funsafe-math-optimizations  -funswitch-loops
 -fipa-ra  -fvariable-expansion-in-unroller  -fvect-cost-model  -fvpt
@@ -13463,6 +13463,7 @@ also turns on the following optimization flags:
 -ftree-slp-vectorize
 -ftree-switch-conversion  -ftree-tail-merge
 -ftree-vrp
+-ftree-widen-accum
 -fvect-cost-model=very-cheap}
 
 Please note the warning under @option{-fgcse} about
@@ -15322,6 +15323,17 @@ enabled by default at @option{-O2} and higher.  Null pointer check
 elimination is only done if @option{-fdelete-null-pointer-checks} is
 enabled.
 
+@opindex ftree-widen-accum
+@opindex fno-tree-widen-accum
+@item -ftree-widen-accum
+Widen narrow-type loop accumulators (e.g.@: @code{short}, @code{char})
+to @code{int} width, eliminating per-iteration sign- or zero-extension
+truncations.  Since truncation modulo @math{2^N} commutes with
+addition and subtraction, it can be deferred to loop exits without
+changing the observable result.  This is enabled by default at
+@option{-O2} and higher and is not applied when @option{-ftrapv} is
+in effect.
+
 @opindex fsplit-paths
 @opindex fno-split-paths
 @item -fsplit-paths
diff --git a/gcc/opts.cc b/gcc/opts.cc
index 6658b6acd378..2faab06879cd 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -673,6 +673,7 @@ static const struct default_options default_options_table[] =
     { OPT_LEVELS_2_PLUS, OPT_ftree_switch_conversion, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_ftree_vrp, NULL, 1 },
+    { OPT_LEVELS_2_PLUS, OPT_ftree_widen_accum, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL,
       VECT_COST_MODEL_VERY_CHEAP },
     { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
diff --git a/gcc/params.opt b/gcc/params.opt
index 4420189e9822..305ed70a1256 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -846,6 +846,10 @@ Maximum size of loc list for which reverse ops should be added.
 Common Joined UInteger Var(param_max_vartrack_size) Init(50000000) Param Optimization
 Maximum size of var tracking hash tables.
 
+-param=max-widen-accum-chain-depth=
+Common Joined UInteger Var(param_max_widen_accum_chain_depth) Init(50) IntegerRange(1, 200) Param Optimization
+Maximum recursion depth when analyzing or transforming accumulator chains in the widen_accum pass.
+
 -param=max-find-base-term-values=
 Common Joined UInteger Var(param_max_find_base_term_values) Init(200) Param Optimization
 Maximum number of VALUEs handled during a single find_base_term call.
diff --git a/gcc/passes.def b/gcc/passes.def
index cdddb87302f6..3469dfd2e512 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -371,6 +371,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_forwprop, /*full_walk=*/false, /*last=*/true);
       NEXT_PASS (pass_sink_code, true /* unsplit edges */);
       NEXT_PASS (pass_phiopt, false /* early_p */);
+      NEXT_PASS (pass_widen_accumulator); /* Before widening_mul which needs LCSSA.  */
       NEXT_PASS (pass_optimize_widening_mul);
       NEXT_PASS (pass_store_merging);
       /* If DCE is not run before checking for uninitialized uses,
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c
new file mode 100644
index 000000000000..bfadad0201fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+typedef short ee_s16;
+
+ee_s16 __attribute__((noipa))
+test_widen (int N, int *A, int val)
+{
+  ee_s16 ret = 0;
+  for (int i = 0; i < N; i++)
+    {
+      if (A[i] > val)
+        ret += 10;
+      else
+        ret += (A[i] > 0) ? 1 : 0;
+    }
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c
new file mode 100644
index 000000000000..5b3cecbec078
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+typedef short ee_s16;
+
+ee_s16 __attribute__((noipa))
+test_no_widen (int N, int *A, ee_s16 limit)
+{
+  ee_s16 ret = 0;
+  for (int i = 0; i < N; i++)
+    {
+      ret += A[i];
+      if (ret > limit)  /* comparison of narrow accumulator -- blocks widening */
+        ret = 0;
+    }
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump-not "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c
new file mode 100644
index 000000000000..24bbaa7f11cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-3.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+typedef short ee_s16;
+
+/* Multi-exit loop: the early return carries an intermediate
+   accumulator value, not the back-edge value.  The pass should
+   still widen successfully.  */
+
+ee_s16 __attribute__((noipa))
+test_multi_exit (int N, int *A, int val, int sentinel)
+{
+  ee_s16 ret = 0;
+  for (int i = 0; i < N; i++)
+    {
+      if (A[i] == sentinel)
+        return ret;        /* early exit with current accumulator value */
+      if (A[i] > val)
+        ret += 10;
+      else
+        ret += (A[i] > 0) ? 1 : 0;
+    }
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c
new file mode 100644
index 000000000000..48f1d739b5b8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-4.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+typedef short ee_s16;
+
+/* Accumulator with int-typed addend: GIMPLE keeps the explicit
+   widen/compute/truncate chain (_1 = (int)ret; _2 = _1 + A[i];
+   ret = (short)_2) because A[i] is int.  The pass must look
+   through the int->short truncation in resolve_wide_back_arg.  */
+
+ee_s16 __attribute__((noipa))
+test_int_addend (int N, int *A)
+{
+  ee_s16 ret = 0;
+  for (int i = 0; i < N; i++)
+    ret += A[i];
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c
new file mode 100644
index 000000000000..b22c6732d4c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-5.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* Verify that widening preserves wraparound semantics.  */
+
+short __attribute__((noipa))
+sum_short (int N, int *A)
+{
+  short ret = 0;
+  for (int i = 0; i < N; i++)
+    ret += A[i];
+  return ret;
+}
+
+int
+main (void)
+{
+  int A[100];
+  for (int i = 0; i < 100; i++)
+    A[i] = 400;
+
+  /* 100 * 400 = 40000 which wraps past short (max 32767).
+     Compute expected result via int, then truncate.  */
+  int ref = 0;
+  for (int i = 0; i < 100; i++)
+    ref += 400;
+  short expected = (short) ref;
+
+  if (sum_short (100, A) != expected)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c
new file mode 100644
index 000000000000..ce1b527c3062
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-6.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+/* signed char accumulator.  */
+
+signed char __attribute__((noipa))
+sum_char (int N, int *A)
+{
+  signed char ret = 0;
+  for (int i = 0; i < N; i++)
+    ret += A[i];
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c
new file mode 100644
index 000000000000..292f0812f178
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-7.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+/* unsigned short accumulator.  */
+
+unsigned short __attribute__((noipa))
+sum_ushort (int N, int *A)
+{
+  unsigned short ret = 0;
+  for (int i = 0; i < N; i++)
+    ret += A[i];
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c
new file mode 100644
index 000000000000..8c4e5fc2f9b8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/widen-accum-8.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-widen_accum-details" } */
+
+/* MINUS_EXPR accumulator.  */
+
+short __attribute__((noipa))
+sub_short (int N, int *A)
+{
+  short ret = 0;
+  for (int i = 0; i < N; i++)
+    ret -= A[i];
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump "Accumulator widened successfully" "widen_accum" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/widen-accum-1.c b/gcc/testsuite/gcc.target/aarch64/widen-accum-1.c
new file mode 100644
index 000000000000..b5c3841fb832
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/widen-accum-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Os" "-Oz" "-Og" } } */
+
+short __attribute__((noipa))
+sum_short (int N, int *A)
+{
+  short ret = 0;
+  for (int i = 0; i < N; i++)
+    ret += A[i];
+  return ret;
+}
+
+/* After widening, the loop should not need sign-extension.  */
+/* { dg-final { scan-assembler-not "sxth" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/widen-accum-1.c b/gcc/testsuite/gcc.target/riscv/widen-accum-1.c
new file mode 100644
index 000000000000..66693abecab7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/widen-accum-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc -mabi=lp64d" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Os" "-Oz" "-Og" } } */
+
+short __attribute__((noipa))
+sum_short (int N, int *A)
+{
+  short ret = 0;
+  for (int i = 0; i < N; i++)
+    ret += A[i];
+  return ret;
+}
+
+/* After widening, the loop should not need sign-extension.  */
+/* { dg-final { scan-assembler-not "sext\\.h" } } */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 3824caa01bc2..78db42f94e8b 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -227,6 +227,7 @@ DEFTIMEVAR (TV_TREE_SWITCH_LOWERING,   "tree switch lowering")
 DEFTIMEVAR (TV_TREE_RECIP            , "gimple CSE reciprocals")
 DEFTIMEVAR (TV_TREE_SINCOS           , "gimple CSE sin/cos")
 DEFTIMEVAR (TV_TREE_POW              , "gimple expand pow")
+DEFTIMEVAR (TV_TREE_WIDEN_ACCUM      , "gimple widen accumulator")
 DEFTIMEVAR (TV_TREE_WIDEN_MUL        , "gimple widening/fma detection")
 DEFTIMEVAR (TV_TRANS_MEM             , "transactional memory")
 DEFTIMEVAR (TV_TREE_STRLEN           , "tree strlen optimization")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index b3c97658a8fe..96fc35b2a76c 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -456,6 +456,7 @@ extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_pow (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_widen_accumulator (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_warn_function_return (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_warn_function_noreturn (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop-widen-accum.cc b/gcc/tree-ssa-loop-widen-accum.cc
new file mode 100644
index 000000000000..4f41ed1bedaa
--- /dev/null
+++ b/gcc/tree-ssa-loop-widen-accum.cc
@@ -0,0 +1,713 @@
+/* Widen narrow-type loop accumulators to int.
+   Copyright (C) 2026 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* Narrow-type loop accumulators (e.g. short) are truncated every
+   iteration, producing redundant sign/zero-extensions on targets such
+   as RISC-V.  Since truncation mod 2^N commutes with addition and
+   subtraction, the truncation can be deferred to loop exits.
+
+   This pass finds header PHIs whose type is narrower than int and
+   whose in-loop uses are limited to additive operations and
+   same-width casts.  It creates a widened (int-typed) copy of the
+   accumulator chain and inserts a single narrowing cast at each loop
+   exit.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "gimple-pretty-print.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "tree-cfg.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop.h"
+#include "cfgloop.h"
+#include "tree-dfa.h"
+#include "tree-ssa.h"
+#include "tree-phinodes.h"
+#include "tree-into-ssa.h"
+
+/* Return true if CODE is an additive operation or a type conversion
+   -- the set of operations that the accumulator chain is allowed to
+   contain.  Shared between verify_chain_to_phi_p (backward walk) and
+   validate_uses_in_loop_p (forward walk) to keep them in sync.  */
+
+static inline bool
+is_additive_or_cast_p (enum tree_code code)
+{
+  return CONVERT_EXPR_CODE_P (code)
+        || code == PLUS_EXPR
+        || code == MINUS_EXPR;
+}
+
+/* Return true if STMT is a CONVERT_EXPR/NOP_EXPR whose input has
+   precision <= NARROW_PREC and whose output has precision >= input.
+   This matches widening casts and same-width sign-changes.  */
+
+static bool
+is_widening_or_nop_cast_p (gimple *stmt, unsigned narrow_prec)
+{
+  if (!is_gimple_assign (stmt))
+    return false;
+  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
+    return false;
+  tree rhs = gimple_assign_rhs1 (stmt);
+  tree lhs = gimple_assign_lhs (stmt);
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+      || !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
+    return false;
+  return (TYPE_PRECISION (TREE_TYPE (rhs)) <= narrow_prec
+         && TYPE_PRECISION (TREE_TYPE (lhs))
+            >= TYPE_PRECISION (TREE_TYPE (rhs)));
+}
+
+/* Walk backward from NAME through CONVERT_EXPR / PLUS_EXPR / MINUS_EXPR
+   and merge-PHIs, verifying the chain reaches PHI_RESULT within LOOP.
+   UNDO collects nodes added to VISITED so that a failed speculative
+   walk (trying op0 before op1) can be rolled back cheaply.  */
+
+static bool
+verify_chain_to_phi_p (tree name, tree phi_result, class loop *loop,
+                      hash_set<tree> &visited,
+                      auto_vec<tree> &undo, int depth = 0)
+{
+  /* Iteratively follow CONVERT_EXPR chains to avoid recursion on
+     the common linear-cast case; recurse for PHIs and PLUS/MINUS.
+     The depth parameter bounds total recursion across all frames.  */
+  for (;;)
+    {
+      if (depth > param_max_widen_accum_chain_depth)
+       return false;
+
+      if (name == phi_result)
+       return true;
+
+      if (TREE_CODE (name) != SSA_NAME)
+       return false;
+
+      /* Already verified on a convergent path -- ok.  In-loop merge
+        PHIs cannot form cycles without going through the header
+        PHI, so a previously visited node is safe.  */
+      if (visited.add (name))
+       return true;
+      undo.safe_push (name);
+
+      gimple *def = SSA_NAME_DEF_STMT (name);
+      if (!def)
+       return false;
+
+      basic_block bb = gimple_bb (def);
+      if (!bb || !flow_bb_inside_loop_p (loop, bb))
+       return false;
+
+      /* Merge-PHI inside the loop (not header): each in-loop
+        argument must trace back to phi_result.  */
+      if (gimple_code (def) == GIMPLE_PHI)
+       {
+         if (bb == loop->header)
+           return false;
+         gphi *phi = as_a<gphi *> (def);
+         for (unsigned i = 0; i < gimple_phi_num_args (phi); i++)
+           {
+             edge e = gimple_phi_arg_edge (phi, i);
+             if (!flow_bb_inside_loop_p (loop, e->src))
+               continue;
+             tree arg = gimple_phi_arg_def (phi, i);
+             if (!verify_chain_to_phi_p (arg, phi_result, loop, visited,
+                                         undo, depth + 1))
+               return false;
+           }
+         return true;
+       }
+
+      if (!is_gimple_assign (def))
+       return false;
+
+      enum tree_code code = gimple_assign_rhs_code (def);
+      if (!is_additive_or_cast_p (code))
+       return false;
+
+      if (CONVERT_EXPR_CODE_P (code))
+       {
+         name = gimple_assign_rhs1 (def);
+         depth++;
+         continue;
+       }
+
+      /* PLUS_EXPR / MINUS_EXPR: one operand is the accumulator chain,
+        the other is a non-accumulator addend.  Try op0 first; on
+        failure, roll back any nodes it added to VISITED before
+        trying op1 -- a polluted set could cause a false-positive
+        convergent-path hit.  */
+      tree op0 = gimple_assign_rhs1 (def);
+      tree op1 = gimple_assign_rhs2 (def);
+      unsigned mark = undo.length ();
+      if (verify_chain_to_phi_p (op0, phi_result, loop, visited,
+                                undo, depth + 1))
+       return true;
+      /* Roll back nodes added by the failed op0 walk.  */
+      while (undo.length () > mark)
+       visited.remove (undo.pop ());
+      return verify_chain_to_phi_p (op1, phi_result, loop, visited,
+                                   undo, depth + 1);
+    }
+}
+
+/* Validate that all in-loop uses of NAME are safe for widening:
+   PHIs, casts, or additive ops.  Out-of-loop uses are fine since
+   they will get the exit-narrowed value.  */
+
+static bool
+validate_uses_in_loop_p (tree name, class loop *loop,
+                        hash_set<tree> &visited, int depth = 0)
+{
+  if (depth > param_max_widen_accum_chain_depth)
+    return false;
+
+  if (visited.add (name))
+    return true;
+
+  imm_use_iterator iter;
+  use_operand_p use_p;
+  FOR_EACH_IMM_USE_FAST (use_p, iter, name)
+    {
+      gimple *use_stmt = USE_STMT (use_p);
+
+      if (is_gimple_debug (use_stmt))
+       continue;
+
+      basic_block bb = gimple_bb (use_stmt);
+      if (!bb || !flow_bb_inside_loop_p (loop, bb))
+       continue;  /* Out-of-loop use -- ok.  */
+
+      if (gimple_code (use_stmt) == GIMPLE_PHI)
+       {
+         /* For in-loop merge PHIs, recurse on the result.  */
+         if (bb != loop->header)
+           {
+             tree phi_res = gimple_phi_result (use_stmt);
+             if (!validate_uses_in_loop_p (phi_res, loop, visited,
+                                           depth + 1))
+               return false;
+           }
+         continue;
+       }
+
+      if (!is_gimple_assign (use_stmt))
+       return false;
+
+      enum tree_code code = gimple_assign_rhs_code (use_stmt);
+      if (!is_additive_or_cast_p (code))
+       return false;
+
+      tree lhs = gimple_assign_lhs (use_stmt);
+      if (!validate_uses_in_loop_p (lhs, loop, visited, depth + 1))
+       return false;
+    }
+  return true;
+}
+
+/* Extract the preheader (init) and latch (back-edge) arguments from
+   a two-argument header PHI.  Returns true on success.  */
+
+static bool
+get_phi_args (gphi *header_phi, class loop *loop,
+             tree *init_arg_out, tree *back_arg_out,
+             location_t *init_loc_out = NULL,
+             location_t *back_loc_out = NULL)
+{
+  if (gimple_phi_num_args (header_phi) != 2)
+    return false;
+
+  edge pre_edge = loop_preheader_edge (loop);
+  edge latch_edge = loop_latch_edge (loop);
+
+  *init_arg_out = NULL_TREE;
+  *back_arg_out = NULL_TREE;
+
+  for (unsigned i = 0; i < 2; i++)
+    {
+      edge e = gimple_phi_arg_edge (header_phi, i);
+      if (e == pre_edge)
+       {
+         *init_arg_out = gimple_phi_arg_def (header_phi, i);
+         if (init_loc_out)
+           *init_loc_out = gimple_phi_arg_location (header_phi, i);
+       }
+      else if (e == latch_edge)
+       {
+         *back_arg_out = gimple_phi_arg_def (header_phi, i);
+         if (back_loc_out)
+           *back_loc_out = gimple_phi_arg_location (header_phi, i);
+       }
+    }
+
+  return *init_arg_out && *back_arg_out;
+}
+
+/* Top-level analysis for a candidate header PHI.
+   Returns true if the PHI is a narrow accumulator that can be
+   widened.  The caller has already verified the PHI result is a
+   narrow integral type.  */
+
+static bool
+analyze_candidate (gphi *header_phi, class loop *loop)
+{
+  tree phi_result = gimple_phi_result (header_phi);
+
+  tree init_arg, back_arg;
+  if (!get_phi_args (header_phi, loop, &init_arg, &back_arg))
+    return false;
+
+  /* Verify the back-edge argument chains back to the phi_result
+     through additive ops, casts, and merge-PHIs.  */
+  hash_set<tree> chain_visited;
+  auto_vec<tree> undo;
+  if (!verify_chain_to_phi_p (back_arg, phi_result, loop, chain_visited,
+                             undo))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file, "  reject: back-edge chain does not reach PHI\n");
+      return false;
+    }
+
+  /* Validate that all in-loop uses of the phi_result are safe.  */
+  hash_set<tree> use_visited;
+  if (!validate_uses_in_loop_p (phi_result, loop, use_visited))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file, "  reject: unsafe in-loop uses\n");
+      return false;
+    }
+
+  return true;
+}
+
+/* If OPERAND already has a wide mapping in NARROW_TO_WIDE, return it.
+   If OPERAND is an INTEGER_CST, return fold_convert to WIDE_TYPE.
+   Otherwise, insert a widening NOP_EXPR before GSI and return the
+   new wide SSA name.  */
+
+static tree
+get_or_widen (tree operand, tree wide_type,
+             hash_map<tree, tree> &narrow_to_wide,
+             gimple_stmt_iterator &gsi)
+{
+  tree *mapped = narrow_to_wide.get (operand);
+  if (mapped)
+    return *mapped;
+
+  if (TREE_CODE (operand) == INTEGER_CST)
+    return fold_convert (wide_type, operand);
+
+  /* Insert a widening cast.  */
+  tree wide_name = make_ssa_name (wide_type);
+  gassign *widen_stmt = gimple_build_assign (wide_name, NOP_EXPR, operand);
+  gsi_insert_before (&gsi, widen_stmt, GSI_SAME_STMT);
+  return wide_name;
+}
+
+/* Recursively resolve the wide-type version of the back-edge PHI
+   argument.  Handles:
+   - Names already in narrow_to_wide (direct lookup)
+   - Type conversions of any width (look through to source)
+   - Merge-PHIs inside the loop (create wide merge-PHIs)  */
+
+static tree
+resolve_wide_back_arg (tree back_arg, tree wide_type,
+                      hash_map<tree, tree> &narrow_to_wide,
+                      class loop *loop, int depth = 0)
+{
+  if (depth > param_max_widen_accum_chain_depth)
+    return NULL_TREE;
+
+  if (TREE_CODE (back_arg) != SSA_NAME)
+    return NULL_TREE;
+
+  tree *mapped = narrow_to_wide.get (back_arg);
+  if (mapped)
+    return *mapped;
+
+  gimple *def = SSA_NAME_DEF_STMT (back_arg);
+  if (!def)
+    return NULL_TREE;
+
+  /* Any type conversion (same-width sign-change, narrowing truncation,
+     or widening cast): look through to the source operand.  This
+     handles the int->short truncation that the worklist skips, as well
+     as same-width casts like signed short <-> unsigned short.  The
+     recursion will eventually reach a name in narrow_to_wide.  */
+  if (is_gimple_assign (def)
+      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
+    {
+      tree src = gimple_assign_rhs1 (def);
+      return resolve_wide_back_arg (src, wide_type,
+                                   narrow_to_wide, loop, depth + 1);
+    }
+
+  /* Merge-PHI inside the loop.  */
+  if (gimple_code (def) == GIMPLE_PHI)
+    {
+      gphi *merge_phi = as_a<gphi *> (def);
+      basic_block bb = gimple_bb (merge_phi);
+      if (!bb || !flow_bb_inside_loop_p (loop, bb) || bb == loop->header)
+       return NULL_TREE;
+
+      tree wide_phi_result = make_ssa_name (wide_type);
+      gphi *new_phi = create_phi_node (wide_phi_result, bb);
+
+      /* Map before recursion to break cycles.  */
+      narrow_to_wide.put (back_arg, wide_phi_result);
+
+      for (unsigned i = 0; i < gimple_phi_num_args (merge_phi); i++)
+       {
+         tree arg = gimple_phi_arg_def (merge_phi, i);
+         edge e = gimple_phi_arg_edge (merge_phi, i);
+         tree wide_arg = resolve_wide_back_arg (arg, wide_type,
+                                                narrow_to_wide, loop,
+                                                depth + 1);
+         if (!wide_arg)
+           {
+             /* Fall back to widening the narrow arg on this edge.  */
+             wide_arg = make_ssa_name (wide_type);
+             gassign *cast_stmt
+               = gimple_build_assign (wide_arg, NOP_EXPR, arg);
+             gsi_insert_on_edge (e, cast_stmt);
+           }
+         add_phi_arg (new_phi, wide_arg, e, UNKNOWN_LOCATION);
+       }
+      return wide_phi_result;
+    }
+
+  return NULL_TREE;
+}
+
+/* Perform the widening transformation on HEADER_PHI, an accumulator
+   PHI of narrow integral type in LOOP's header that analysis has
+   already accepted.  A parallel wide (unsigned int) accumulator chain
+   is built alongside the narrow one and wired through the back edge;
+   loop-exit PHIs are then patched to consume narrowed copies of the
+   wide values.  The original narrow statements are left in place and
+   become dead, to be removed by later DCE.
+   Returns true on success.  */
+
+static bool
+widen_accumulator (gphi *header_phi, class loop *loop)
+{
+  tree phi_result = gimple_phi_result (header_phi);
+  tree narrow_type = TREE_TYPE (phi_result);
+  unsigned narrow_prec = TYPE_PRECISION (narrow_type);
+  /* Use unsigned to ensure the wide addition wraps on overflow,
+     matching the wrapping semantics of the original narrow-type
+     arithmetic.  Using signed int would introduce UB that did not
+     exist in the original program.  */
+  tree wide_type = unsigned_type_node;
+
+  /* Ensure we actually widen.  */
+  if (TYPE_PRECISION (wide_type) <= narrow_prec)
+    return false;
+
+  tree init_arg, back_arg;
+  location_t init_loc, back_loc;
+  if (!get_phi_args (header_phi, loop, &init_arg, &back_arg,
+                     &init_loc, &back_loc))
+    return false;
+
+  edge pre_edge = loop_preheader_edge (loop);
+  edge latch_edge = loop_latch_edge (loop);
+
+  /* 1. Create widened init on the preheader edge.  */
+  tree wide_init = make_ssa_name (wide_type);
+  gassign *init_cast = gimple_build_assign (wide_init, NOP_EXPR, init_arg);
+  gsi_insert_on_edge (pre_edge, init_cast);
+
+  /* 2. Create the new wide PHI in the loop header.
+     Add the preheader arg now; the back-edge arg is added later.  */
+  tree wide_phi_result = make_ssa_name (wide_type);
+  gphi *wide_phi = create_phi_node (wide_phi_result, loop->header);
+  add_phi_arg (wide_phi, wide_init, pre_edge, init_loc);
+
+  /* 3. Map old phi_result -> wide_phi_result.  */
+  hash_map<tree, tree> narrow_to_wide;
+  narrow_to_wide.put (phi_result, wide_phi_result);
+
+  /* 4. Worklist-driven widening of the in-loop chain.
+     The original narrow statements are left in place; they become
+     dead once exit PHIs are patched below and are removed by DCE.  */
+  auto_vec<tree> worklist;
+  worklist.safe_push (phi_result);
+
+  while (!worklist.is_empty ())
+    {
+      tree old_name = worklist.pop ();
+      tree *wide_name_p = narrow_to_wide.get (old_name);
+      if (!wide_name_p)
+        continue;
+      tree wide_name = *wide_name_p;
+
+      imm_use_iterator iter;
+      use_operand_p use_p;
+      /* Collect uses first to avoid issues with iteration.
+         Use a hash set to deduplicate -- a statement may appear
+         more than once if it uses old_name in multiple operands.  */
+      hash_set<gimple *> seen_stmts;
+      auto_vec<gimple *> use_stmts;
+      FOR_EACH_IMM_USE_FAST (use_p, iter, old_name)
+        {
+          gimple *use_stmt = USE_STMT (use_p);
+          if (is_gimple_debug (use_stmt))
+            continue;
+          basic_block bb = gimple_bb (use_stmt);
+          if (!bb || !flow_bb_inside_loop_p (loop, bb))
+            continue;
+          /* hash_set::add returns false when the element was newly
+             inserted, so this pushes each statement exactly once.  */
+          if (!seen_stmts.add (use_stmt))
+            use_stmts.safe_push (use_stmt);
+        }
+
+      unsigned i;
+      gimple *use_stmt;
+      FOR_EACH_VEC_ELT (use_stmts, i, use_stmt)
+        {
+          /* Skip merge PHIs -- handled by resolve_wide_back_arg.  */
+          if (gimple_code (use_stmt) == GIMPLE_PHI)
+            continue;
+
+          if (!is_gimple_assign (use_stmt))
+            continue;
+
+          enum tree_code code = gimple_assign_rhs_code (use_stmt);
+          tree lhs = gimple_assign_lhs (use_stmt);
+
+          if (CONVERT_EXPR_CODE_P (code))
+            {
+              /* Widening or same-width cast: map lhs -> wide_name.
+                 This absorbs same-width sign-changes (e.g. short <->
+                 unsigned short) by mapping them to the same wide SSA.  */
+              if (is_widening_or_nop_cast_p (use_stmt, narrow_prec))
+                {
+                  if (!narrow_to_wide.get (lhs))
+                    {
+                      narrow_to_wide.put (lhs, wide_name);
+                      worklist.safe_push (lhs);
+                    }
+                  continue;
+                }
+              /* Any other cast (truly narrowing from wider type, etc.)
+                 -- skip; the value won't be widened.  */
+              continue;
+            }
+
+          if (code == PLUS_EXPR || code == MINUS_EXPR)
+            {
+              if (narrow_to_wide.get (lhs))
+                continue;  /* Already widened.  */
+
+              tree op0 = gimple_assign_rhs1 (use_stmt);
+              tree op1 = gimple_assign_rhs2 (use_stmt);
+
+              gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+
+              tree w0 = get_or_widen (op0, wide_type, narrow_to_wide, gsi);
+              tree w1 = get_or_widen (op1, wide_type, narrow_to_wide, gsi);
+
+              tree wide_lhs = make_ssa_name (wide_type);
+              gassign *wide_stmt
+                = gimple_build_assign (wide_lhs, code, w0, w1);
+              gsi_insert_after (&gsi, wide_stmt, GSI_NEW_STMT);
+
+              narrow_to_wide.put (lhs, wide_lhs);
+              worklist.safe_push (lhs);
+              continue;
+            }
+        }
+    }
+
+  /* 5. Wire the back-edge argument.  */
+  tree wide_back = resolve_wide_back_arg (back_arg, wide_type,
+                                          narrow_to_wide, loop);
+  /* Analysis guarantees the back-edge chain reaches the header PHI
+     through additive ops, casts, and merge-PHIs -- all of which
+     resolve_wide_back_arg handles.  A NULL here means a bug.  */
+  gcc_assert (wide_back);
+  add_phi_arg (wide_phi, wide_back, latch_edge, back_loc);
+
+  /* Ensure back_arg is in the map so exit PHI replacement finds it.
+     resolve_wide_back_arg maps intermediate nodes (merge-PHIs, cast
+     sources) but not back_arg itself.  */
+  if (!narrow_to_wide.get (back_arg))
+    narrow_to_wide.put (back_arg, wide_back);
+
+  /* 6. Insert narrowing casts at loop exits.
+     For each exit edge, scan its destination's PHIs for arguments
+     that reference any name in the widened accumulator chain.  For
+     each match, insert a narrowing cast of the corresponding wide
+     value on that edge.  This handles both the back-edge value and
+     intermediate accumulator values at early exits.  */
+  auto_vec<edge> exits = get_loop_exit_edges (loop);
+  unsigned j;
+  edge ex;
+  FOR_EACH_VEC_ELT (exits, j, ex)
+    {
+      if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
+        continue;
+
+      for (gphi_iterator psi = gsi_start_phis (ex->dest);
+           !gsi_end_p (psi); gsi_next (&psi))
+        {
+          gphi *exit_phi = psi.phi ();
+          for (unsigned k = 0; k < gimple_phi_num_args (exit_phi); k++)
+            {
+              if (gimple_phi_arg_edge (exit_phi, k) != ex)
+                continue;
+              tree arg = gimple_phi_arg_def (exit_phi, k);
+              tree arg_type = TREE_TYPE (arg);
+              /* Only replace args that carry a narrow-type value.
+                 The narrow_to_wide map may also contain int-typed
+                 intermediates (from widening casts in the accumulator
+                 chain); replacing those with a narrow cast would
+                 create a type mismatch in the exit PHI.  */
+              if (INTEGRAL_TYPE_P (arg_type)
+                  && TYPE_PRECISION (arg_type) > narrow_prec)
+                continue;
+              tree *wide_p = narrow_to_wide.get (arg);
+              if (!wide_p)
+                continue;
+
+              /* Cast to ARG's own type rather than narrow_type: the
+                 chain absorbs same-width sign-changing casts (e.g.
+                 an unsigned short intermediate when narrow_type is
+                 short), so an exit-PHI argument may have the opposite
+                 signedness at the same precision.  Substituting a
+                 narrow_type-typed value there would create a type
+                 mismatch between the PHI result and its argument.  */
+              tree narrow_exit = make_ssa_name (arg_type);
+              gassign *exit_cast
+                = gimple_build_assign (narrow_exit, NOP_EXPR, *wide_p);
+              gsi_insert_on_edge (ex, exit_cast);
+              SET_PHI_ARG_DEF (exit_phi, k, narrow_exit);
+            }
+        }
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "Accumulator widened successfully: ");
+      print_generic_expr (dump_file, phi_result, TDF_SLIM);
+      fprintf (dump_file, " in loop %d\n", loop->num);
+    }
+
+  return true;
+}
+
+namespace {
+
+const pass_data pass_data_widen_accumulator =
+{
+  GIMPLE_PASS,             /* type */
+  "widen_accum",           /* name */
+  OPTGROUP_LOOP,           /* optinfo_flags */
+  TV_TREE_WIDEN_ACCUM,     /* tv_id */
+  ( PROP_cfg | PROP_ssa ),  /* properties_required */
+  0,                       /* properties_provided */
+  0,                       /* properties_destroyed */
+  0,                       /* todo_flags_start */
+  0,                       /* todo_flags_finish */
+};
+
+/* Widen narrow integer loop accumulators to int width.  Gated on
+   -ftree-widen-accum; also disabled under -ftrapv, since the widened
+   arithmetic deliberately wraps and would not preserve the
+   overflow-trapping semantics -ftrapv requests for the original
+   narrow signed operations.  */
+
+class pass_widen_accumulator : public gimple_opt_pass
+{
+public:
+  pass_widen_accumulator (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_widen_accumulator, ctxt) {}
+
+  bool gate (function *) final override
+  {
+    return flag_tree_widen_accum && !flag_trapv;
+  }
+
+  unsigned int execute (function *) final override;
+};
+
+/* Main entry: walk every loop of FUN innermost-first, analyze each
+   sub-int-precision integral header PHI, and widen the accumulators
+   that qualify.  Returns TODO_cleanup_cfg when something changed so
+   the cleanup only runs when needed, 0 otherwise.  */
+
+unsigned int
+pass_widen_accumulator::execute (function *fun)
+{
+  bool changed = false;
+
+  loop_optimizer_init (LOOPS_NORMAL);
+
+  /* number_of_loops counts the artificial outermost loop, so a value
+     of 1 means the function has no real loops to examine.  */
+  if (number_of_loops (fun) <= 1)
+    {
+      loop_optimizer_finalize ();
+      return 0;
+    }
+
+  for (auto loop : loops_list (fun, LI_FROM_INNERMOST))
+    {
+      for (gphi_iterator gsi = gsi_start_phis (loop->header);
+           !gsi_end_p (gsi); gsi_next (&gsi))
+        {
+          gphi *phi = gsi.phi ();
+          tree result = gimple_phi_result (phi);
+
+          if (virtual_operand_p (result))
+            continue;
+
+          if (!INTEGRAL_TYPE_P (TREE_TYPE (result)))
+            continue;
+
+          /* Only PHIs narrower than int are candidates.  This also
+             skips the int-width wide PHIs that widen_accumulator adds
+             to this header while we iterate over its PHI list.  */
+          if (TYPE_PRECISION (TREE_TYPE (result))
+              >= TYPE_PRECISION (integer_type_node))
+            continue;
+
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            {
+              fprintf (dump_file,
+                       "\nExamining narrow PHI (prec=%u) in loop %d:\n",
+                       TYPE_PRECISION (TREE_TYPE (result)), loop->num);
+              print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
+            }
+
+          if (analyze_candidate (phi, loop))
+            {
+              if (dump_file && (dump_flags & TDF_DETAILS))
+                {
+                  fprintf (dump_file,
+                           "\nCandidate accumulator PHI in loop %d:\n",
+                           loop->num);
+                  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
+                }
+              if (widen_accumulator (phi, loop))
+                changed = true;
+            }
+        }
+    }
+
+  unsigned int todo = 0;
+  if (changed)
+    {
+      gsi_commit_edge_inserts ();
+      /* Rewrite into loop-closed SSA so that subsequent passes that
+         expect LCSSA form (e.g. pass_optimize_widening_mul) see
+         correct PHI nodes at loop exits for the new wide names.  */
+      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+      /* Request the CFG cleanup only when we actually changed
+         something, instead of unconditionally via todo_flags_finish.  */
+      todo = TODO_cleanup_cfg;
+    }
+
+  loop_optimizer_finalize ();
+
+  return todo;
+}
+
+} // anonymous namespace
+
+/* Factory entry point used by the pass manager (declared in
+   tree-pass.h) to instantiate the accumulator-widening pass.  */
+
+gimple_opt_pass *
+make_pass_widen_accumulator (gcc::context *ctxt)
+{
+  return new pass_widen_accumulator (ctxt);
+}
-- 
2.34.1

Reply via email to