This is a proof of concept. This passes bootstrap and regtest but I don't
think this is a sensible code choice.

This adds a rematerialize loads pass that does the following
transformation:

FROM:
<bb 2>:
  _9 = *arr_4(D);

<bb 3>:
  # _10 = PHI <_1(7), _9(2)>
  # arr_11 = PHI <arr_7(7), arr_4(D)(2)>

  # Some uses of _10

<bb 4>:
  arr_7 = arr_11 + 1;
  _1 = *arr_7;

  if (_1 != 0)
    goto <bb 3>; [94.50%]
  else
    # Exit the loop

TO:

<bb 2>:

<bb 3>:
  # arr_11 = PHI <arr_7(7), arr_4(D)(2)>
  # _10 = *arr_11

<bb 4>:
  arr_7 = arr_11 + 1;
  _1 = *arr_7;

  if (_1 != 0)
    goto <bb 3>; [94.50%]
  else
    # Exit the loop

This allows loops such as the below to be vectorized.

int foo(char *arr, char c) {
  while (*arr) {
    if (*arr == c) return 1;
    arr++;
  }
  return 0;
}

I'm not sure where this transformation fits. I could imagine it going
in ifcvt, though it feels conceptually disjoint. Maybe in vect, but
before analysis?

I see roughly 0.3% geomean speedup at O3 with this accross various
benchmarks on aarch64 and on x86 with avx512.

Any feedback would be appreciated.

Thanks,
Alfie

gcc/ChangeLog:

        * Makefile.in: Add new file
        * passes.def: Add rematerialize_loads pass.
        * timevar.def (TV_TREE_REMATERIALIZE_LOADS): Add rematerialize loads 
pass.
        * tree-pass.h (make_pass_rematerialize_loads): Add rematerialize loads 
pass.
        * rematerialize_loads.cc: New pass.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/vect_rematerialize_loads.c: New test.
---
 gcc/Makefile.in                               |   1 +
 gcc/passes.def                                |   1 +
 gcc/rematerialize_loads.cc                    | 321 ++++++++++++++++++
 .../vect/vect-early-break-rematerialize.c     |  16 +
 gcc/timevar.def                               |   1 +
 gcc/tree-pass.h                               |   1 +
 6 files changed, 341 insertions(+)
 create mode 100644 gcc/rematerialize_loads.cc
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index abf98aabac8..00ae012850b 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1757,6 +1757,7 @@ OBJS = \
        tree-eh.o \
        tree-emutls.o \
        tree-if-conv.o \
+       rematerialize_loads.o \
        tree-inline.o \
        tree-into-ssa.o \
        tree-iterator.o \
diff --git a/gcc/passes.def b/gcc/passes.def
index cdddb87302f..0b9ffc3f1ad 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -317,6 +317,7 @@ along with GCC; see the file COPYING3.  If not see
          NEXT_PASS (pass_expand_omp_ssa);
          NEXT_PASS (pass_ch_vect);
          NEXT_PASS (pass_if_conversion);
+         NEXT_PASS (pass_rematerialize_loads);
          /* pass_vectorize must immediately follow pass_if_conversion.
             Please do not add any other passes in between.  */
          NEXT_PASS (pass_vectorize);
diff --git a/gcc/rematerialize_loads.cc b/gcc/rematerialize_loads.cc
new file mode 100644
index 00000000000..d55a6a70a74
--- /dev/null
+++ b/gcc/rematerialize_loads.cc
@@ -0,0 +1,321 @@
+/* Load rematerialization.
+   Copyright (C) 2026 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "expr.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "tree-into-ssa.h"
+#include "tree-ssa.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
+
+/* We sometimes end up with a loop like:
+
+<bb 2>:
+  _9 = *arr_4(D);
+
+<bb 3>:
+  # _10 = PHI <_1(7), _9(2)>
+  # arr_11 = PHI <arr_7(7), arr_4(D)(2)>
+
+  # Some uses of _10
+
+<bb 4>:
+  arr_7 = arr_11 + 1;
+  _1 = *arr_7;
+
+  if (_1 != 0)
+    goto <bb 3>; [94.50%]
+  else
+    # Exit the loop
+
+With one use-def loop for the array pointer, and one for the defefrenced value
+This gets in the way of vectorization. So we transform it to:
+
+<bb 2>:
+
+<bb 3>:
+  # arr_11 = PHI <arr_7(7), arr_4(D)(2)>
+  # _10 = *arr_11
+
+<bb 4>:
+  arr_7 = arr_11 + 1;
+  _1 = *arr_7;
+
+  if (_1 != 0)
+    goto <bb 3>; [94.50%]
+  else
+    # Exit the loop
+ */
+
+unsigned
+vect_try_rematerialize_loads (class loop *loop)
+{
+  gphi_iterator gsi;
+  basic_block bb = loop->header;
+
+  DUMP_VECT_SCOPE ("vect_try_rematerialize_loads");
+
+  unsigned retval = 0;
+  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi);)
+    {
+      gphi *phi = gsi.phi ();
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location, "Looking at phi: %G",
+                        (gimple *) phi);
+
+      tree res = gimple_phi_result (phi);
+      if (TREE_CODE (res) != SSA_NAME)
+       continue;
+
+      /* Only handle the cases where the phi has two arguments.  */
+      if (gimple_phi_num_args (phi) != 2)
+       {
+         gsi_next (&gsi);
+         continue;
+       }
+
+      tree left_arg = gimple_phi_arg_def (phi, 0);
+      tree right_arg = gimple_phi_arg_def (phi, 1);
+
+      /* Only handle the cases where the two phi arguments are SSA names.  */
+      if (TREE_CODE (left_arg) != SSA_NAME || TREE_CODE (right_arg) != 
SSA_NAME)
+       {
+         gsi_next (&gsi);
+         continue;
+       }
+      gimple *left_arg_def = SSA_NAME_DEF_STMT (left_arg);
+      gimple *right_arg_def = SSA_NAME_DEF_STMT (right_arg);
+
+      edge left_edge = gimple_phi_arg_edge (phi, 0);
+      edge right_edge = gimple_phi_arg_edge (phi, 1);
+
+      /* Only handle the cases where the two phi arg SSA defs are loads (and 
not
+        also stores).  */
+      if (!gimple_assign_load_p (left_arg_def)
+         || !gimple_assign_load_p (right_arg_def)
+         || gimple_store_p (left_arg_def) || gimple_store_p (right_arg_def))
+       {
+         gsi_next (&gsi);
+         continue;
+       }
+
+      tree left_ref = gimple_assign_rhs1 (left_arg_def);
+      tree ref_type = TREE_TYPE (left_ref);
+      tree right_ref = gimple_assign_rhs1 (right_arg_def);
+      /* Only handle the cases where the two loads use MEM_REFS.  */
+      if (TREE_CODE (left_ref) != MEM_REF || TREE_CODE (right_ref) != MEM_REF)
+       {
+         gsi_next (&gsi);
+         continue;
+       }
+
+      tree left_addr = TREE_OPERAND (left_ref, 0);
+      left_addr = tree_strip_nop_conversions (left_addr);
+      tree right_addr = TREE_OPERAND (right_ref, 0);
+      right_addr = tree_strip_nop_conversions (right_addr);
+      tree left_type = TREE_TYPE (left_addr);
+      tree right_type = TREE_TYPE (right_addr);
+      /* Only handle the cases where the two load types are compatible.  */
+      if (!types_compatible_p (left_type, right_type))
+       {
+         gsi_next (&gsi);
+         continue;
+       }
+
+      tree right_offset = TREE_OPERAND (right_ref, 1);
+      tree left_offset = TREE_OPERAND (right_ref, 1);
+
+      /* Check the two loads use the same offsets.  */
+      if (!operand_equal_p (left_offset, right_offset))
+       {
+         gsi_next (&gsi);
+         continue;
+       }
+      tree offset = left_offset;
+
+      left_addr = tree_strip_nop_conversions (left_addr);
+      right_addr = tree_strip_nop_conversions (right_addr);
+
+      if (dump_enabled_p ())
+       {
+         dump_printf_loc (MSG_NOTE, vect_location, "Left assign: %G",
+                          (gimple *) left_arg_def);
+         dump_printf_loc (MSG_NOTE, vect_location, "Right assign: %G",
+                          (gimple *) right_arg_def);
+         dump_printf_loc (MSG_NOTE, vect_location, "left addr: %T\n",
+                          left_addr);
+         dump_printf_loc (MSG_NOTE, vect_location, "right addr: %T\n",
+                          right_addr);
+       }
+
+      /* Try find a phi node that is the address of the load phis.  */
+      gphi_iterator addr_gsi;
+      gphi *address_phi = NULL;
+      for (addr_gsi = gsi_start_phis (bb); !gsi_end_p (addr_gsi);
+          gsi_next (&addr_gsi))
+       {
+         gphi *addr_phi = addr_gsi.phi ();
+
+         /* Check we only have two phi nodes.  */
+         if (addr_phi == phi || gimple_phi_num_args (addr_phi) != 2)
+           continue;
+         tree left_addr_arg = gimple_phi_arg_def (addr_phi, 0);
+         tree right_addr_arg = gimple_phi_arg_def (addr_phi, 1);
+
+         left_addr_arg = tree_strip_nop_conversions (left_addr_arg);
+         right_addr_arg = tree_strip_nop_conversions (right_addr_arg);
+
+         edge left_addr_edge = gimple_phi_arg_edge (addr_phi, 0);
+         edge right_addr_edge = gimple_phi_arg_edge (addr_phi, 1);
+
+         /* Check this phi nodes edges match the original phi's nodes.  */
+         if (left_addr_edge != left_edge || right_addr_edge != right_edge)
+           continue;
+
+         /* Check neither the address nodes are loads.  */
+         if (!gimple_assign_load_p (left_arg_def)
+             || !gimple_assign_load_p (right_arg_def))
+           continue;
+
+         if (dump_enabled_p ())
+           {
+             dump_printf_loc (MSG_NOTE, vect_location, "left arg: %T\n",
+                              left_addr_arg);
+             dump_printf_loc (MSG_NOTE, vect_location, "right arg: %T\n",
+                              right_addr_arg);
+           }
+
+         /* Check the addresses match between the load phi and the address
+            phi.  */
+         if (!operand_equal_p (left_addr_arg, left_addr)
+             || !operand_equal_p (right_addr_arg, right_addr))
+           continue;
+
+         tree left_arg_type = TREE_TYPE (left_addr_arg);
+         tree right_arg_type = TREE_TYPE (right_addr_arg);
+
+         /* Check all the types are compatible */
+         if (!types_compatible_p (left_type, left_arg_type)
+             || !types_compatible_p (right_type, right_arg_type))
+           continue;
+
+         address_phi = addr_phi;
+
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location, "Matched phi: %G",
+                            (gimple *) addr_phi);
+       }
+
+      /* No address was found.  */
+      if (!address_phi)
+       {
+         gsi_next (&gsi);
+         continue;
+       }
+
+      // Yay! Change the phi to instead be a load of the other phi.
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location, "Removing phi: %G",
+                        (gimple *) phi);
+
+      tree addr = gimple_phi_result (address_phi);
+      tree mem = build2 (MEM_REF, ref_type, addr, offset);
+      gassign *load = gimple_build_assign (res, mem);
+      remove_phi_node (&gsi, false);
+      gimple_stmt_iterator gsi_add = gsi_after_labels (bb);
+      gsi_insert_before (&gsi_add, load, GSI_SAME_STMT);
+      retval |= TODO_update_ssa;
+    }
+  return retval;
+}
+
+/* Tree load-rematerialization pass management.  */
+
+namespace {
+
+const pass_data pass_data_rematerialize_loads = {
+  GIMPLE_PASS,                /* type */
+  "rematerialize_loads",       /* name */
+  OPTGROUP_NONE,              /* optinfo_flags */
+  TV_TREE_REMATERIALIZE_LOADS, /* tv_id */
+  (PROP_cfg | PROP_ssa),       /* properties_required */
+  0,                          /* properties_provided */
+  0,                          /* properties_destroyed */
+  0,                          /* todo_flags_start */
+  0                           /* todo_flags_finish */
+};
+
+class pass_rematerialize_loads : public gimple_opt_pass
+{
+public:
+  pass_rematerialize_loads (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_rematerialize_loads, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) final override;
+  unsigned int execute (function *) final override;
+
+}; // class pass_rematerialize_loads
+
+bool
+pass_rematerialize_loads::gate (function *fun)
+{
+  return (flag_tree_loop_vectorize || fun->has_force_vectorize_loops);
+}
+
+unsigned int
+pass_rematerialize_loads::execute (function *fun)
+{
+  unsigned todo = 0;
+
+  if (number_of_loops (fun) <= 1)
+    return 0;
+
+  auto_vec<gimple *> preds;
+  for (auto loop : loops_list (cfun, 0))
+    if (flag_tree_loop_if_convert == 1
+       || ((flag_tree_loop_vectorize || loop->force_vectorize)
+           && !loop->dont_vectorize))
+      todo |= vect_try_rematerialize_loads (loop);
+
+  if (todo & TODO_update_ssa_any)
+    update_ssa (todo & TODO_update_ssa_any);
+
+  return 0;
+}
+
+} // namespace
+
+gimple_opt_pass *
+make_pass_rematerialize_loads (gcc::context *ctxt)
+{
+  return new pass_rematerialize_loads (ctxt);
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
new file mode 100644
index 00000000000..17b9fa13fa4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
@@ -0,0 +1,16 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-additional-options "-Ofast" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+int foo(char *arr, char c) {
+  while (*arr) {
+    if (*arr == c) return 1;
+    arr++;
+  }
+  return 0;
+}
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 3824caa01bc..a756fdfc05a 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -318,6 +318,7 @@ DEFTIMEVAR (TV_TREE_UBSAN            , "tree ubsan")
 DEFTIMEVAR (TV_INITIALIZE_RTL        , "initialize rtl")
 DEFTIMEVAR (TV_GIMPLE_LADDRESS       , "address lowering")
 DEFTIMEVAR (TV_TREE_LOOP_IFCVT       , "tree loop if-conversion")
+DEFTIMEVAR (TV_TREE_REMATERIALIZE_LOADS       , "tree rematerialize loads")
 DEFTIMEVAR (TV_WARN_ACCESS           , "access analysis")
 DEFTIMEVAR (TV_GIMPLE_CRC_OPTIMIZATION, "crc optimization")
 DEFTIMEVAR (TV_EXT_DCE               , "ext dce")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index b3c97658a8f..b8a96b3cc30 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -387,6 +387,7 @@ extern gimple_opt_pass *make_pass_empty_loop (gcc::context 
*ctxt);
 extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_rematerialize_loads (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_if_to_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_loop_distribution (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_crc_optimization (gcc::context *ctxt);
-- 
2.34.1

Reply via email to