This is a proof of concept. This passes bootstrap and regtest but I don't
think this is a sensible code choice.
This adds a rematerialize loads pass that does the following
transformation:
FROM:
<bb 2>:
_9 = *arr_4(D);
<bb 3>:
# _10 = PHI <_1(7), _9(2)>
# arr_11 = PHI <arr_7(7), arr_4(D)(2)>
# Some uses of _10
<bb 4>:
arr_7 = arr_11 + 1;
_1 = *arr_7;
if (_1 != 0)
goto <bb 3>; [94.50%]
else
# Exit the loop
TO:
<bb 2>:
<bb 3>:
# arr_11 = PHI <arr_7(7), arr_4(D)(2)>
# _10 = *arr_11
<bb 4>:
arr_7 = arr_11 + 1;
_1 = *arr_7;
if (_1 != 0)
goto <bb 3>; [94.50%]
else
# Exit the loop
This allows loops such as the below to be vectorized.
int foo(char *arr, char c) {
while (*arr) {
if (*arr == c) return 1;
arr++;
}
return 0;
}
I'm not sure where this transformation fits. I could imagine it going
in ifcvt, though it feels conceptually disjoint. Maybe in vect, but
before analysis?
I see roughly 0.3% geomean speedup at O3 with this accross various
benchmarks on aarch64 and on x86 with avx512.
Any feedback would be appreciated.
Thanks,
Alfie
gcc/ChangeLog:
* Makefile.in: Add new file
* passes.def: Add rematerialize_loads pass.
* timevar.def (TV_TREE_REMATERIALIZE_LOADS): Add rematerialize loads
pass.
* tree-pass.h (make_pass_rematerialize_loads): Add rematerialize loads
pass.
* rematerialize_loads.cc: New pass.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect_rematerialize_loads.c: New test.
---
gcc/Makefile.in | 1 +
gcc/passes.def | 1 +
gcc/rematerialize_loads.cc | 321 ++++++++++++++++++
.../vect/vect-early-break-rematerialize.c | 16 +
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
6 files changed, 341 insertions(+)
create mode 100644 gcc/rematerialize_loads.cc
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index abf98aabac8..00ae012850b 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1757,6 +1757,7 @@ OBJS = \
tree-eh.o \
tree-emutls.o \
tree-if-conv.o \
+ rematerialize_loads.o \
tree-inline.o \
tree-into-ssa.o \
tree-iterator.o \
diff --git a/gcc/passes.def b/gcc/passes.def
index cdddb87302f..0b9ffc3f1ad 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -317,6 +317,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_expand_omp_ssa);
NEXT_PASS (pass_ch_vect);
NEXT_PASS (pass_if_conversion);
+ NEXT_PASS (pass_rematerialize_loads);
/* pass_vectorize must immediately follow pass_if_conversion.
Please do not add any other passes in between. */
NEXT_PASS (pass_vectorize);
diff --git a/gcc/rematerialize_loads.cc b/gcc/rematerialize_loads.cc
new file mode 100644
index 00000000000..d55a6a70a74
--- /dev/null
+++ b/gcc/rematerialize_loads.cc
@@ -0,0 +1,321 @@
+/* Load rematerialization.
+ Copyright (C) 2026 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "expr.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "tree-into-ssa.h"
+#include "tree-ssa.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
+
+/* We sometimes end up with a loop like:
+
+<bb 2>:
+ _9 = *arr_4(D);
+
+<bb 3>:
+ # _10 = PHI <_1(7), _9(2)>
+ # arr_11 = PHI <arr_7(7), arr_4(D)(2)>
+
+ # Some uses of _10
+
+<bb 4>:
+ arr_7 = arr_11 + 1;
+ _1 = *arr_7;
+
+ if (_1 != 0)
+ goto <bb 3>; [94.50%]
+ else
+ # Exit the loop
+
+With one use-def loop for the array pointer, and one for the defefrenced value
+This gets in the way of vectorization. So we transform it to:
+
+<bb 2>:
+
+<bb 3>:
+ # arr_11 = PHI <arr_7(7), arr_4(D)(2)>
+ # _10 = *arr_11
+
+<bb 4>:
+ arr_7 = arr_11 + 1;
+ _1 = *arr_7;
+
+ if (_1 != 0)
+ goto <bb 3>; [94.50%]
+ else
+ # Exit the loop
+ */
+
+unsigned
+vect_try_rematerialize_loads (class loop *loop)
+{
+ gphi_iterator gsi;
+ basic_block bb = loop->header;
+
+ DUMP_VECT_SCOPE ("vect_try_rematerialize_loads");
+
+ unsigned retval = 0;
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi);)
+ {
+ gphi *phi = gsi.phi ();
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "Looking at phi: %G",
+ (gimple *) phi);
+
+ tree res = gimple_phi_result (phi);
+ if (TREE_CODE (res) != SSA_NAME)
+ continue;
+
+ /* Only handle the cases where the phi has two arguments. */
+ if (gimple_phi_num_args (phi) != 2)
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+
+ tree left_arg = gimple_phi_arg_def (phi, 0);
+ tree right_arg = gimple_phi_arg_def (phi, 1);
+
+ /* Only handle the cases where the two phi arguments are SSA names. */
+ if (TREE_CODE (left_arg) != SSA_NAME || TREE_CODE (right_arg) !=
SSA_NAME)
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+ gimple *left_arg_def = SSA_NAME_DEF_STMT (left_arg);
+ gimple *right_arg_def = SSA_NAME_DEF_STMT (right_arg);
+
+ edge left_edge = gimple_phi_arg_edge (phi, 0);
+ edge right_edge = gimple_phi_arg_edge (phi, 1);
+
+ /* Only handle the cases where the two phi arg SSA defs are loads (and
not
+ also stores). */
+ if (!gimple_assign_load_p (left_arg_def)
+ || !gimple_assign_load_p (right_arg_def)
+ || gimple_store_p (left_arg_def) || gimple_store_p (right_arg_def))
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+
+ tree left_ref = gimple_assign_rhs1 (left_arg_def);
+ tree ref_type = TREE_TYPE (left_ref);
+ tree right_ref = gimple_assign_rhs1 (right_arg_def);
+ /* Only handle the cases where the two loads use MEM_REFS. */
+ if (TREE_CODE (left_ref) != MEM_REF || TREE_CODE (right_ref) != MEM_REF)
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+
+ tree left_addr = TREE_OPERAND (left_ref, 0);
+ left_addr = tree_strip_nop_conversions (left_addr);
+ tree right_addr = TREE_OPERAND (right_ref, 0);
+ right_addr = tree_strip_nop_conversions (right_addr);
+ tree left_type = TREE_TYPE (left_addr);
+ tree right_type = TREE_TYPE (right_addr);
+ /* Only handle the cases where the two load types are compatible. */
+ if (!types_compatible_p (left_type, right_type))
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+
+ tree right_offset = TREE_OPERAND (right_ref, 1);
+ tree left_offset = TREE_OPERAND (right_ref, 1);
+
+ /* Check the two loads use the same offsets. */
+ if (!operand_equal_p (left_offset, right_offset))
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+ tree offset = left_offset;
+
+ left_addr = tree_strip_nop_conversions (left_addr);
+ right_addr = tree_strip_nop_conversions (right_addr);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "Left assign: %G",
+ (gimple *) left_arg_def);
+ dump_printf_loc (MSG_NOTE, vect_location, "Right assign: %G",
+ (gimple *) right_arg_def);
+ dump_printf_loc (MSG_NOTE, vect_location, "left addr: %T\n",
+ left_addr);
+ dump_printf_loc (MSG_NOTE, vect_location, "right addr: %T\n",
+ right_addr);
+ }
+
+ /* Try find a phi node that is the address of the load phis. */
+ gphi_iterator addr_gsi;
+ gphi *address_phi = NULL;
+ for (addr_gsi = gsi_start_phis (bb); !gsi_end_p (addr_gsi);
+ gsi_next (&addr_gsi))
+ {
+ gphi *addr_phi = addr_gsi.phi ();
+
+ /* Check we only have two phi nodes. */
+ if (addr_phi == phi || gimple_phi_num_args (addr_phi) != 2)
+ continue;
+ tree left_addr_arg = gimple_phi_arg_def (addr_phi, 0);
+ tree right_addr_arg = gimple_phi_arg_def (addr_phi, 1);
+
+ left_addr_arg = tree_strip_nop_conversions (left_addr_arg);
+ right_addr_arg = tree_strip_nop_conversions (right_addr_arg);
+
+ edge left_addr_edge = gimple_phi_arg_edge (addr_phi, 0);
+ edge right_addr_edge = gimple_phi_arg_edge (addr_phi, 1);
+
+ /* Check this phi nodes edges match the original phi's nodes. */
+ if (left_addr_edge != left_edge || right_addr_edge != right_edge)
+ continue;
+
+ /* Check neither the address nodes are loads. */
+ if (!gimple_assign_load_p (left_arg_def)
+ || !gimple_assign_load_p (right_arg_def))
+ continue;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "left arg: %T\n",
+ left_addr_arg);
+ dump_printf_loc (MSG_NOTE, vect_location, "right arg: %T\n",
+ right_addr_arg);
+ }
+
+ /* Check the addresses match between the load phi and the address
+ phi. */
+ if (!operand_equal_p (left_addr_arg, left_addr)
+ || !operand_equal_p (right_addr_arg, right_addr))
+ continue;
+
+ tree left_arg_type = TREE_TYPE (left_addr_arg);
+ tree right_arg_type = TREE_TYPE (right_addr_arg);
+
+ /* Check all the types are compatible */
+ if (!types_compatible_p (left_type, left_arg_type)
+ || !types_compatible_p (right_type, right_arg_type))
+ continue;
+
+ address_phi = addr_phi;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "Matched phi: %G",
+ (gimple *) addr_phi);
+ }
+
+ /* No address was found. */
+ if (!address_phi)
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+
+ // Yay! Change the phi to instead be a load of the other phi.
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "Removing phi: %G",
+ (gimple *) phi);
+
+ tree addr = gimple_phi_result (address_phi);
+ tree mem = build2 (MEM_REF, ref_type, addr, offset);
+ gassign *load = gimple_build_assign (res, mem);
+ remove_phi_node (&gsi, false);
+ gimple_stmt_iterator gsi_add = gsi_after_labels (bb);
+ gsi_insert_before (&gsi_add, load, GSI_SAME_STMT);
+ retval |= TODO_update_ssa;
+ }
+ return retval;
+}
+
+/* Tree load-rematerialization pass management. */
+
+namespace {
+
+const pass_data pass_data_rematerialize_loads = {
+ GIMPLE_PASS, /* type */
+ "rematerialize_loads", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_TREE_REMATERIALIZE_LOADS, /* tv_id */
+ (PROP_cfg | PROP_ssa), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0 /* todo_flags_finish */
+};
+
+class pass_rematerialize_loads : public gimple_opt_pass
+{
+public:
+ pass_rematerialize_loads (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_rematerialize_loads, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate (function *) final override;
+ unsigned int execute (function *) final override;
+
+}; // class pass_rematerialize_loads
+
+bool
+pass_rematerialize_loads::gate (function *fun)
+{
+ return (flag_tree_loop_vectorize || fun->has_force_vectorize_loops);
+}
+
+unsigned int
+pass_rematerialize_loads::execute (function *fun)
+{
+ unsigned todo = 0;
+
+ if (number_of_loops (fun) <= 1)
+ return 0;
+
+ auto_vec<gimple *> preds;
+ for (auto loop : loops_list (cfun, 0))
+ if (flag_tree_loop_if_convert == 1
+ || ((flag_tree_loop_vectorize || loop->force_vectorize)
+ && !loop->dont_vectorize))
+ todo |= vect_try_rematerialize_loads (loop);
+
+ if (todo & TODO_update_ssa_any)
+ update_ssa (todo & TODO_update_ssa_any);
+
+ return 0;
+}
+
+} // namespace
+
+gimple_opt_pass *
+make_pass_rematerialize_loads (gcc::context *ctxt)
+{
+ return new pass_rematerialize_loads (ctxt);
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
b/gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
new file mode 100644
index 00000000000..17b9fa13fa4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
@@ -0,0 +1,16 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-additional-options "-Ofast" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+int foo(char *arr, char c) {
+ while (*arr) {
+ if (*arr == c) return 1;
+ arr++;
+ }
+ return 0;
+}
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 3824caa01bc..a756fdfc05a 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -318,6 +318,7 @@ DEFTIMEVAR (TV_TREE_UBSAN , "tree ubsan")
DEFTIMEVAR (TV_INITIALIZE_RTL , "initialize rtl")
DEFTIMEVAR (TV_GIMPLE_LADDRESS , "address lowering")
DEFTIMEVAR (TV_TREE_LOOP_IFCVT , "tree loop if-conversion")
+DEFTIMEVAR (TV_TREE_REMATERIALIZE_LOADS , "tree rematerialize loads")
DEFTIMEVAR (TV_WARN_ACCESS , "access analysis")
DEFTIMEVAR (TV_GIMPLE_CRC_OPTIMIZATION, "crc optimization")
DEFTIMEVAR (TV_EXT_DCE , "ext dce")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index b3c97658a8f..b8a96b3cc30 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -387,6 +387,7 @@ extern gimple_opt_pass *make_pass_empty_loop (gcc::context
*ctxt);
extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_rematerialize_loads (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_if_to_switch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_loop_distribution (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_crc_optimization (gcc::context *ctxt);
--
2.34.1