On Wed, 4 Feb 2026, Alfie Richards wrote:
> This is a proof of concept. This passes bootstrap and regtest but I don't
> think this is a sensible code choice.
>
> This adds a rematerialize loads pass that does the following
> transformation:
>
> FROM:
> <bb 2>:
> _9 = *arr_4(D);
>
> <bb 3>:
> # _10 = PHI <_1(7), _9(2)>
> # arr_11 = PHI <arr_7(7), arr_4(D)(2)>
>
> # Some uses of _10
>
> <bb 4>:
> arr_7 = arr_11 + 1;
> _1 = *arr_7;
>
> if (_1 != 0)
> goto <bb 3>; [94.50%]
> else
> # Exit the loop
>
> TO:
>
> <bb 2>:
>
> <bb 3>:
> # arr_11 = PHI <arr_7(7), arr_4(D)(2)>
> # _10 = *arr_11
>
> <bb 4>:
> arr_7 = arr_11 + 1;
> _1 = *arr_7;
>
> if (_1 != 0)
> goto <bb 3>; [94.50%]
> else
> # Exit the loop
>
> This allows loops such as the below to be vectorized.
>
> int foo(char *arr, char c) {
> while (*arr) {
> if (*arr == c) return 1;
> arr++;
> }
> return 0;
> }
>
> I'm not sure where this transformation fits. I could imagine it going
> in ifcvt, though it feels conceptually disjoint. Maybe in vect, but
> before analysis?
>
> I see roughly 0.3% geomean speedup at O3 with this accross various
> benchmarks on aarch64 and on x86 with avx512.
>
> Any feedback would be appreciated.
I'd not make an extra pass but put it in tree-if-conv.cc. In theory
we could also mangle the SLP tree after construction, thus have this
as an extra "SLP PHI elimination phase", it might or might not fit well
into SLP pattern processing (maybe it alters the SLP graph structure
too much).
You are missing correctness checks - consider an aliasing
store after _1 = *arr_7 or after the load of _9 = *arr_4(D).
Richard.
> Thanks,
> Alfie
>
> gcc/ChangeLog:
>
> * Makefile.in: Add new file
> * passes.def: Add rematerialize_loads pass.
> * timevar.def (TV_TREE_REMATERIALIZE_LOADS): Add rematerialize loads
> pass.
> * tree-pass.h (make_pass_rematerialize_loads): Add rematerialize loads
> pass.
> * rematerialize_loads.cc: New pass.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/vect_rematerialize_loads.c: New test.
> ---
> gcc/Makefile.in | 1 +
> gcc/passes.def | 1 +
> gcc/rematerialize_loads.cc | 321 ++++++++++++++++++
> .../vect/vect-early-break-rematerialize.c | 16 +
> gcc/timevar.def | 1 +
> gcc/tree-pass.h | 1 +
> 6 files changed, 341 insertions(+)
> create mode 100644 gcc/rematerialize_loads.cc
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index abf98aabac8..00ae012850b 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1757,6 +1757,7 @@ OBJS = \
> tree-eh.o \
> tree-emutls.o \
> tree-if-conv.o \
> + rematerialize_loads.o \
> tree-inline.o \
> tree-into-ssa.o \
> tree-iterator.o \
> diff --git a/gcc/passes.def b/gcc/passes.def
> index cdddb87302f..0b9ffc3f1ad 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -317,6 +317,7 @@ along with GCC; see the file COPYING3. If not see
> NEXT_PASS (pass_expand_omp_ssa);
> NEXT_PASS (pass_ch_vect);
> NEXT_PASS (pass_if_conversion);
> + NEXT_PASS (pass_rematerialize_loads);
> /* pass_vectorize must immediately follow pass_if_conversion.
> Please do not add any other passes in between. */
> NEXT_PASS (pass_vectorize);
> diff --git a/gcc/rematerialize_loads.cc b/gcc/rematerialize_loads.cc
> new file mode 100644
> index 00000000000..d55a6a70a74
> --- /dev/null
> +++ b/gcc/rematerialize_loads.cc
> @@ -0,0 +1,321 @@
> +/* Load rematerialization.
> + Copyright (C) 2026 Free Software Foundation, Inc.
> +
> + This file is part of GCC.
> +
> + GCC is free software; you can redistribute it and/or modify it
> + under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3, or (at your option)
> + any later version.
> +
> + GCC is distributed in the hope that it will be useful, but WITHOUT
> + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
> + License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with GCC; see the file COPYING3. If not see
> + <http://www.gnu.org/licenses/>. */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "rtl.h"
> +#include "tree.h"
> +#include "gimple.h"
> +#include "cfghooks.h"
> +#include "tree-pass.h"
> +#include "ssa.h"
> +#include "expr.h"
> +#include "fold-const.h"
> +#include "gimple-iterator.h"
> +#include "tree-into-ssa.h"
> +#include "tree-ssa.h"
> +#include "cfgloop.h"
> +#include "tree-vectorizer.h"
> +
> +/* We sometimes end up with a loop like:
> +
> +<bb 2>:
> + _9 = *arr_4(D);
> +
> +<bb 3>:
> + # _10 = PHI <_1(7), _9(2)>
> + # arr_11 = PHI <arr_7(7), arr_4(D)(2)>
> +
> + # Some uses of _10
> +
> +<bb 4>:
> + arr_7 = arr_11 + 1;
> + _1 = *arr_7;
> +
> + if (_1 != 0)
> + goto <bb 3>; [94.50%]
> + else
> + # Exit the loop
> +
> +With one use-def loop for the array pointer, and one for the defefrenced
> value
> +This gets in the way of vectorization. So we transform it to:
> +
> +<bb 2>:
> +
> +<bb 3>:
> + # arr_11 = PHI <arr_7(7), arr_4(D)(2)>
> + # _10 = *arr_11
> +
> +<bb 4>:
> + arr_7 = arr_11 + 1;
> + _1 = *arr_7;
> +
> + if (_1 != 0)
> + goto <bb 3>; [94.50%]
> + else
> + # Exit the loop
> + */
> +
> +unsigned
> +vect_try_rematerialize_loads (class loop *loop)
> +{
> + gphi_iterator gsi;
> + basic_block bb = loop->header;
> +
> + DUMP_VECT_SCOPE ("vect_try_rematerialize_loads");
> +
> + unsigned retval = 0;
> + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi);)
> + {
> + gphi *phi = gsi.phi ();
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location, "Looking at phi: %G",
> + (gimple *) phi);
> +
> + tree res = gimple_phi_result (phi);
> + if (TREE_CODE (res) != SSA_NAME)
> + continue;
> +
> + /* Only handle the cases where the phi has two arguments. */
> + if (gimple_phi_num_args (phi) != 2)
> + {
> + gsi_next (&gsi);
> + continue;
> + }
> +
> + tree left_arg = gimple_phi_arg_def (phi, 0);
> + tree right_arg = gimple_phi_arg_def (phi, 1);
> +
> + /* Only handle the cases where the two phi arguments are SSA names. */
> + if (TREE_CODE (left_arg) != SSA_NAME || TREE_CODE (right_arg) !=
> SSA_NAME)
> + {
> + gsi_next (&gsi);
> + continue;
> + }
> + gimple *left_arg_def = SSA_NAME_DEF_STMT (left_arg);
> + gimple *right_arg_def = SSA_NAME_DEF_STMT (right_arg);
> +
> + edge left_edge = gimple_phi_arg_edge (phi, 0);
> + edge right_edge = gimple_phi_arg_edge (phi, 1);
> +
> + /* Only handle the cases where the two phi arg SSA defs are loads (and
> not
> + also stores). */
> + if (!gimple_assign_load_p (left_arg_def)
> + || !gimple_assign_load_p (right_arg_def)
> + || gimple_store_p (left_arg_def) || gimple_store_p (right_arg_def))
> + {
> + gsi_next (&gsi);
> + continue;
> + }
> +
> + tree left_ref = gimple_assign_rhs1 (left_arg_def);
> + tree ref_type = TREE_TYPE (left_ref);
> + tree right_ref = gimple_assign_rhs1 (right_arg_def);
> + /* Only handle the cases where the two loads use MEM_REFS. */
> + if (TREE_CODE (left_ref) != MEM_REF || TREE_CODE (right_ref) !=
> MEM_REF)
> + {
> + gsi_next (&gsi);
> + continue;
> + }
> +
> + tree left_addr = TREE_OPERAND (left_ref, 0);
> + left_addr = tree_strip_nop_conversions (left_addr);
> + tree right_addr = TREE_OPERAND (right_ref, 0);
> + right_addr = tree_strip_nop_conversions (right_addr);
> + tree left_type = TREE_TYPE (left_addr);
> + tree right_type = TREE_TYPE (right_addr);
> + /* Only handle the cases where the two load types are compatible. */
> + if (!types_compatible_p (left_type, right_type))
> + {
> + gsi_next (&gsi);
> + continue;
> + }
> +
> + tree right_offset = TREE_OPERAND (right_ref, 1);
> + tree left_offset = TREE_OPERAND (right_ref, 1);
> +
> + /* Check the two loads use the same offsets. */
> + if (!operand_equal_p (left_offset, right_offset))
> + {
> + gsi_next (&gsi);
> + continue;
> + }
> + tree offset = left_offset;
> +
> + left_addr = tree_strip_nop_conversions (left_addr);
> + right_addr = tree_strip_nop_conversions (right_addr);
> +
> + if (dump_enabled_p ())
> + {
> + dump_printf_loc (MSG_NOTE, vect_location, "Left assign: %G",
> + (gimple *) left_arg_def);
> + dump_printf_loc (MSG_NOTE, vect_location, "Right assign: %G",
> + (gimple *) right_arg_def);
> + dump_printf_loc (MSG_NOTE, vect_location, "left addr: %T\n",
> + left_addr);
> + dump_printf_loc (MSG_NOTE, vect_location, "right addr: %T\n",
> + right_addr);
> + }
> +
> + /* Try find a phi node that is the address of the load phis. */
> + gphi_iterator addr_gsi;
> + gphi *address_phi = NULL;
> + for (addr_gsi = gsi_start_phis (bb); !gsi_end_p (addr_gsi);
> + gsi_next (&addr_gsi))
> + {
> + gphi *addr_phi = addr_gsi.phi ();
> +
> + /* Check we only have two phi nodes. */
> + if (addr_phi == phi || gimple_phi_num_args (addr_phi) != 2)
> + continue;
> + tree left_addr_arg = gimple_phi_arg_def (addr_phi, 0);
> + tree right_addr_arg = gimple_phi_arg_def (addr_phi, 1);
> +
> + left_addr_arg = tree_strip_nop_conversions (left_addr_arg);
> + right_addr_arg = tree_strip_nop_conversions (right_addr_arg);
> +
> + edge left_addr_edge = gimple_phi_arg_edge (addr_phi, 0);
> + edge right_addr_edge = gimple_phi_arg_edge (addr_phi, 1);
> +
> + /* Check this phi nodes edges match the original phi's nodes. */
> + if (left_addr_edge != left_edge || right_addr_edge != right_edge)
> + continue;
> +
> + /* Check neither the address nodes are loads. */
> + if (!gimple_assign_load_p (left_arg_def)
> + || !gimple_assign_load_p (right_arg_def))
> + continue;
> +
> + if (dump_enabled_p ())
> + {
> + dump_printf_loc (MSG_NOTE, vect_location, "left arg: %T\n",
> + left_addr_arg);
> + dump_printf_loc (MSG_NOTE, vect_location, "right arg: %T\n",
> + right_addr_arg);
> + }
> +
> + /* Check the addresses match between the load phi and the address
> + phi. */
> + if (!operand_equal_p (left_addr_arg, left_addr)
> + || !operand_equal_p (right_addr_arg, right_addr))
> + continue;
> +
> + tree left_arg_type = TREE_TYPE (left_addr_arg);
> + tree right_arg_type = TREE_TYPE (right_addr_arg);
> +
> + /* Check all the types are compatible */
> + if (!types_compatible_p (left_type, left_arg_type)
> + || !types_compatible_p (right_type, right_arg_type))
> + continue;
> +
> + address_phi = addr_phi;
> +
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location, "Matched phi: %G",
> + (gimple *) addr_phi);
> + }
> +
> + /* No address was found. */
> + if (!address_phi)
> + {
> + gsi_next (&gsi);
> + continue;
> + }
> +
> + // Yay! Change the phi to instead be a load of the other phi.
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location, "Removing phi: %G",
> + (gimple *) phi);
> +
> + tree addr = gimple_phi_result (address_phi);
> + tree mem = build2 (MEM_REF, ref_type, addr, offset);
> + gassign *load = gimple_build_assign (res, mem);
> + remove_phi_node (&gsi, false);
> + gimple_stmt_iterator gsi_add = gsi_after_labels (bb);
> + gsi_insert_before (&gsi_add, load, GSI_SAME_STMT);
> + retval |= TODO_update_ssa;
> + }
> + return retval;
> +}
> +
> +/* Tree load-rematerialization pass management. */
> +
> +namespace {
> +
> +const pass_data pass_data_rematerialize_loads = {
> + GIMPLE_PASS, /* type */
> + "rematerialize_loads", /* name */
> + OPTGROUP_NONE, /* optinfo_flags */
> + TV_TREE_REMATERIALIZE_LOADS, /* tv_id */
> + (PROP_cfg | PROP_ssa), /* properties_required */
> + 0, /* properties_provided */
> + 0, /* properties_destroyed */
> + 0, /* todo_flags_start */
> + 0 /* todo_flags_finish */
> +};
> +
> +class pass_rematerialize_loads : public gimple_opt_pass
> +{
> +public:
> + pass_rematerialize_loads (gcc::context *ctxt)
> + : gimple_opt_pass (pass_data_rematerialize_loads, ctxt)
> + {}
> +
> + /* opt_pass methods: */
> + bool gate (function *) final override;
> + unsigned int execute (function *) final override;
> +
> +}; // class pass_rematerialize_loads
> +
> +bool
> +pass_rematerialize_loads::gate (function *fun)
> +{
> + return (flag_tree_loop_vectorize || fun->has_force_vectorize_loops);
> +}
> +
> +unsigned int
> +pass_rematerialize_loads::execute (function *fun)
> +{
> + unsigned todo = 0;
> +
> + if (number_of_loops (fun) <= 1)
> + return 0;
> +
> + auto_vec<gimple *> preds;
> + for (auto loop : loops_list (cfun, 0))
> + if (flag_tree_loop_if_convert == 1
> + || ((flag_tree_loop_vectorize || loop->force_vectorize)
> + && !loop->dont_vectorize))
> + todo |= vect_try_rematerialize_loads (loop);
> +
> + if (todo & TODO_update_ssa_any)
> + update_ssa (todo & TODO_update_ssa_any);
> +
> + return 0;
> +}
> +
> +} // namespace
> +
> +gimple_opt_pass *
> +make_pass_rematerialize_loads (gcc::context *ctxt)
> +{
> + return new pass_rematerialize_loads (ctxt);
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
> new file mode 100644
> index 00000000000..17b9fa13fa4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-rematerialize.c
> @@ -0,0 +1,16 @@
> +/* { dg-add-options vect_early_break } */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-additional-options "-Ofast" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +int foo(char *arr, char c) {
> + while (*arr) {
> + if (*arr == c) return 1;
> + arr++;
> + }
> + return 0;
> +}
> diff --git a/gcc/timevar.def b/gcc/timevar.def
> index 3824caa01bc..a756fdfc05a 100644
> --- a/gcc/timevar.def
> +++ b/gcc/timevar.def
> @@ -318,6 +318,7 @@ DEFTIMEVAR (TV_TREE_UBSAN , "tree ubsan")
> DEFTIMEVAR (TV_INITIALIZE_RTL , "initialize rtl")
> DEFTIMEVAR (TV_GIMPLE_LADDRESS , "address lowering")
> DEFTIMEVAR (TV_TREE_LOOP_IFCVT , "tree loop if-conversion")
> +DEFTIMEVAR (TV_TREE_REMATERIALIZE_LOADS , "tree rematerialize loads")
> DEFTIMEVAR (TV_WARN_ACCESS , "access analysis")
> DEFTIMEVAR (TV_GIMPLE_CRC_OPTIMIZATION, "crc optimization")
> DEFTIMEVAR (TV_EXT_DCE , "ext dce")
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index b3c97658a8f..b8a96b3cc30 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -387,6 +387,7 @@ extern gimple_opt_pass *make_pass_empty_loop
> (gcc::context *ctxt);
> extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
> extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
> extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
> +extern gimple_opt_pass *make_pass_rematerialize_loads (gcc::context *ctxt);
> extern gimple_opt_pass *make_pass_if_to_switch (gcc::context *ctxt);
> extern gimple_opt_pass *make_pass_loop_distribution (gcc::context *ctxt);
> extern gimple_opt_pass *make_pass_crc_optimization (gcc::context *ctxt);
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)