As Richi suggested, this moves the check into the loop so that we check every load.
I had initially not done this because I figured the loads would be treated as a
group anyway and the group would be valid or not as a whole. But invariant loads
can form a group without every load in it being within range of a known bound.
Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu
(-m32 and -m64) with no issues.
Pushed to master.
Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/122868
* tree-vect-stmts.cc (vectorizable_load): Move check for invariant loads
down into the loop.
---
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index
12eb5ea5b5e3a8b92cafdc0bb6f05e7c72c3fe05..c5ca8c91f048793bf37d902e3655fc2404383957
100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9902,36 +9902,35 @@ vectorizable_load (vec_info *vinfo,
bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
&& !nested_in_vect_loop);
- /* It is unsafe to hoist a conditional load over the conditions that make
- it valid. When early break this means that any invariant load can't be
- hoisted unless it's in the loop header or if we know something else has
- verified the load is valid to do. Alignment peeling would do this
- since getting through the prologue means the load was done at least
- once and so the vector main body is free to hoist it. However today
- GCC will hoist the load above the PFA loop. As such that makes it
- still invalid and so we can't allow it today. */
- auto stmt_bb
- = gimple_bb (STMT_VINFO_STMT (
- vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[0])));
- if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && !DR_SCALAR_KNOWN_BOUNDS (dr_info)
- && stmt_bb != loop->header)
- {
- if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
- && dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ bool uniform_p = true;
+ for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
+ {
+ /* It is unsafe to hoist a conditional load over the conditions that
+ make it valid. When early break this means that any invariant load
+ can't be hoisted unless it's in the loop header or if we know
+ something else has verified the load is valid to do. Alignment
+ peeling would do this since getting through the prologue means the
+ load was done at least once and so the vector main body is free to
+ hoist it. However today GCC will hoist the load above the PFA
+ loop. As such that makes it still invalid and so we can't allow it
+ today. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ && !DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (sinfo))
+ && gimple_bb (STMT_VINFO_STMT (vect_orig_stmt (sinfo)))
+ != loop->header)
+ {
+ if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ && dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not hoisting invariant load due to early break"
"constraints\n");
- else if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
+ else if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
"not hoisting invariant load due to early break"
"constraints\n");
- hoist_p = false;
- }
+ hoist_p = false;
+ }
- bool uniform_p = true;
- for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
- {
hoist_p = hoist_p && hoist_defs_of_uses (sinfo->stmt, loop, false);
if (sinfo != SLP_TREE_SCALAR_STMTS (slp_node)[0])
uniform_p = false;
--
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 12eb5ea5b5e3a8b92cafdc0bb6f05e7c72c3fe05..c5ca8c91f048793bf37d902e3655fc2404383957 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9902,36 +9902,35 @@ vectorizable_load (vec_info *vinfo,
bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
&& !nested_in_vect_loop);
- /* It is unsafe to hoist a conditional load over the conditions that make
- it valid. When early break this means that any invariant load can't be
- hoisted unless it's in the loop header or if we know something else has
- verified the load is valid to do. Alignment peeling would do this
- since getting through the prologue means the load was done at least
- once and so the vector main body is free to hoist it. However today
- GCC will hoist the load above the PFA loop. As such that makes it
- still invalid and so we can't allow it today. */
- auto stmt_bb
- = gimple_bb (STMT_VINFO_STMT (
- vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[0])));
- if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && !DR_SCALAR_KNOWN_BOUNDS (dr_info)
- && stmt_bb != loop->header)
- {
- if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
- && dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ bool uniform_p = true;
+ for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
+ {
+ /* It is unsafe to hoist a conditional load over the conditions that
+ make it valid. When early break this means that any invariant load
+ can't be hoisted unless it's in the loop header or if we know
+ something else has verified the load is valid to do. Alignment
+ peeling would do this since getting through the prologue means the
+ load was done at least once and so the vector main body is free to
+ hoist it. However today GCC will hoist the load above the PFA
+ loop. As such that makes it still invalid and so we can't allow it
+ today. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ && !DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (sinfo))
+ && gimple_bb (STMT_VINFO_STMT (vect_orig_stmt (sinfo)))
+ != loop->header)
+ {
+ if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ && dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not hoisting invariant load due to early break"
"constraints\n");
- else if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
+ else if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
"not hoisting invariant load due to early break"
"constraints\n");
- hoist_p = false;
- }
+ hoist_p = false;
+ }
- bool uniform_p = true;
- for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
- {
hoist_p = hoist_p && hoist_defs_of_uses (sinfo->stmt, loop, false);
if (sinfo != SLP_TREE_SCALAR_STMTS (slp_node)[0])
uniform_p = false;