From: Philipp Tomsich <[email protected]>
The redundant-store tracking introduced in ec5349c37af replaced a
safe bitmap_bit_in_range_p check (which bailed on any overlap) with
bitmap_all_bits_in_range_p (which only removed fully redundant stores).
This broke "last writer wins" semantics for partially overlapping
stores: when an earlier store's BFI was applied after the base store's
value, it overwrote bytes that should have belonged to the later store.
Restore the original overlap check from 1d8de1e93ea: bail out of the
optimization when any bit in a store's byte range is already claimed
by a later store in program order. Remove the now-unnecessary
redundant-store tracking (redundant_stores, store_ind_to_remove).
gcc/ChangeLog:
PR rtl-optimization/124476
* avoid-store-forwarding.cc
(store_forwarding_analyzer::process_store_forwarding): Replace
bitmap_all_bits_in_range_p with bitmap_any_bit_in_range_p and
return false on partial overlap. Remove redundant-store vectors
and their associated removal, dump, and deletion logic.
gcc/testsuite/ChangeLog:
PR rtl-optimization/124476
* gcc.dg/avoid-store-forwarding-1.c: New test.
---
gcc/avoid-store-forwarding.cc | 43 ++++---------------
.../gcc.dg/avoid-store-forwarding-1.c | 31 +++++++++++++
2 files changed, 39 insertions(+), 35 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c
diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 75ba30a62eb5..260c135c219f 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -171,28 +171,23 @@ process_store_forwarding (vec<store_fwd_info> &stores,
rtx_insn *load_insn,
/* Memory sizes should be constants at this stage. */
HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
- /* If the stores cover all the bytes of the load, then we can eliminate
- the load entirely and use the computed value instead.
- We can also eliminate stores on addresses that are overwritten
- by later stores. */
+ /* If the stores cover all the bytes of the load without overlap then we can
+ eliminate the load entirely and use the computed value instead.
+ Bail out when partially overlapping stores are detected, as the pass
+ cannot correctly handle "last writer wins" semantics for the
+ overlapping byte ranges (see PR124476). */
auto_sbitmap forwarded_bytes (load_size);
bitmap_clear (forwarded_bytes);
unsigned int i;
store_fwd_info* it;
- auto_vec<store_fwd_info> redundant_stores;
- auto_vec<int> store_ind_to_remove;
FOR_EACH_VEC_ELT (stores, i, it)
{
HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
- if (bitmap_all_bits_in_range_p (forwarded_bytes, it->offset,
- it->offset + store_size - 1))
- {
- redundant_stores.safe_push (*it);
- store_ind_to_remove.safe_push (i);
- continue;
- }
+ if (bitmap_bit_in_range_p (forwarded_bytes, it->offset,
+ it->offset + store_size - 1))
+ return false;
bitmap_set_range (forwarded_bytes, it->offset, store_size);
}
@@ -218,15 +213,6 @@ process_store_forwarding (vec<store_fwd_info> &stores,
rtx_insn *load_insn,
fprintf (dump_file, "(Load elimination candidate)\n");
}
- /* Remove redundant stores from the vector. Although this is quadratic,
- there doesn't seem to be much point optimizing it. The number of
- redundant stores is expected to be low and the length of the list is
- limited by a --param. The dependence checking that we did earlier is
- also quadratic in the size of this list. */
- store_ind_to_remove.reverse ();
- for (int i : store_ind_to_remove)
- stores.ordered_remove (i);
-
rtx load = single_set (load_insn);
rtx dest;
@@ -413,15 +399,6 @@ process_store_forwarding (vec<store_fwd_info> &stores,
rtx_insn *load_insn,
}
}
- if (redundant_stores.length () > 0)
- {
- fprintf (dump_file, "\nRedundant stores that have been removed:\n");
- FOR_EACH_VEC_ELT (redundant_stores, i, it)
- {
- fprintf (dump_file, " ");
- print_rtl_single (dump_file, it->store_insn);
- }
- }
}
stats_sf_avoided++;
@@ -441,10 +418,6 @@ process_store_forwarding (vec<store_fwd_info> &stores,
rtx_insn *load_insn,
delete_insn (it->store_insn);
}
- /* Delete redundant stores. */
- FOR_EACH_VEC_ELT (redundant_stores, i, it)
- delete_insn (it->store_insn);
-
df_insn_rescan (load_insn);
if (load_elim)
diff --git a/gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c
b/gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c
new file mode 100644
index 000000000000..f9f0e392df4a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c
@@ -0,0 +1,31 @@
+/* PR rtl-optimization/124476 */
+/* { dg-do run { target le } } */
+/* { dg-options "-O1 -favoid-store-forwarding" } */
+
+/* Verify that overlapping stores are handled correctly.
+ The avoid-store-forwarding pass must respect "last writer wins"
+ semantics when stores have partially overlapping byte ranges. */
+
+int y;
+
+__attribute__((noipa)) void
+foo (int a, long long *ret)
+{
+  long long f;
+  char c;
+  __builtin_memset (2 + (char *) &f, a, 6);
+  char n = *(char *) __builtin_memset (&c, a, 1);
+  __builtin_memmove (&f, &y, 4);
+  long long r = f + a + n;
+  *ret = r;
+}
+
+int
+main ()
+{
+  long long x;
+  foo (5, &x);
+  if (x != 0x050505050000000aLL)
+    __builtin_abort ();
+  return 0;
+}
--
2.52.0