From: Philipp Tomsich <[email protected]>

The redundant-store tracking introduced in ec5349c37af replaced a
safe bitmap_bit_in_range_p check (which bailed on any overlap) with
bitmap_all_bits_in_range_p (which only removed fully redundant stores).
This broke "last writer wins" semantics for partially overlapping
stores: when an earlier store's BFI was applied after the base store's
value, it overwrote bytes that should have belonged to the later store.

Restore the original overlap check from 1d8de1e93ea: bail out of the
optimization when any bit in a store's byte range is already claimed
by a later store in program order.  Remove the now-unnecessary
redundant-store tracking (redundant_stores, store_ind_to_remove).

gcc/ChangeLog:

        PR rtl-optimization/124476
        * avoid-store-forwarding.cc
        (store_forwarding_analyzer::process_store_forwarding): Replace
        bitmap_all_bits_in_range_p with bitmap_any_bit_in_range_p and
        return false on partial overlap.  Remove redundant-store vectors
        and their associated removal, dump, and deletion logic.

gcc/testsuite/ChangeLog:

        PR rtl-optimization/124476
        * gcc.dg/avoid-store-forwarding-1.c: New test.
---
 gcc/avoid-store-forwarding.cc                 | 43 ++++---------------
 .../gcc.dg/avoid-store-forwarding-1.c         | 31 +++++++++++++
 2 files changed, 39 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c

diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 75ba30a62eb5..260c135c219f 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -171,28 +171,23 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
   /* Memory sizes should be constants at this stage.  */
   HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
 
-  /* If the stores cover all the bytes of the load, then we can eliminate
-     the load entirely and use the computed value instead.
-     We can also eliminate stores on addresses that are overwritten
-     by later stores.  */
+  /* If the stores cover all the bytes of the load without overlap then we can
+     eliminate the load entirely and use the computed value instead.
+     Bail out when partially overlapping stores are detected, as the pass
+     cannot correctly handle "last writer wins" semantics for the
+     overlapping byte ranges (see PR124476).  */
 
   auto_sbitmap forwarded_bytes (load_size);
   bitmap_clear (forwarded_bytes);
 
   unsigned int i;
   store_fwd_info* it;
-  auto_vec<store_fwd_info> redundant_stores;
-  auto_vec<int> store_ind_to_remove;
   FOR_EACH_VEC_ELT (stores, i, it)
     {
       HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
-      if (bitmap_all_bits_in_range_p (forwarded_bytes, it->offset,
-                                     it->offset + store_size - 1))
-       {
-         redundant_stores.safe_push (*it);
-         store_ind_to_remove.safe_push (i);
-         continue;
-       }
+      if (bitmap_any_bit_in_range_p (forwarded_bytes, it->offset,
+                                it->offset + store_size - 1))
+       return false;
       bitmap_set_range (forwarded_bytes, it->offset, store_size);
     }
 
@@ -218,15 +213,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
        fprintf (dump_file, "(Load elimination candidate)\n");
     }
 
-  /* Remove redundant stores from the vector.  Although this is quadratic,
-     there doesn't seem to be much point optimizing it.  The number of
-     redundant stores is expected to be low and the length of the list is
-     limited by a --param.  The dependence checking that we did earlier is
-     also quadratic in the size of this list.  */
-  store_ind_to_remove.reverse ();
-  for (int i : store_ind_to_remove)
-    stores.ordered_remove (i);
-
   rtx load = single_set (load_insn);
   rtx dest;
 
@@ -413,15 +399,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
            }
        }
 
-      if (redundant_stores.length () > 0)
-       {
-         fprintf (dump_file, "\nRedundant stores that have been removed:\n");
-         FOR_EACH_VEC_ELT (redundant_stores, i, it)
-           {
-             fprintf (dump_file, "  ");
-             print_rtl_single (dump_file, it->store_insn);
-           }
-       }
     }
 
   stats_sf_avoided++;
@@ -441,10 +418,6 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
       delete_insn (it->store_insn);
     }
 
-  /* Delete redundant stores.  */
-  FOR_EACH_VEC_ELT (redundant_stores, i, it)
-    delete_insn (it->store_insn);
-
   df_insn_rescan (load_insn);
 
   if (load_elim)
diff --git a/gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c b/gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c
new file mode 100644
index 000000000000..f9f0e392df4a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c
@@ -0,0 +1,31 @@
+/* PR rtl-optimization/124476 */
+/* { dg-do run } */
+/* { dg-options "-O1 -favoid-store-forwarding" } */
+
+/* Verify that overlapping stores are handled correctly.
+   The avoid-store-forwarding pass must respect "last writer wins"
+   semantics when stores have partially overlapping byte ranges.  */
+
+int y;
+
+__attribute__((noipa)) void
+foo (int a, long *ret)
+{
+  long f;
+  char c;
+  __builtin_memset (2 + (char *) &f, a, 6);
+  char n = *(char *) __builtin_memset (&c, a, 1);
+  __builtin_memmove (&f, &y, 4);
+  long r = f + a + n;
+  *ret = r;
+}
+
+int
+main ()
+{
+  long x;
+  foo (5, &x);
+  if (x != 0x050505050000000aL)
+    __builtin_abort ();
+  return 0;
+}
-- 
2.52.0

Reply via email to