When there are multiple stores with the same offset as the load, and we
are eliminating the load, we were generating a mov instruction for each
of them, overwriting the register containing the loaded value.

This patch fixes the issue by generating a mov instruction only for the
first store in the store-load sequence that has the same offset as the
load.  For any subsequent ones, bit-field insertion is used.
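
For reference, the problematic shape looks roughly like the following
(a minimal sketch for illustration only, not taken from the PR; the
function and its arguments are made up):

  /* Assuming -O -favoid-store-forwarding: both stores begin at offset 0
     of x, so both have the same offset as the subsequent load of x.
     Before this patch, each of them emitted a mov into the register
     holding the forwarded value, so the second mov clobbered the first.  */
  int
  bug_shape (int x, short a, char b)
  {
    *(short *) &x = a;   /* store at offset 0, two bytes wide */
    *(char *) &x = b;    /* second store at the same offset, one byte */
    return x;            /* load that the pass tries to eliminate */
  }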

Bootstrapped/regtested on AArch64 and x86_64.

        PR rtl-optimization/120660

gcc/ChangeLog:

        * avoid-store-forwarding.cc (process_store_forwarding):
        Fix instruction generation when having multiple stores with
        the same offset as the load.

gcc/testsuite/ChangeLog:

        * gcc.dg/pr120660.c: New test.
---
 gcc/avoid-store-forwarding.cc   | 35 +++++++++++++++++++++++++--------
 gcc/testsuite/gcc.dg/pr120660.c | 19 ++++++++++++++++++
 2 files changed, 46 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr120660.c

diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 785efd22606f..b62fb61539bd 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -231,20 +231,39 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
 
   int move_to_front = -1;
   int total_cost = 0;
+  int base_offset_index = -1;
+
+  /* Find the last store that has the same offset as the load, in the case
+     that we're eliminating the load.  We will try to use it as a base
+     register to avoid bit inserts (see second loop below).  We want the
+     last one, as it will be wider and we don't want to overwrite the base
+     register if there are many of them.  */
+  if (load_elim)
+    {
+      FOR_EACH_VEC_ELT_REVERSE (stores, i, it)
+       {
+         const bool has_base_offset
+           = known_eq (poly_uint64 (it->offset),
+                       subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
+                                                   load_size));
+         if (has_base_offset)
+           {
+             base_offset_index = i;
+             break;
+           }
+       }
+    }
 
   /* Check if we can emit bit insert instructions for all forwarded stores.  */
   FOR_EACH_VEC_ELT (stores, i, it)
     {
       it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
       rtx_insn *insns = NULL;
-      const bool has_base_offset
-       = known_eq (poly_uint64 (it->offset),
-                   subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
-                                               load_size));
-
-      /* If we're eliminating the load then find the store with zero offset
-        and use it as the base register to avoid a bit insert if possible.  */
-      if (load_elim && has_base_offset)
+
+      /* If this store has the base offset and we're eliminating the load,
+        use it as the base register to avoid a bit insert if possible.
+        Load elimination is implied by base_offset_index != -1.  */
+      if (i == (unsigned) base_offset_index)
        {
          start_sequence ();
 
diff --git a/gcc/testsuite/gcc.dg/pr120660.c b/gcc/testsuite/gcc.dg/pr120660.c
new file mode 100644
index 000000000000..6e8c5e88d00e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr120660.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-options "-O -favoid-store-forwarding" } */
+
+int c;
+
+short
+foo (short s)
+{
+  __builtin_memset (&s, c, 1);
+  return s;
+}
+
+int
+main ()
+{
+  short x = foo (0x1111);
+  if (x != 0x1100 && x != 0x0011)
+    __builtin_abort ();
+}
-- 
2.50.1
