On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote:
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > 
> > During those 2 bootstraps/regtests, data.load_found has been set just
> > on the new testcase on ia32.
> 
> Hmm, I wonder whether we really need to DCE calls after reload?
> That said, I'm not familiar enough with the code to check if the
> patch makes sense (can there ever be uses of the argument slots
> _after_ the call?).

And here is the patch on top of the refactoring patch.
As for uses of the argument slots after the call, I hope Jeff and Vlad won't
mind if I copy'n'paste what they said on IRC yesterday:
<jakub> law: vmakarov: in PR89965, is it valid RTL to store something in stack
  argument slot and read it again before the call that uses that stack slot?
<jakub> law: vmakarov: if yes, I have a rough idea what to do, but if stack
  argument slots may be only stored and can be read only by a callee and not
  by something else in the caller, then it is a RA issue
<vmakarov> jakub: i think it is not defined (or described). But the old reload
  used equiv memory for long time to store value in memory.  LRA just uses the
  same approach.
<law> i think you're allowed to read it up to the call point, who "owns" the
  contents of the slot after the call point is subject to debate :-)
<law> not sure if we defined things once a REG_EQUIV is on the MEM, but that
  would tend to imply the memory is unchanging across the call
<law> (though one could ask how in the world the caller would know the callee
  didn't clobber the slot....)

2019-04-11  Jakub Jelinek  <ja...@redhat.com>
        
        PR rtl-optimization/89965
        * dce.c: Include rtl-iter.h.
        (struct check_argument_load_data): New type.
        (check_argument_load): New function.
        (find_call_stack_args): Check for loads from stack slots still tracked
        in sp_bytes and punt if any is found.

        * gcc.target/i386/pr89965.c: New test.

--- gcc/dce.c.jj        2019-04-11 10:30:00.436705065 +0200
+++ gcc/dce.c   2019-04-11 10:43:00.926828390 +0200
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.
 #include "valtrack.h"
 #include "tree-pass.h"
 #include "dbgcnt.h"
+#include "rtl-iter.h"
 
 
 /* -------------------------------------------------------------------------
@@ -325,6 +326,48 @@ sp_based_mem_offset (rtx_call_insn *call
   return off;
 }
 
+/* Data for check_argument_load called via note_uses.  */
+struct check_argument_load_data {
+  bitmap sp_bytes;
+  HOST_WIDE_INT min_sp_off, max_sp_off;
+  rtx_call_insn *call_insn;
+  bool fast;
+  bool load_found;
+};
+
+/* Helper function for find_call_stack_args.  Check if there are
+   any loads from the argument slots in between the const/pure call
+   and store to the argument slot, set LOAD_FOUND if any is found.  */
+
+static void
+check_argument_load (rtx *loc, void *data)
+{
+  struct check_argument_load_data *d
+    = (struct check_argument_load_data *) data;
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
+    {
+      const_rtx mem = *iter;
+      HOST_WIDE_INT size;
+      if (MEM_P (mem)
+         && MEM_SIZE_KNOWN_P (mem)
+         && MEM_SIZE (mem).is_constant (&size))
+       {
+         HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast);
+         if (off != INTTYPE_MINIMUM (HOST_WIDE_INT)
+             && off < d->max_sp_off
+             && off + size > d->min_sp_off)
+           for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off);
+                byte < MIN (off + size, d->max_sp_off); byte++)
+             if (bitmap_bit_p (d->sp_bytes, byte - d->min_sp_off))
+               {
+                 d->load_found = true;
+                 return;
+               }
+       }
+    }
+}
+
 /* Try to find all stack stores of CALL_INSN arguments if
    ACCUMULATE_OUTGOING_ARGS.  If all stack stores have been found
    and it is therefore safe to eliminate the call, return true,
@@ -396,6 +439,8 @@ find_call_stack_args (rtx_call_insn *cal
   /* Walk backwards, looking for argument stores.  The search stops
      when seeing another call, sp adjustment or memory store other than
      argument store.  */
+  struct check_argument_load_data data
+    = { sp_bytes, min_sp_off, max_sp_off, call_insn, fast, false };
   ret = false;
   for (insn = PREV_INSN (call_insn); insn; insn = prev_insn)
     {
@@ -414,6 +459,10 @@ find_call_stack_args (rtx_call_insn *cal
       if (!set || SET_DEST (set) == stack_pointer_rtx)
        break;
 
+      note_uses (&PATTERN (insn), check_argument_load, &data);
+      if (data.load_found)
+       break;
+
       if (!MEM_P (SET_DEST (set)))
        continue;
 
--- gcc/testsuite/gcc.target/i386/pr89965.c.jj  2019-04-11 10:28:56.211764660 +0200
+++ gcc/testsuite/gcc.target/i386/pr89965.c     2019-04-11 10:28:56.211764660 +0200
@@ -0,0 +1,39 @@
+/* PR rtl-optimization/89965 */
+/* { dg-do run } */
+/* { dg-options "-O -mtune=nano-x2 -fcaller-saves -fexpensive-optimizations -fno-tree-dce -fno-tree-ter" } */
+/* { dg-additional-options "-march=i386" { target ia32 } } */
+
+int a;
+
+__attribute__ ((noipa)) unsigned long long
+foo (unsigned char c, unsigned d, unsigned e, unsigned long long f,
+     unsigned char g, unsigned h, unsigned long long i)
+{
+  (void) d;
+  unsigned short j = __builtin_mul_overflow_p (~0, h, c);
+  e <<= e;
+  i >>= 7;
+  c *= i;
+  i /= 12;
+  a = __builtin_popcount (c);
+  __builtin_add_overflow (e, a, &f);
+  return c + f + g + j + h;
+}
+
+__attribute__ ((noipa)) void
+bar (void)
+{
+  char buf[64];
+  __builtin_memset (buf, 0x55, sizeof buf);
+  asm volatile ("" : : "r" (&buf[0]) : "memory");
+}
+
+int
+main (void)
+{
+  bar ();
+  unsigned long long x = foo (2, 0, 0, 0, 0, 0, 0);
+  if (x != 0)
+    __builtin_abort ();
+  return 0;
+}

        Jakub
