On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote: > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > > > During those 2 bootstraps/regtests, data.load_found has been set just > > on the new testcase on ia32. > > Hmm, I wonder whether we really need to DCE calls after reload? > That said, I'm not familiar enough with the code to check if the > patch makes sense (can there ever be uses of the argument slots > _after_ the call?).
And here is the patch on top of the refactoring patch. As for the argument slots after the call, hope Jeff and Vlad won't mind if I copy'n'paste what they said on IRC yesterday: law: vmakarov: in PR89965, is it valid RTL to store something in stack argument slot and read it again before the call that uses that stack slot? law: vmakarov: if yes, I have a rough idea what to do, but if stack argument slots may be only stored and can be read only by a callee and not by something else in the caller, then it is a RA issue <vmakarov> jakub: i think it is not defined (or described). But the old reload used equiv memory for long time to store value in memory. LRA just uses the same approach. <law> i think you're allowed to read it up to the call point, who "owns" the contents of the slot after the call point is subject to debate :-) <law> not sure if we defined things once a REG_EQUIV is on the MEM, but that would tend to imply the memory is unchanging across the call <law> (though one could ask how in the world the caller would know the callee didn't clobber the slot....) 2019-04-11 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/89965 * dce.c: Include rtl-iter.h. (struct check_argument_load_data): New type. (check_argument_load): New function. (find_call_stack_args): Check for loads from stack slots still tracked in sp_bytes and punt if any is found. * gcc.target/i386/pr89965.c: New test. --- gcc/dce.c.jj 2019-04-11 10:30:00.436705065 +0200 +++ gcc/dce.c 2019-04-11 10:43:00.926828390 +0200 @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. #include "valtrack.h" #include "tree-pass.h" #include "dbgcnt.h" +#include "rtl-iter.h" /* ------------------------------------------------------------------------- @@ -325,6 +326,48 @@ sp_based_mem_offset (rtx_call_insn *call return off; } +/* Data for check_argument_load called via note_uses. */ +struct check_argument_load_data { + bitmap sp_bytes; + HOST_WIDE_INT min_sp_off, max_sp_off; + rtx_call_insn *call_insn; + bool fast; + bool load_found; +}; + +/* Helper function for find_call_stack_args. Check if there are + any loads from the argument slots in between the const/pure call + and store to the argument slot, set LOAD_FOUND if any is found. */ + +static void +check_argument_load (rtx *loc, void *data) +{ + struct check_argument_load_data *d + = (struct check_argument_load_data *) data; + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, *loc, NONCONST) + { + const_rtx mem = *iter; + HOST_WIDE_INT size; + if (MEM_P (mem) + && MEM_SIZE_KNOWN_P (mem) + && MEM_SIZE (mem).is_constant (&size)) + { + HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast); + if (off != INTTYPE_MINIMUM (HOST_WIDE_INT) + && off < d->max_sp_off + && off + size > d->min_sp_off) + for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off); + byte < MIN (off + size, d->max_sp_off); byte++) + if (bitmap_bit_p (d->sp_bytes, byte - d->min_sp_off)) + { + d->load_found = true; + return; + } + } + } +} + /* Try to find all stack stores of CALL_INSN arguments if ACCUMULATE_OUTGOING_ARGS. If all stack stores have been found and it is therefore safe to eliminate the call, return true, @@ -396,6 +439,8 @@ find_call_stack_args (rtx_call_insn *cal /* Walk backwards, looking for argument stores. The search stops when seeing another call, sp adjustment or memory store other than argument store. */ + struct check_argument_load_data data + = { sp_bytes, min_sp_off, max_sp_off, call_insn, fast, false }; ret = false; for (insn = PREV_INSN (call_insn); insn; insn = prev_insn) { @@ -414,6 +459,10 @@ find_call_stack_args (rtx_call_insn *cal if (!set || SET_DEST (set) == stack_pointer_rtx) break; + note_uses (&PATTERN (insn), check_argument_load, &data); + if (data.load_found) + break; + if (!MEM_P (SET_DEST (set))) continue; --- gcc/testsuite/gcc.target/i386/pr89965.c.jj 2019-04-11 10:28:56.211764660 +0200 +++ gcc/testsuite/gcc.target/i386/pr89965.c 2019-04-11 10:28:56.211764660 +0200 @@ -0,0 +1,39 @@ +/* PR rtl-optimization/89965 */ +/* { dg-do run } */ +/* { dg-options "-O -mtune=nano-x2 -fcaller-saves -fexpensive-optimizations -fno-tree-dce -fno-tree-ter" } */ +/* { dg-additional-options "-march=i386" { target ia32 } } */ + +int a; + +__attribute__ ((noipa)) unsigned long long +foo (unsigned char c, unsigned d, unsigned e, unsigned long long f, + unsigned char g, unsigned h, unsigned long long i) +{ + (void) d; + unsigned short j = __builtin_mul_overflow_p (~0, h, c); + e <<= e; + i >>= 7; + c *= i; + i /= 12; + a = __builtin_popcount (c); + __builtin_add_overflow (e, a, &f); + return c + f + g + j + h; +} + +__attribute__ ((noipa)) void +bar (void) +{ + char buf[64]; + __builtin_memset (buf, 0x55, sizeof buf); + asm volatile ("" : : "r" (&buf[0]) : "memory"); +} + +int +main (void) +{ + bar (); + unsigned long long x = foo (2, 0, 0, 0, 0, 0, 0); + if (x != 0) + __builtin_abort (); + return 0; +} Jakub