https://gcc.gnu.org/g:c1eb02dea3558b2974b5621717a793a398f8855e

commit c1eb02dea3558b2974b5621717a793a398f8855e
Author: Alexandre Oliva <[email protected]>
Date:   Thu Dec 4 00:40:27 2025 -0300

    add fusage for non-ACCUMULATE_OUTGOING_ARGS calls [PR122947]
    
    Since we may delete stores that are found to be redundant in
    postreload cse, we need cselib to invalidate argument stores at calls,
    and to that end we need CALL_INSN_FUNCTION_USAGE to mention all MEM
    stack space that may be legitimately modified by a const/pure callee,
    i.e., all arguments passed to it on the stack.
    
    When ACCUMULATE_OUTGOING_ARGS, each on-stack argument gets its own
    usage information, but when it's not, each argument is pushed
    incrementally, without precomputed stack slots.
    
    Since we only mentioned such precomputed stack slots in
    CALL_INSN_FUNCTION_USAGE, non-ACCUMULATE_OUTGOING_ARGS configurations
    miss the stack usage data, and cselib fails to invalidate the stores.
    
    Stores in such slots are anonymous, and they often invalidate other
    anonymous slots, even part of the same object, but as the testcase
    demonstrates, we may occasionally be unlucky that consecutive calls
    have the stores to multi-word objects reordered by scheduling in such
    a way that the last store for the first call survives the call in the
    cselib tables, and then it is found to be redundant with the first
    store for the subsequent call, as in the testcase.
    
    So, if we haven't preallocated outgoing arguments for a call (which
    would give us preassigned stack slots), and we have used any stack
    space, add function call usage covering the entire stack range where
    arguments were stored.
    
    
    for  gcc/ChangeLog
    
            PR rtl-optimization/122947
            * calls.cc (expand_call): Add stack function usage in
            non-ACCUMULATE_OUTGOING_ARGS configurations.
    
    for  gcc/testsuite/ChangeLog
    
            PR rtl-optimization/122947
            * gcc.dg/pr122947.c: New.

Diff:
---
 gcc/calls.cc                    | 29 +++++++++++++++++++++++++-
 gcc/testsuite/gcc.dg/pr122947.c | 45 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/gcc/calls.cc b/gcc/calls.cc
index bb8a6d09f825..85d6ea413437 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -3579,7 +3579,8 @@ expand_call (tree exp, rtx target, int ignore)
                      && check_sibcall_argument_overlap (before_arg,
                                                         &args[i], true)))
                sibcall_failure = true;
-             }
+             gcc_checking_assert (!args[i].stack || argblock);
+           }
 
          if (args[i].stack)
            call_fusage
@@ -3676,6 +3677,32 @@ expand_call (tree exp, rtx target, int ignore)
          && !must_preallocate && reg_parm_stack_space > 0)
        anti_adjust_stack (GEN_INT (reg_parm_stack_space));
 
+      /* Cover pushed arguments with call usage, so that cselib knows to
+        invalidate the stores in them at the call insn.  */
+      if (pass == 1 && !argblock
+         && (maybe_ne (adjusted_args_size.constant, 0)
+             || adjusted_args_size.var))
+       {
+         rtx addr = virtual_outgoing_args_rtx;
+         poly_int64 size = adjusted_args_size.constant;
+         if (!STACK_GROWS_DOWNWARD)
+           {
+             if (adjusted_args_size.var)
+               /* ??? We can't compute the exact base address.  */
+               addr = gen_rtx_PLUS (GET_MODE (addr), addr,
+                                    gen_rtx_SCRATCH (GET_MODE (addr)));
+             else
+               addr = plus_constant (GET_MODE (addr), addr, -size);
+           }
+         rtx fu = gen_rtx_MEM (BLKmode, addr);
+         if (adjusted_args_size.var == 0)
+           set_mem_size (fu, size);
+         call_fusage
+           = gen_rtx_EXPR_LIST (BLKmode,
+                                gen_rtx_USE (VOIDmode, fu),
+                                call_fusage);
+       }
+
       /* Pass the function the address in which to return a
         structure value.  */
       if (pass != 0 && structure_value_addr && ! structure_value_addr_parm)
diff --git a/gcc/testsuite/gcc.dg/pr122947.c b/gcc/testsuite/gcc.dg/pr122947.c
new file mode 100644
index 000000000000..945a61a4c177
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr122947.c
@@ -0,0 +1,45 @@
+/* PR rtl-optimization/122947 based on PR 117239 */
+/* { dg-do run } */
+/* { dg-options "-fno-inline -O2" } */
+/* { dg-additional-options "-fschedule-insns -mno-accumulate-outgoing-args" { 
target x86 } } */
+
+int c = 1;
+
+struct A {
+  int e, f, g, h;
+  short i;
+  int j;
+};
+
+void
+bar (int x, struct A y)
+{
+  if (y.j == 1)
+    c = 0;
+}
+
+/* Simplest pure way to force baz's x.j back to memory.  
+   So simple that IPA "inlines" it, so we disable IPA and mark as pure.  */
+int __attribute__ ((noipa, pure))
+bad (struct A const *x)
+{
+  return x->j;
+}
+
+int
+baz (struct A x)
+{
+  x.j = 0;
+  return bad (&x);
+}
+
+int
+main ()
+{
+  struct A k = { 0, 0, 0, 0, 0, 1 };
+  int d = baz (k);
+  bar (0, k);
+  if (c + d != 0)
+    __builtin_abort ();
+  return 0;
+}

Reply via email to