On Tue, Feb 04, 2025 at 01:11:10PM +0100, Richard Biener wrote:
> OK, fair enough.  I think there should be a comment indicating this
> isn't a full conservative approach but handling a certain class of
> accesses only, with the hope that this is all that's actually needed.

Ok, here is an updated patch.

cselib_invalidate_mem (callmem[1]) is now done solely for
!ACCUMULATE_OUTGOING_ARGS || cfun->calls_alloca functions, I've
renamed the variables to x_{base,off} and sp_{base,off} -
the first pair is for x's address, the second pair is for current
value of stack pointer, tweaked/extended comments and sp_{base,off}
is now initialized just once per cselib_invalidate_mem call, though still
only lazily (if we actually need it for something).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-02-05  Jakub Jelinek  <ja...@redhat.com>

        PR rtl-optimization/117239
        * cselib.cc: Include predict.h.
        (callmem): Change type from rtx to rtx[2].
        (cselib_preserve_only_values): Use callmem[0] rather than callmem.
        (cselib_invalidate_mem): Optimize and don't try to invalidate
        for the mem_rtx == callmem[1] case MEMs which clearly can't be
        below the stack pointer.
        (cselib_process_insn): Use callmem[0] rather than callmem.
        For const/pure calls also call cselib_invalidate_mem (callmem[1])
        in !ACCUMULATE_OUTGOING_ARGS or cfun->calls_alloca functions.
        (cselib_init): Initialize callmem[0] rather than callmem and also
        initialize callmem[1].

        * gcc.dg/pr117239.c: New test.

--- gcc/cselib.cc.jj    2025-02-01 00:46:53.073275225 +0100
+++ gcc/cselib.cc       2025-02-05 09:15:47.394065496 +0100
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.
 #include "cselib.h"
 #include "function-abi.h"
 #include "alias.h"
+#include "predict.h"
 
 /* A list of cselib_val structures.  */
 struct elt_list
@@ -248,8 +249,9 @@ static unsigned int *used_regs;
 static unsigned int n_used_regs;
 
 /* We pass this to cselib_invalidate_mem to invalidate all of
-   memory for a non-const call instruction.  */
-static GTY(()) rtx callmem;
+   memory for a non-const call instruction and memory below stack pointer
+   for const/pure calls.  */
+static GTY(()) rtx callmem[2];
 
 /* Set by discard_useless_locs if it deleted the last location of any
    value.  */
@@ -808,7 +810,7 @@ cselib_preserve_only_values (void)
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     cselib_invalidate_regno (i, reg_raw_mode[i]);
 
-  cselib_invalidate_mem (callmem);
+  cselib_invalidate_mem (callmem[0]);
 
   remove_useless_values ();
 
@@ -2630,6 +2632,8 @@ cselib_invalidate_mem (rtx mem_rtx)
       struct elt_loc_list **p = &v->locs;
       bool had_locs = v->locs != NULL;
       rtx_insn *setting_insn = v->locs ? v->locs->setting_insn : NULL;
+      rtx sp_base = NULL_RTX;
+      HOST_WIDE_INT sp_off = 0;
 
       while (*p)
        {
@@ -2644,6 +2648,114 @@ cselib_invalidate_mem (rtx mem_rtx)
              p = &(*p)->next;
              continue;
            }
+
+         /* When invalidating memory below the stack pointer for const/pure
+            calls and alloca/VLAs aren't used, attempt to optimize.  Values
+            stored into area sometimes below the stack pointer shouldn't be
+            addressable and should be stored just through stack pointer
+            derived expressions, so don't invalidate MEMs not using stack
+            derived addresses, or if the MEMs clearly aren't below the stack
+            pointer.  This isn't a fully conservative approach, the hope is
+            that invalidating more MEMs than this isn't actually needed.  */
+         if (mem_rtx == callmem[1]
+             && num_mems < param_max_cselib_memory_locations
+             && GET_CODE (XEXP (x, 0)) == VALUE
+             && !cfun->calls_alloca)
+           {
+             cselib_val *v2 = CSELIB_VAL_PTR (XEXP (x, 0));
+             rtx x_base = NULL_RTX;
+             HOST_WIDE_INT x_off = 0;
+             if (SP_DERIVED_VALUE_P (v2->val_rtx))
+               x_base = v2->val_rtx;
+             else
+               for (struct elt_loc_list *l = v2->locs; l; l = l->next)
+                 if (GET_CODE (l->loc) == PLUS
+                     && GET_CODE (XEXP (l->loc, 0)) == VALUE
+                     && SP_DERIVED_VALUE_P (XEXP (l->loc, 0))
+                     && CONST_INT_P (XEXP (l->loc, 1)))
+                   {
+                     x_base = XEXP (l->loc, 0);
+                     x_off = INTVAL (XEXP (l->loc, 1));
+                     break;
+                   }
+             /* If x_base is NULL here, don't invalidate x as its address
+                isn't derived from sp such that it could be in outgoing
+                argument area of some call in !ACCUMULATE_OUTGOING_ARGS
+                function.  */
+             if (x_base)
+               {
+                 if (sp_base == NULL_RTX)
+                   {
+                     if (cselib_val *v3
+                         = cselib_lookup_1 (stack_pointer_rtx, Pmode, 0,
+                                            VOIDmode))
+                       {
+                         if (SP_DERIVED_VALUE_P (v3->val_rtx))
+                           sp_base = v3->val_rtx;
+                         else
+                           for (struct elt_loc_list *l = v3->locs;
+                                l; l = l->next)
+                             if (GET_CODE (l->loc) == PLUS
+                                 && GET_CODE (XEXP (l->loc, 0)) == VALUE
+                                 && SP_DERIVED_VALUE_P (XEXP (l->loc, 0))
+                                 && CONST_INT_P (XEXP (l->loc, 1)))
+                               {
+                                 sp_base = XEXP (l->loc, 0);
+                                 sp_off = INTVAL (XEXP (l->loc, 1));
+                                 break;
+                               }
+                       }
+                     if (sp_base == NULL_RTX)
+                       sp_base = pc_rtx;
+                   }
+                 /* Otherwise, if x_base and sp_base are the same,
+                    we know that x_base + x_off is the x's address and
+                    sp_base + sp_off is current value of stack pointer,
+                    so try to determine if x is certainly not below stack
+                    pointer.  */
+                 if (sp_base == x_base)
+                   {
+                     if (STACK_GROWS_DOWNWARD)
+                       {
+                         HOST_WIDE_INT off = sp_off;
+#ifdef STACK_ADDRESS_OFFSET
+                         /* On SPARC take stack pointer bias into account as
+                            well.  */
+                         off += (STACK_ADDRESS_OFFSET
+                                 - FIRST_PARM_OFFSET (current_function_decl));
+#endif
+                         if (x_off >= off)
+                           /* x is at or above the current stack pointer,
+                              no need to invalidate it.  */
+                           x_base = NULL_RTX;
+                       }
+                     else
+                       {
+                         HOST_WIDE_INT sz;
+                         enum machine_mode mode = GET_MODE (x);
+                         if ((MEM_SIZE_KNOWN_P (x)
+                              && MEM_SIZE (x).is_constant (&sz))
+                             || (mode != BLKmode
+                                 && GET_MODE_SIZE (mode).is_constant (&sz)))
+                           if (x_off < sp_off
+                               && ((HOST_WIDE_INT) ((unsigned HOST_WIDE_INT)
+                                                    x_off + sz) <= sp_off))
+                             /* x's end is below or at the current stack
+                                pointer in !STACK_GROWS_DOWNWARD target,
+                                no need to invalidate it.  */
+                             x_base = NULL_RTX;
+                        }
+                   }
+               }
+             if (x_base == NULL_RTX)
+               {
+                 has_mem = true;
+                 num_mems++;
+                 p = &(*p)->next;
+                 continue;
+               }
+           }
+
          if (num_mems < param_max_cselib_memory_locations
              && ! canon_anti_dependence (x, false, mem_rtx,
                                          GET_MODE (mem_rtx), mem_addr))
@@ -3196,14 +3308,24 @@ cselib_process_insn (rtx_insn *insn)
         as if they were regular functions.  */
       if (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
          || !(RTL_CONST_OR_PURE_CALL_P (insn)))
-       cselib_invalidate_mem (callmem);
+       cselib_invalidate_mem (callmem[0]);
       else
-       /* For const/pure calls, invalidate any argument slots because
-          they are owned by the callee.  */
-       for (x = CALL_INSN_FUNCTION_USAGE (insn); x; x = XEXP (x, 1))
-         if (GET_CODE (XEXP (x, 0)) == USE
-             && MEM_P (XEXP (XEXP (x, 0), 0)))
-           cselib_invalidate_mem (XEXP (XEXP (x, 0), 0));
+       {
+         /* For const/pure calls, invalidate any argument slots because
+            they are owned by the callee.  */
+         for (x = CALL_INSN_FUNCTION_USAGE (insn); x; x = XEXP (x, 1))
+           if (GET_CODE (XEXP (x, 0)) == USE
+               && MEM_P (XEXP (XEXP (x, 0), 0)))
+             cselib_invalidate_mem (XEXP (XEXP (x, 0), 0));
+         /* And invalidate memory below the stack (or above for
+            !STACK_GROWS_DOWNWARD), as even const/pure call can invalidate
+            that.  Do this only if !ACCUMULATE_OUTGOING_ARGS or if
+            cfun->calls_alloca, otherwise the stack pointer shouldn't be
+            changing in the middle of the function and nothing should be
+            stored below the stack pointer.  */
+         if (!ACCUMULATE_OUTGOING_ARGS || cfun->calls_alloca)
+           cselib_invalidate_mem (callmem[1]);
+       }
     }
 
   cselib_record_sets (insn);
@@ -3256,8 +3378,31 @@ cselib_init (int record_what)
 
   /* (mem:BLK (scratch)) is a special mechanism to conflict with everything,
      see canon_true_dependence.  This is only created once.  */
-  if (! callmem)
-    callmem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
+  if (! callmem[0])
+    callmem[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
+  /* Similarly create a MEM representing roughly everything below
+     the stack for STACK_GROWS_DOWNWARD targets or everything above
+     it otherwise.  Do this only when !ACCUMULATE_OUTGOING_ARGS or
+     if cfun->calls_alloca, otherwise the stack pointer shouldn't be
+     changing in the middle of the function and nothing should be stored
+     below the stack pointer.  */
+  if (!callmem[1] && (!ACCUMULATE_OUTGOING_ARGS || cfun->calls_alloca))
+    {
+      if (STACK_GROWS_DOWNWARD)
+       {
+         unsigned HOST_WIDE_INT off = -(GET_MODE_MASK (Pmode) >> 1);
+#ifdef STACK_ADDRESS_OFFSET
+         /* On SPARC take stack pointer bias into account as well.  */
+         off += (STACK_ADDRESS_OFFSET
+                 - FIRST_PARM_OFFSET (current_function_decl)));
+#endif
+         callmem[1] = plus_constant (Pmode, stack_pointer_rtx, off);
+       }
+      else
+       callmem[1] = stack_pointer_rtx;
+      callmem[1] = gen_rtx_MEM (BLKmode, callmem[1]);
+      set_mem_size (callmem[1], GET_MODE_MASK (Pmode) >> 1);
+    }
 
   cselib_nregs = max_reg_num ();
 
--- gcc/testsuite/gcc.dg/pr117239.c.jj  2025-02-03 11:13:59.399159640 +0100
+++ gcc/testsuite/gcc.dg/pr117239.c     2025-02-03 11:13:59.399159640 +0100
@@ -0,0 +1,42 @@
+/* PR rtl-optimization/117239 */
+/* { dg-do run } */
+/* { dg-options "-fno-inline -O2" } */
+/* { dg-additional-options "-fschedule-insns" { target i?86-*-* x86_64-*-* } } 
*/
+
+int a, b, c = 1, d;
+
+int
+foo (void)
+{
+  return a;
+}
+
+struct A {
+  int e, f, g, h;
+  short i;
+  int j;
+};
+
+void
+bar (int x, struct A y)
+{
+  if (y.j == 1)
+    c = 0;
+}
+
+int
+baz (struct A x)
+{
+  return b;
+}
+
+int
+main ()
+{
+  struct A k = { 0, 0, 0, 0, 0, 1 };
+  d = baz (k);
+  bar (foo (), k);
+  if (c != 0)
+    __builtin_abort ();
+  return 0;
+}


        Jakub

Reply via email to