https://gcc.gnu.org/g:4440e022d220c19358745a1ba7ccf6ae054ce347

commit r16-4200-g4440e022d220c19358745a1ba7ccf6ae054ce347
Author: Andrew Pinski <[email protected]>
Date:   Mon Sep 22 15:26:00 2025 -0700

    fab: rewrite optimize_stack_restore call check [PR122033]
    
    The call check in optimize_stack_restore is not the same as
    what is described in the comment before the function. It has never
    been the same even. It has always allowed all nromal builtins but not
    target builtins while the comment says no calls.
    
    This rewrites it to allow the following.
    * a restore right before a noreturn is fine to be removed as the noreturn
      function will do the restore (or exit the program).
    * Internal functions are ok because they will turn into an instruction or 2
    * Still specifically reject alloca and __scrub_leave
    * simple or inexpensive builtins (including target builtins)
    
    This will both reject some more locations but also accepts more locations 
due
    to the internal and target and noreturn function calls checks.
    
    Bootstrapped and tested on x86_64-linux-gnu.
    
    Changes since v1:
    * v2: Add comment about why restoring before calls is important.
    
            PR tree-optimization/122033
    gcc/ChangeLog:
    
            * tree-ssa-ccp.cc (optimize_stack_restore): Rewrite the call check.
            Update comment in the front to new rules on calls.
            * tree.h (fndecl_builtin_alloc_p): New function.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/tree-ssa/pr122033-1.c: New test.
            * gcc.dg/tree-ssa/pr122033-2.c: New test.
    
    Signed-off-by: Andrew Pinski <[email protected]>

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/pr122033-1.c | 18 ++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr122033-2.c | 23 +++++++++++++++
 gcc/tree-ssa-ccp.cc                        | 46 ++++++++++++++++++++++--------
 gcc/tree.h                                 |  9 ++++++
 4 files changed, 84 insertions(+), 12 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr122033-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr122033-1.c
new file mode 100644
index 000000000000..4ef8c6c3150f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr122033-1.c
@@ -0,0 +1,18 @@
+/* PR middle-end/122033 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void bar1 (char *, int);
+void bar3(void) __attribute__((noreturn));
+void foo1 (int size)
+{
+  {
+    char temp[size];
+    temp[size-1] = '\0';
+    bar1 (temp, size);
+  }
+  bar3 ();
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_stack_save" "optimized"} } */
+/* { dg-final { scan-tree-dump-not "__builtin_stack_restore" "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr122033-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr122033-2.c
new file mode 100644
index 000000000000..f429324f64c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr122033-2.c
@@ -0,0 +1,23 @@
+/* PR middle-end/122033 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void g(int*);
+void h();
+double t;
+void f(int a, int b)
+{
+  {
+    int array0[a];
+    {
+      int array1[b];
+      g(array0);
+      g(array1);
+    }
+    t = __builtin_sin(t);
+  }
+  h ();
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 2 "optimized"} 
} */
diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
index 6d9cbd82d07e..51e133e860de 100644
--- a/gcc/tree-ssa-ccp.cc
+++ b/gcc/tree-ssa-ccp.cc
@@ -3091,7 +3091,12 @@ make_pass_ccp (gcc::context *ctxt)
    if there is another __builtin_stack_restore in the same basic
    block and no calls or ASM_EXPRs are in between, or if this block's
    only outgoing edge is to EXIT_BLOCK and there are no calls or
-   ASM_EXPRs after this __builtin_stack_restore.  */
+   ASM_EXPRs after this __builtin_stack_restore.
+   Note restore right before a noreturn function is not needed.
+   And skip some cheap calls that will most likely become an instruction.
+   Restoring the stack before a call is important to be able to keep
+   stack usage down so that call does not run out of stack.  */
+
 
 static tree
 optimize_stack_restore (gimple_stmt_iterator i)
@@ -3111,26 +3116,43 @@ optimize_stack_restore (gimple_stmt_iterator i)
   for (gsi_next (&i); !gsi_end_p (i); gsi_next (&i))
     {
       stmt = gsi_stmt (i);
-      if (gimple_code (stmt) == GIMPLE_ASM)
+      if (is_a<gasm*> (stmt))
        return NULL_TREE;
-      if (gimple_code (stmt) != GIMPLE_CALL)
+      gcall *call = dyn_cast<gcall*>(stmt);
+      if (!call)
+       continue;
+
+      /* We can remove the restore in front of noreturn
+        calls.  Since the restore will happen either
+        via an unwind/longjmp or not at all. */
+      if (gimple_call_noreturn_p (call))
+       break;
+
+      /* Internal calls are ok, to bypass
+        check first since fndecl will be null. */
+      if (gimple_call_internal_p (call))
        continue;
 
-      callee = gimple_call_fndecl (stmt);
+      callee = gimple_call_fndecl (call);
+      /* Non-builtin calls are not ok. */
       if (!callee
-         || !fndecl_built_in_p (callee, BUILT_IN_NORMAL)
-         /* All regular builtins are ok, just obviously not alloca.  */
-         || ALLOCA_FUNCTION_CODE_P (DECL_FUNCTION_CODE (callee))
-         /* Do not remove stack updates before strub leave.  */
-         || fndecl_built_in_p (callee, BUILT_IN___STRUB_LEAVE))
+         || !fndecl_built_in_p (callee))
+       return NULL_TREE;
+
+      /* Do not remove stack updates before strub leave.  */
+      if (fndecl_built_in_p (callee, BUILT_IN___STRUB_LEAVE)
+         /* Alloca calls are not ok either. */
+         || fndecl_builtin_alloc_p (callee))
        return NULL_TREE;
 
       if (fndecl_built_in_p (callee, BUILT_IN_STACK_RESTORE))
        goto second_stack_restore;
-    }
 
-  if (!gsi_end_p (i))
-    return NULL_TREE;
+      /* If not a simple or inexpensive builtin, then it is not ok either. */
+      if (!is_simple_builtin (callee)
+         && !is_inexpensive_builtin (callee))
+       return NULL_TREE;
+    }
 
   /* Allow one successor of the exit block, or zero successors.  */
   switch (EDGE_COUNT (bb->succs))
diff --git a/gcc/tree.h b/gcc/tree.h
index 4c8ad9842ff0..6e46374357c1 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -7005,6 +7005,15 @@ fndecl_built_in_p (const_tree node, built_in_function 
name1, F... names)
                                        name1, names...));
 }
 
+/* Returns true if the function decl NODE is an alloca. */
+inline bool
+fndecl_builtin_alloc_p (const_tree node)
+{
+  if (!fndecl_built_in_p (node, BUILT_IN_NORMAL))
+    return false;
+  return ALLOCA_FUNCTION_CODE_P (DECL_FUNCTION_CODE (node));
+}
+
 /* A struct for encapsulating location information about an operator
    and the operation built from it.

Reply via email to