In the example below, 'x' is once placed on the stack frame and then read
into registers as the argument value of bar():

    /* example */
    struct foo {
      int a, b;
    };
    extern struct foo bar(struct foo);
    struct foo test(void) {
      struct foo x = { 0, 1 };
      return bar(x);
    }

Thanks to the dead store elimination, the initialization of 'x' turns into
merely loading the immediates to registers, but corresponding stack frame
growth is not rolled back.  As a result:

    ;; prereq: the CALL0 ABI
    ;; before
    test:
        addi    sp, sp, -16     // unused stack frame allocation/freeing
        movi.n  a2, 0
        movi.n  a3, 1
        addi    sp, sp, 16      // because no instructions that refer to
        j.l     bar, a9         // the stack pointer between the two

This patch eliminates such unused stack frame allocation/freeing:

    ;; after
    test:
        movi.n  a2, 0
        movi.n  a3, 1
        j.l     bar, a9

gcc/ChangeLog:

        * config/xtensa/xtensa.cc (machine_function): New member to track
        the insns for stack pointer adjustment inside of the pro/epilogue.
        (xtensa_emit_adjust_stack_pointer): New function to share the
        common codes and to record the insns for stack pointer adjustment.
        (xtensa_expand_prologue): Change to use the function mentioned
        above when using the CALL0 ABI.
        (xtensa_expand_prologue): Ditto.
        And also change to cancel emitting the insns for the stack pointer
        adjustment if only used for its own.
---
 gcc/config/xtensa/xtensa.cc | 221 ++++++++++++++++++------------------
 1 file changed, 110 insertions(+), 111 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index b673b6764da..cd509876fd2 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -102,6 +102,7 @@ struct GTY(()) machine_function
   int callee_save_size;
   bool frame_laid_out;
   bool epilogue_done;
+  hash_set<rtx_insn *> *logues_a1_adjusts;
 };
 
 /* Vector, indexed by hard register number, which contains 1 for a
@@ -3048,7 +3049,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode 
mode, int labelno)
 }
 
 static bool
-xtensa_call_save_reg(int regno)
+xtensa_call_save_reg (int regno)
 {
   if (TARGET_WINDOWED_ABI)
     return false;
@@ -3084,7 +3085,7 @@ compute_frame_size (poly_int64 size)
   cfun->machine->callee_save_size = 0;
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
     {
-      if (xtensa_call_save_reg(regno))
+      if (xtensa_call_save_reg (regno))
        cfun->machine->callee_save_size += UNITS_PER_WORD;
     }
 
@@ -3143,6 +3144,49 @@ xtensa_initial_elimination_offset (int from, int to 
ATTRIBUTE_UNUSED)
    and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
 
+static rtx_insn *
+xtensa_emit_adjust_stack_pointer (HOST_WIDE_INT offset, bool need_note)
+{
+  rtx_insn *insn;
+
+  if (xtensa_simm8 (offset)
+      || xtensa_simm8x256 (offset))
+    insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                                 GEN_INT (offset)));
+  else
+    {
+      rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+      rtx_insn* tmp_insn;
+
+      if (offset < 0)
+       {
+         tmp_insn = emit_move_insn (tmp_reg, GEN_INT (-offset));
+         insn = emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                                       tmp_reg));
+       }
+      else
+       {
+         tmp_insn = emit_move_insn (tmp_reg, GEN_INT (offset));
+         insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                                       tmp_reg));
+       }
+      cfun->machine->logues_a1_adjusts->add (tmp_insn);
+    }
+
+  if (need_note)
+    {
+      rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
+                                 plus_constant (Pmode, stack_pointer_rtx,
+                                                offset));
+
+      RTX_FRAME_RELATED_P (insn) = 1;
+      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+    }
+
+  cfun->machine->logues_a1_adjusts->add (insn);
+  return insn;
+}
+
 void
 xtensa_expand_prologue (void)
 {
@@ -3175,16 +3219,12 @@ xtensa_expand_prologue (void)
       HOST_WIDE_INT offset = 0;
       int callee_save_size = cfun->machine->callee_save_size;
 
+      cfun->machine->logues_a1_adjusts = new hash_set<rtx_insn *>;
+
       /* -128 is a limit of single addi instruction. */
       if (IN_RANGE (total_size, 1, 128))
        {
-         insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-                                       GEN_INT (-total_size)));
-         RTX_FRAME_RELATED_P (insn) = 1;
-         note_rtx = gen_rtx_SET (stack_pointer_rtx,
-                                 plus_constant (Pmode, stack_pointer_rtx,
-                                                -total_size));
-         add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+         insn = xtensa_emit_adjust_stack_pointer (-total_size, true);
          offset = total_size - UNITS_PER_WORD;
        }
       else if (callee_save_size)
@@ -3194,75 +3234,34 @@ xtensa_expand_prologue (void)
           * move it to its final location. */
          if (total_size > 1024)
            {
-             insn = emit_insn (gen_addsi3 (stack_pointer_rtx, 
stack_pointer_rtx,
-                                           GEN_INT (-callee_save_size)));
-             RTX_FRAME_RELATED_P (insn) = 1;
-             note_rtx = gen_rtx_SET (stack_pointer_rtx,
-                                     plus_constant (Pmode, stack_pointer_rtx,
-                                                    -callee_save_size));
-             add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+             insn = xtensa_emit_adjust_stack_pointer (-callee_save_size,
+                                                      true);
              offset = callee_save_size - UNITS_PER_WORD;
            }
          else
            {
-             if (xtensa_simm8x256 (-total_size))
-               insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
-                                             stack_pointer_rtx,
-                                             GEN_INT (-total_size)));
-             else
-               {
-                 rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-                 emit_move_insn (tmp_reg, GEN_INT (total_size));
-                 insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-                                               stack_pointer_rtx, tmp_reg));
-               }
-             RTX_FRAME_RELATED_P (insn) = 1;
-             note_rtx = gen_rtx_SET (stack_pointer_rtx,
-                                     plus_constant (Pmode, stack_pointer_rtx,
-                                                    -total_size));
-             add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+             insn = xtensa_emit_adjust_stack_pointer (-total_size, true);
              offset = total_size - UNITS_PER_WORD;
            }
        }
 
       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
-       {
-         if (xtensa_call_save_reg(regno))
-           {
-             rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
-             rtx mem = gen_frame_mem (SImode, x);
-             rtx reg = gen_rtx_REG (SImode, regno);
-
-             offset -= UNITS_PER_WORD;
-             insn = emit_move_insn (mem, reg);
-             RTX_FRAME_RELATED_P (insn) = 1;
-             add_reg_note (insn, REG_FRAME_RELATED_EXPR,
-                           gen_rtx_SET (mem, reg));
-           }
-       }
+       if (xtensa_call_save_reg (regno))
+         {
+           rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+           rtx mem = gen_frame_mem (SImode, x);
+           rtx reg = gen_rtx_REG (SImode, regno);
+
+           offset -= UNITS_PER_WORD;
+           insn = emit_move_insn (mem, reg);
+           RTX_FRAME_RELATED_P (insn) = 1;
+           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+                         gen_rtx_SET (mem, reg));
+         }
       if (total_size > 1024
          || (!callee_save_size && total_size > 128))
-       {
-         if (xtensa_simm8x256 (callee_save_size - total_size))
-           insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
-                                         stack_pointer_rtx,
-                                         GEN_INT (callee_save_size -
-                                                  total_size)));
-         else
-           {
-             rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-             emit_move_insn (tmp_reg, GEN_INT (total_size -
-                                               callee_save_size));
-             insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-                                           stack_pointer_rtx, tmp_reg));
-           }
-         RTX_FRAME_RELATED_P (insn) = 1;
-         note_rtx = gen_rtx_SET (stack_pointer_rtx,
-                                 plus_constant (Pmode, stack_pointer_rtx,
-                                                callee_save_size -
-                                                total_size));
-         add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
-       }
+       insn = xtensa_emit_adjust_stack_pointer (callee_save_size - total_size,
+                                                true);
     }
 
   if (frame_pointer_needed)
@@ -3326,24 +3325,14 @@ xtensa_expand_epilogue (bool sibcall_p)
     {
       int regno;
       HOST_WIDE_INT offset;
+      df_ref ref;
+      bool stack_pointer_used = false;
 
       if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 
1024))
        {
-         if (xtensa_simm8x256 (cfun->machine->current_frame_size -
-                               cfun->machine->callee_save_size))
-           emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
-                                  hard_frame_pointer_rtx : stack_pointer_rtx,
-                                  GEN_INT (cfun->machine->current_frame_size -
-                                           cfun->machine->callee_save_size)));
-         else
-           {
-             rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-             emit_move_insn (tmp_reg, GEN_INT 
(cfun->machine->current_frame_size -
-                                               
cfun->machine->callee_save_size));
-             emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
-                                    hard_frame_pointer_rtx : stack_pointer_rtx,
-                                    tmp_reg));
-           }
+         xtensa_emit_adjust_stack_pointer (cfun->machine->current_frame_size -
+                                           cfun->machine->callee_save_size,
+                                           false);
          offset = cfun->machine->callee_save_size - UNITS_PER_WORD;
        }
       else
@@ -3359,19 +3348,18 @@ xtensa_expand_epilogue (bool sibcall_p)
        emit_insn (gen_blockage ());
 
       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
-       {
-         if (xtensa_call_save_reg(regno))
-           {
-             rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
-             rtx reg;
-
-             offset -= UNITS_PER_WORD;
-             emit_move_insn (reg = gen_rtx_REG (SImode, regno),
-                             gen_frame_mem (SImode, x));
-             if (regno == A0_REG && sibcall_p)
-               emit_use (reg);
-           }
-       }
+       if (xtensa_call_save_reg (regno))
+         {
+           rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+           rtx reg;
+
+           offset -= UNITS_PER_WORD;
+           emit_move_insn (reg = gen_rtx_REG (SImode, regno),
+                           gen_frame_mem (SImode, x));
+           if (regno == A0_REG && sibcall_p)
+             emit_use (reg);
+           stack_pointer_used = true;
+         }
 
       if (cfun->machine->current_frame_size > 0)
        {
@@ -3384,31 +3372,42 @@ xtensa_expand_epilogue (bool sibcall_p)
              else
                offset = cfun->machine->callee_save_size;
              if (offset)
-               emit_insn (gen_addsi3 (stack_pointer_rtx,
-                                      stack_pointer_rtx,
-                                      GEN_INT (offset)));
+               xtensa_emit_adjust_stack_pointer (offset, false);
            }
          else
-           {
-             if (xtensa_simm8x256 (cfun->machine->current_frame_size))
-               emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-                                      GEN_INT 
(cfun->machine->current_frame_size)));
-             else
-               {
-                 rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-                 emit_move_insn (tmp_reg,
-                                 GEN_INT (cfun->machine->current_frame_size));
-                 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-                                        tmp_reg));
-               }
-           }
+           xtensa_emit_adjust_stack_pointer (cfun->machine->current_frame_size,
+                                             false);
        }
 
       if (crtl->calls_eh_return)
        emit_insn (gen_add3_insn (stack_pointer_rtx,
                                  stack_pointer_rtx,
                                  EH_RETURN_STACKADJ_RTX));
+
+      /* Check if the function body uses the stack pointer.  */
+      for (ref = DF_REG_USE_CHAIN (A1_REG);
+          ref; ref = DF_REF_NEXT_REG (ref))
+       if (DF_REF_CLASS (ref) == DF_REF_REGULAR)
+         {
+           stack_pointer_used = true;
+           break;
+         }
+
+      /* Undo the insns if the stack pointer is only used for its own
+        adjustment.  */
+      if (! (stack_pointer_used
+            || frame_pointer_needed
+            || crtl->calls_eh_return))
+       for (const auto &insn : *cfun->machine->logues_a1_adjusts)
+         {
+           rtx note = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL);
+
+           if (note)
+             remove_note (insn, note);
+           PATTERN (insn) = const0_rtx;
+         }
     }
+
   cfun->machine->epilogue_done = true;
   if (!sibcall_p)
     emit_jump_insn (gen_return ());
-- 
2.20.1

Reply via email to