This patch makes the stack pointer adjustment use the "addmi" machine
instruction, instead of addition/subtraction with two source registers,
whenever the adjustment fits into a signed 16-bit value and is also a
multiple of 256.

    /* example */
    void test(void) {
      char buffer[4096];
      __asm__(""::"m"(buffer));
    }

    ;; before
    test:
        movi.n  a9, 1
        slli    a9, a9, 12
        sub     sp, sp, a9
        movi.n  a9, 1
        slli    a9, a9, 12
        add.n   sp, sp, a9
        addi    sp, sp, 0
        ret.n

    ;; after
    test:
        addmi   sp, sp, -0x1000
        addmi   sp, sp, 0x1000
        ret.n

gcc/ChangeLog:

        * config/xtensa/xtensa.cc (xtensa_expand_prologue):
        When the CALL0 ABI is used, use an "addmi" machine instruction for
        updating the stack pointer rather than addition/subtraction via hard
        register A9, if the amount of change satisfies the immediate operand
        constraints of that instruction.
        (xtensa_expand_epilogue): Ditto.
        Also suppress the addition of constant zero to the stack pointer.
---
 gcc/config/xtensa/xtensa.cc | 79 +++++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 25 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 6ac879c38fb..b673b6764da 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3150,7 +3150,6 @@ xtensa_expand_prologue (void)
   rtx_insn *insn = NULL;
   rtx note_rtx;
 
-
   total_size = compute_frame_size (get_frame_size ());
 
   if (flag_stack_usage_info)
@@ -3206,10 +3205,17 @@ xtensa_expand_prologue (void)
            }
          else
            {
-             rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-             emit_move_insn (tmp_reg, GEN_INT (total_size));
-             insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-                                           stack_pointer_rtx, tmp_reg));
+             if (xtensa_simm8x256 (-total_size))
+               insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+                                             stack_pointer_rtx,
+                                             GEN_INT (-total_size)));
+             else
+               {
+                 rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+                 emit_move_insn (tmp_reg, GEN_INT (total_size));
+                 insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+                                               stack_pointer_rtx, tmp_reg));
+               }
              RTX_FRAME_RELATED_P (insn) = 1;
              note_rtx = gen_rtx_SET (stack_pointer_rtx,
                                      plus_constant (Pmode, stack_pointer_rtx,
@@ -3237,11 +3243,19 @@ xtensa_expand_prologue (void)
       if (total_size > 1024
          || (!callee_save_size && total_size > 128))
        {
-         rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-         emit_move_insn (tmp_reg, GEN_INT (total_size -
-                                           callee_save_size));
-         insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-                                       stack_pointer_rtx, tmp_reg));
+         if (xtensa_simm8x256 (callee_save_size - total_size))
+           insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+                                         stack_pointer_rtx,
+                                         GEN_INT (callee_save_size -
+                                                  total_size)));
+         else
+           {
+             rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+             emit_move_insn (tmp_reg, GEN_INT (total_size -
+                                               callee_save_size));
+             insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+                                           stack_pointer_rtx, tmp_reg));
+           }
          RTX_FRAME_RELATED_P (insn) = 1;
          note_rtx = gen_rtx_SET (stack_pointer_rtx,
                                  plus_constant (Pmode, stack_pointer_rtx,
@@ -3315,12 +3329,21 @@ xtensa_expand_epilogue (bool sibcall_p)
 
       if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024))
        {
-         rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-         emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size -
-                                           cfun->machine->callee_save_size));
-         emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
-                                hard_frame_pointer_rtx : stack_pointer_rtx,
-                                tmp_reg));
+         if (xtensa_simm8x256 (cfun->machine->current_frame_size -
+                               cfun->machine->callee_save_size))
+           emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
+                                  hard_frame_pointer_rtx : stack_pointer_rtx,
+                                  GEN_INT (cfun->machine->current_frame_size -
+                                           cfun->machine->callee_save_size)));
+         else
+           {
+             rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+             emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size -
+                                               cfun->machine->callee_save_size));
+             emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
+                                    hard_frame_pointer_rtx : stack_pointer_rtx,
+                                    tmp_reg));
+           }
          offset = cfun->machine->callee_save_size - UNITS_PER_WORD;
        }
       else
@@ -3360,18 +3383,24 @@ xtensa_expand_epilogue (bool sibcall_p)
                offset = cfun->machine->current_frame_size;
              else
                offset = cfun->machine->callee_save_size;
-
-             emit_insn (gen_addsi3 (stack_pointer_rtx,
-                                    stack_pointer_rtx,
-                                    GEN_INT (offset)));
+             if (offset)
+               emit_insn (gen_addsi3 (stack_pointer_rtx,
+                                      stack_pointer_rtx,
+                                      GEN_INT (offset)));
            }
          else
            {
-             rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-             emit_move_insn (tmp_reg,
-                             GEN_INT (cfun->machine->current_frame_size));
-             emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-                                    tmp_reg));
+             if (xtensa_simm8x256 (cfun->machine->current_frame_size))
+               emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                                      GEN_INT (cfun->machine->current_frame_size)));
+             else
+               {
+                 rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+                 emit_move_insn (tmp_reg,
+                                 GEN_INT (cfun->machine->current_frame_size));
+                 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                                        tmp_reg));
+               }
            }
        }
 
-- 
2.20.1
  • [PATCH] xtensa: Optimize stac... Takayuki 'January June' Suwa via Gcc-patches

Reply via email to