https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90706
Vladimir Makarov <vmakarov at gcc dot gnu.org> changed:

           What    |Removed |Added
----------------------------------------------------------------------------
                 CC|        |vmakarov at gcc dot gnu.org

--- Comment #14 from Vladimir Makarov <vmakarov at gcc dot gnu.org> ---
What I see is that the input to the RA has changed significantly since gcc-8
(see the insns marked by !).  A lot of subregs are generated now, and there
is no promotion of (argument) hard regs (insns 44-47) because of
https://gcc.gnu.org/legacy-ml/gcc-patches/2018-10/msg01356.html.

gcc-8:

    1: NOTE_INSN_DELETED
    4: NOTE_INSN_BASIC_BLOCK 2
    2: r44:SF=r22:SF
       REG_DEAD r22:SF
    3: NOTE_INSN_FUNCTION_BEG
    6: r45:QI=0x1
       REG_EQUAL 0x1
    7: r18:SF=0.0
 !  8: r22:SF=r44:SF
       REG_DEAD r44:SF
    9: r24:QI=call [`__gtsf2'] argc:0
       REG_DEAD r25:QI
       REG_DEAD r23:QI
       REG_DEAD r22:QI
       REG_DEAD r18:SF
       REG_CALL_DECL `__gtsf2'
       REG_EH_REGION 0xffffffff80000000
   10: NOTE_INSN_DELETED
   11: cc0=cmp(r24:QI,0)
       REG_DEAD r24:QI
   12: pc={(cc0>0)?L14:pc}
       REG_BR_PROB 633507684
   22: NOTE_INSN_BASIC_BLOCK 3
   13: r45:QI=0
       REG_EQUAL 0
   14: L14:
   23: NOTE_INSN_BASIC_BLOCK 4
   19: r24:QI=r45:QI
       REG_DEAD r45:QI
   20: use r24:QI

current:

    1: NOTE_INSN_DELETED
    4: NOTE_INSN_BASIC_BLOCK 2
   44: r56:QI=r22:QI
       REG_DEAD r22:QI
   45: r57:QI=r23:QI
       REG_DEAD r23:QI
   46: r58:QI=r24:QI
       REG_DEAD r24:QI
   47: r59:QI=r25:QI
       REG_DEAD r25:QI
   48: r52:QI=r56:QI
       REG_DEAD r56:QI
   49: r53:QI=r57:QI
       REG_DEAD r57:QI
   50: r54:QI=r58:QI
       REG_DEAD r58:QI
   51: r55:QI=r59:QI
       REG_DEAD r59:QI
    3: NOTE_INSN_FUNCTION_BEG
    6: r46:QI=0x1
       REG_EQUAL 0x1
    7: r18:SF=0.0
 ! 52: clobber r60:SI
 ! 53: r60:SI#0=r52:QI
       REG_DEAD r52:QI
 ! 54: r60:SI#1=r53:QI
       REG_DEAD r53:QI
 ! 55: r60:SI#2=r54:QI
       REG_DEAD r54:QI
 ! 56: r60:SI#3=r55:QI
       REG_DEAD r55:QI
 ! 57: r22:SF=r60:SI#0
       REG_DEAD r60:SI
    9: r24:QI=call [`__gtsf2'] argc:0
       REG_DEAD r25:QI
       REG_DEAD r23:QI
       REG_DEAD r22:QI
       REG_DEAD r18:SF
       REG_CALL_DECL `__gtsf2'
       REG_EH_REGION 0xffffffff80000000
   34: r50:QI=r24:QI
       REG_DEAD r24:QI
   10: NOTE_INSN_DELETED
   11: pc={(r50:QI>0)?L13:pc}
       REG_DEAD r50:QI
       REG_BR_PROB 633507684
   21: NOTE_INSN_BASIC_BLOCK 3
   12: r46:QI=0
       REG_EQUAL 0
   13: L13:
   22: NOTE_INSN_BASIC_BLOCK 4
   18: r24:QI=r46:QI
       REG_DEAD r46:QI
   19: use r24:QI

Currently, GCC generates the following AVR code:

check:
	push r28
	push r29
	rcall .
	rcall .
	push __tmp_reg__
	in r28,__SP_L__
	in r29,__SP_H__
/* prologue: function */
/* frame size = 5 */
/* stack size = 7 */
.L__stack_usage = 7
	ldi r18,lo8(1)
	std Y+5,r18
	ldi r18,0
	ldi r19,0
	ldi r20,0
	ldi r21,0
!	std Y+1,r22
!	std Y+2,r23
!	std Y+3,r24
!	std Y+4,r25
!	ldd r22,Y+1
!	ldd r23,Y+2
!	ldd r24,Y+3
!	ldd r25,Y+4
	rcall __gtsf2
	cp __zero_reg__,r24
	brlt .L2
	std Y+5,__zero_reg__
.L2:
	ldd r24,Y+5
/* epilogue start */
	pop __tmp_reg__
	pop __tmp_reg__
	pop __tmp_reg__
	pop __tmp_reg__
	pop __tmp_reg__
	pop r29
	pop r28
	ret

There are a lot of loads and stores.  That is because p60 got memory:

  a2(r60,l0) costs: ADDW_REGS:32000 SIMPLE_LD_REGS:32000 LD_REGS:32000
                    NO_LD_REGS:32000 GENERAL_REGS:32000 MEM:12000
  r60: preferred NO_REGS, alternative NO_REGS, allocno NO_REGS

After some investigation I found that IRA calculates a wrong cost for moving
general hard regs in SFmode.
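For reference, a minimal testcase consistent with the RTL and asm above would be something like the following (the PR's actual reproducer is not quoted in this comment, so treat this as a reconstruction):

```c
/* Reconstructed testcase (an assumption, not quoted from the PR):
   on avr the SFmode argument arrives in r22-r25, the comparison
   against 0.0 becomes a libcall to __gtsf2, and the QImode result
   is returned in r24.  */
unsigned char
check (float x)
{
  return x > 0.0f;
}
```

Compiling this with something like `avr-gcc -Os -S` should reproduce the store/load sequence marked by ! above.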
The following patch solves the problem:

diff --git a/gcc/ira.cc b/gcc/ira.cc
index d28a67b2546..cb4bfca739d 100644
--- a/gcc/ira.cc
+++ b/gcc/ira.cc
@@ -1627,14 +1627,22 @@ ira_init_register_move_cost (machine_mode mode)
 	     *p2 != LIM_REG_CLASSES; p2++)
 	  if (ira_class_hard_regs_num[*p2] > 0
 	      && (ira_reg_class_max_nregs[*p2][mode]
-		  <= ira_class_hard_regs_num[*p2]))
+		  <= ira_class_hard_regs_num[*p2])
+	      && hard_reg_set_intersect_p (ok_regs,
+					   reg_class_contents[cl1])
+	      && hard_reg_set_intersect_p (ok_regs,
+					   reg_class_contents[*p2]))
 	    cost = MAX (cost, ira_register_move_cost[mode][cl1][*p2]);
 
 	for (p1 = &reg_class_subclasses[cl1][0];
 	     *p1 != LIM_REG_CLASSES; p1++)
 	  if (ira_class_hard_regs_num[*p1] > 0
 	      && (ira_reg_class_max_nregs[*p1][mode]
-		  <= ira_class_hard_regs_num[*p1]))
+		  <= ira_class_hard_regs_num[*p1])
+	      && hard_reg_set_intersect_p (ok_regs,
+					   reg_class_contents[cl2])
+	      && hard_reg_set_intersect_p (ok_regs,
+					   reg_class_contents[*p1]))
 	    cost = MAX (cost, ira_register_move_cost[mode][*p1][cl2]);
 
 	ira_assert (cost <= 65535);

With this patch the RA generates the following better code:

check:
	push r12
	push r13
	push r14
	push r15
	push r28
/* prologue: function */
/* frame size = 0 */
/* stack size = 5 */
.L__stack_usage = 5
	ldi r28,lo8(1)
	ldi r18,0
	ldi r19,0
	ldi r20,0
	ldi r21,0
!	mov r12,r22
!	mov r13,r23
!	mov r14,r24
!	mov r15,r25
!	mov r25,r15
!	mov r24,r14
!	mov r23,r13
!	mov r22,r12
	rcall __gtsf2
	cp __zero_reg__,r24
	brlt .L2
	ldi r28,0
.L2:
	mov r24,r28
/* epilogue start */
	pop r28
	pop r15
	pop r14
	pop r13
	pop r12
	ret

There are still a lot of moves in the generated code; I'll think about how to
remove them.  Coalescing could do this, but unfortunately IRA/LRA do not
coalesce moves involving subregs.  Implementing coalescing at the end of LRA
could be a solution.  In any case, a full solution for this PR will take some
time.  First, I am going to submit the patch above after thorough testing on
a few major targets.  Then I'll work on removing the redundant moves.
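The key point of the patch is the hard_reg_set_intersect_p guard: a class or subclass now contributes to the maximal move cost only if it shares at least one hard register with ok_regs, the set of registers that can actually hold a value of MODE. A simplified model of the predicate (GCC's real one operates on the multi-word HARD_REG_SET type; a 64-bit mask stands in here):

```c
#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for GCC's hard_reg_set_intersect_p: true iff
   the two register sets share at least one hard register.  In GCC
   the operands are HARD_REG_SETs; here each bit of a uint64_t marks
   one hard register.  */
static bool
hard_reg_set_intersect_p (uint64_t a, uint64_t b)
{
  return (a & b) != 0;
}
```

With this guard, classes containing no register usable for MODE no longer inflate the computed move cost, so memory stops looking cheaper than registers for pseudos like r60 above.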
I'll post periodic updates on this PR as the work progresses.
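To illustrate what coalescing would buy here: in the patched output the moves marked by ! copy r22-r25 into r12-r15 and immediately back, so the pseudos involved are connected only by copies and could share a single representative. A toy union-find sketch of move coalescing (purely illustrative; the names and structure are hypothetical, not IRA/LRA code):

```c
/* Toy move coalescing: pseudos linked only by copy insns are merged
   into one representative via union-find, after which the copies
   become self-moves and can be deleted.  Conflict checking, which a
   real allocator must perform before merging, is omitted here.  */
enum { NREGS = 64 };
static int rep[NREGS];

static void
init_coalesce (void)
{
  for (int i = 0; i < NREGS; i++)
    rep[i] = i;
}

/* Find the representative of pseudo R, with path compression.  */
static int
find_rep (int r)
{
  return rep[r] == r ? r : (rep[r] = find_rep (rep[r]));
}

/* Record the copy "dst = src" and merge the two pseudos.  */
static void
coalesce_copy (int dst, int src)
{
  rep[find_rep (dst)] = find_rep (src);
}
```

After coalescing a copy pair such as r12=r22 followed by r22=r12, both registers map to the same representative and both moves reduce to self-copies.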