I've found a case of suboptimal code coming from LRA when we have a large
frame and have the frame grow downwards with MIPS. A good testcase for
this is the qsort benchmark from the automotive section of mibench[1].

Any frame access where the offset is not supported as an add-immediate
should benefit from this fix. It will probably have little or no effect
when frames grow upwards.  The patch is just prototype for now as it
has had very little testing beyond it looking like it does the right
thing.

@Vladimir: The affected code is in eliminate_regs_in_insn where
there is a comment (history suggests you wrote it) that says:

  /* We allow one special case which happens to work on all machines we
     currently support: a single set with the source or a REG_EQUAL
     note being a PLUS of an eliminable register and a constant.  */

There is an implementation that optimises a single set but not one for
a REG_EQUAL. Do you have any recollection of this code and do you think
there was a reason you didn't implement the REG_EQUAL case? Either way
any advice on my approach is welcome.

Below is an example of RTL pre/post LRA both without and with my patch
applied:

== RTL after IRA ==
(insn 96 95 97 4 (set (reg:SI 299)
        (const_int -1441792 [0xffffffffffea0000])) 310 {*movsi_internal}
     (expr_list:REG_EQUIV (const_int -1441792 [0xffffffffffea0000])
        (nil)))
(insn 97 96 98 4 (set (reg:SI 298)
        (plus:SI (reg:SI 299)
            (const_int 1792 [0x700]))) 13 {*addsi3}
     (expr_list:REG_DEAD (reg:SI 299)
        (expr_list:REG_EQUIV (const_int -1440000 [0xffffffffffea0700])
            (nil))))
(insn 98 97 71 4 (set (reg:SI 282 [ ivtmp.21 ])
        (plus:SI (reg/f:SI 78 $frame)
            (reg:SI 298))) 13 {*addsi3}
     (expr_list:REG_DEAD (reg:SI 298)
        (nil)))

== RTL after LRA ==

(insn 96 95 97 4 (set (reg:SI 16 $16 [299])
        (const_int -1441792 [0xffffffffffea0000])) 310 {*movsi_internal}
     (expr_list:REG_EQUIV (const_int -1441792 [0xffffffffffea0000])
        (nil)))
(insn 97 96 249 4 (set (reg:SI 16 $16 [298])
        (plus:SI (reg:SI 16 $16 [299])
            (const_int 1792 [0x700]))) 13 {*addsi3}
     (expr_list:REG_EQUIV (const_int -1440000 [0xffffffffffea0700])
        (nil)))
(insn 249 97 250 4 (set (reg:SI 3 $3 [342])
        (const_int 1376256 [0x150000])) 310 {*movsi_internal}
     (nil))
(insn 250 249 289 4 (set (reg:SI 2 $2 [341])
        (ior:SI (reg:SI 3 $3 [342])
            (const_int 63776 [0xf920]))) 185 {*iorsi3}
     (expr_list:REG_EQUAL (const_int 1440032 [0x15f920])
        (nil)))
(insn 289 250 251 4 (set (reg:SI 4 $4 [363])
        (plus:SI (reg/f:SI 29 $sp)
            (const_int 24 [0x18]))) 13 {*addsi3}
     (nil))
(insn 251 289 98 4 (set (reg:SI 2 $2 [341])
        (plus:SI (reg:SI 2 $2 [341])
            (reg:SI 4 $4 [363]))) 13 {*addsi3}
     (nil))
(insn 98 251 71 4 (set (reg:SI 16 $16 [orig:282 ivtmp.21 ] [282])
        (plus:SI (reg:SI 2 $2 [341])
            (reg:SI 16 $16 [298]))) 13 {*addsi3}
     (nil))

== new RTL after LRA ==

(note 96 95 97 4 NOTE_INSN_DELETED)
(note 97 96 98 4 NOTE_INSN_DELETED)
(insn 98 97 71 4 (set (reg:SI 16 $16 [orig:282 ivtmp.21 ] [282])
        (plus:SI (reg/f:SI 29 $sp)
            (const_int 48 [0x30]))) 13 {*addsi3}
     (nil))

The frame is 8 bytes smaller due to one less spill which is why
32+24 != 48.  Obviously the code size improvement when this triggers
is quite good. Build options: mips-mti-elf-gcc -Os -mmicromips
-mabi=32 -msoft-float -EL -mips32r2 -fstack-protector

The -fstack-protector forces the frame to grow downwards.

Any advice/comments welcome...

Thanks,
Matthew

[1] http://vhosts.eecs.umich.edu/mibench//source.html

gcc/

        * lra-eliminations.c (remove_reg_equal_offset_note): Use
        ira_reg_equiv to optimise offsets when eliminating the
        frame pointer.

diff --git a/gcc/lra-eliminations.c b/gcc/lra-eliminations.c
index 08cc390..5424de6 100644
--- a/gcc/lra-eliminations.c
+++ b/gcc/lra-eliminations.c
@@ -918,7 +918,7 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, 
bool first_p,
   rtx substed_operand[MAX_RECOG_OPERANDS];
   rtx orig_operand[MAX_RECOG_OPERANDS];
   struct lra_elim_table *ep;
-  rtx plus_src, plus_cst_src;
+  rtx plus_src, plus_src_reg, plus_src_cst;
   lra_insn_recog_data_t id;
   struct lra_static_insn_data *static_id;
 
@@ -1006,7 +1006,7 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, 
bool first_p,
   /* We allow one special case which happens to work on all machines we
      currently support: a single set with the source or a REG_EQUAL
      note being a PLUS of an eliminable register and a constant.  */
-  plus_src = plus_cst_src = 0;
+  plus_src = plus_src_reg = plus_src_cst = 0;
   if (old_set && REG_P (SET_DEST (old_set)))
     {
       if (GET_CODE (SET_SRC (old_set)) == PLUS)
@@ -1014,24 +1014,59 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, 
bool first_p,
       /* First see if the source is of the form (plus (...) CST).  */
       if (plus_src
          && CONST_INT_P (XEXP (plus_src, 1)))
-       plus_cst_src = plus_src;
+       {
+         plus_src_reg = XEXP (plus_src, 0);
+         plus_src_cst = XEXP (plus_src, 1);
+       }
+      /* Next look for an equivalent value for the 2nd operand of the
+        plus.  */
+      if (plus_src && !plus_src_reg
+         && REG_P (XEXP (plus_src, 1))
+         && ira_reg_equiv[REGNO(XEXP (plus_src, 1))].defined_p
+         && ira_reg_equiv[REGNO(XEXP (plus_src, 1))].constant != NULL_RTX
+         && CONST_INT_P (ira_reg_equiv[REGNO(XEXP (plus_src, 1))].constant))
+       {
+         plus_src_reg = XEXP (plus_src, 0);
+         plus_src_cst = ira_reg_equiv[REGNO(XEXP (plus_src, 1))].constant;
+       }
+      /* Finally, look for an equivalent value for the (plus (...) (...)).  */
+      if (plus_src && !plus_src_reg
+         && ira_reg_equiv[REGNO(SET_DEST (old_set))].defined_p
+         && ira_reg_equiv[REGNO(SET_DEST (old_set))].invariant != NULL_RTX)
+       {
+         rtx invariant = ira_reg_equiv[REGNO(SET_DEST (old_set))].invariant;
+         if (GET_CODE (invariant) == PLUS
+             && CONST_INT_P (XEXP (invariant, 1)))
+           {
+             plus_src_reg = XEXP (invariant, 0);
+             plus_src_cst = XEXP (invariant, 1);
+             /* Remove the equivalence as we must not use it on the next
+                iteration.  */
+             ira_reg_equiv[REGNO(SET_DEST (old_set))].defined_p = false;
+             ira_reg_equiv[REGNO(SET_DEST (old_set))].invariant = NULL_RTX;
+           }
+       }
+
       /* Check that the first operand of the PLUS is a hard reg or
         the lowpart subreg of one.  */
-      if (plus_cst_src)
+      if (plus_src_reg)
        {
-         rtx reg = XEXP (plus_cst_src, 0);
+         rtx reg = plus_src_reg;
 
          if (GET_CODE (reg) == SUBREG && subreg_lowpart_p (reg))
            reg = SUBREG_REG (reg);
 
          if (!REG_P (reg) || REGNO (reg) >= FIRST_PSEUDO_REGISTER)
-           plus_cst_src = 0;
+           {
+             plus_src_reg = 0;
+             plus_src_cst = 0;
+           }
        }
     }
-  if (plus_cst_src)
+  if (plus_src_reg)
     {
-      rtx reg = XEXP (plus_cst_src, 0);
-      HOST_WIDE_INT offset = INTVAL (XEXP (plus_cst_src, 1));
+      rtx reg = plus_src_reg;
+      HOST_WIDE_INT offset = INTVAL (plus_src_cst);
 
       if (GET_CODE (reg) == SUBREG)
        reg = SUBREG_REG (reg);
@@ -1051,11 +1086,11 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, 
bool first_p,
                  else
                    offset += update_sp_offset;
                }
-             offset = trunc_int_for_mode (offset, GET_MODE (plus_cst_src));
+             offset = trunc_int_for_mode (offset, GET_MODE (plus_src_reg));
            }
 
-         if (GET_CODE (XEXP (plus_cst_src, 0)) == SUBREG)
-           to_rtx = gen_lowpart (GET_MODE (XEXP (plus_cst_src, 0)), to_rtx);
+         if (GET_CODE (plus_src_reg) == SUBREG)
+           to_rtx = gen_lowpart (GET_MODE (plus_src_reg), to_rtx);
          /* If we have a nonzero offset, and the source is already a
             simple REG, the following transformation would increase
             the cost of the insn by replacing a simple REG with (plus


Reply via email to