The Thumb-2 movsi patterns try to prefer low registers for loads and stores.
However, this is done incorrectly by using two separate alternatives with the
'l' and 'h' register classes.  The register allocator will then only use low
registers, and as a result we end up with significantly more spills and moves
to high registers.  Fix this by merging the alternatives and using 'l*r' to
indicate a preference for low registers.  This saves ~400 instructions in the
pr77308 testcase.
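
For reference, the change relies on the '*' constraint modifier: it makes the
register allocator ignore the following constraint letter when computing
register preferences, without changing which registers the alternative
accepts.  A minimal sketch of the intent (illustrative only, not part of the
patch):

    ;; Before: low and high registers are separate alternatives, and the
    ;; '*' on the high-reg alternative hides it from preferencing, so the
    ;; allocator treats low registers as the only cheap choice and spills.
    ;;    dest "l ,*hk"   src "mi,*mi"
    ;;
    ;; After: a single alternative; 'r' is accepted but ignored for
    ;; preferencing, so low registers are still preferred while high
    ;; registers stay usable without an extra move or spill.
    ;;    dest "l*rk"     src "mi"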

Bootstrap & regress OK on arm-none-linux-gnueabihf --with-cpu=cortex-a57

ChangeLog:
2019-07-24  Wilco Dijkstra  <wdijk...@arm.com>

        * config/arm/thumb2.md (thumb2_movsi_insn): Fix load/store low reg.
        * config/arm/vfp.md (thumb2_movsi_vfp): Likewise.

--
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 78a6ea0b10dab97ed6651ce62e99cfd7a81722ab..c7000d0772a7e5887b6d05be188e8eb38c97217d 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -247,8 +247,8 @@ (define_insn "*thumb2_pop_single"
 ;; regs.  The high register alternatives are not taken into account when
 ;; choosing register preferences in order to reflect their expense.
 (define_insn "*thumb2_movsi_insn"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m")
-       (match_operand:SI 1 "general_operand"      "rk,I,Py,K,j,mi,*mi,l,*hk"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l*rk,m")
+       (match_operand:SI 1 "general_operand"      "rk,I,Py,K,j,mi,l*rk"))]
   "TARGET_THUMB2 && !TARGET_IWMMXT && !TARGET_HARD_FLOAT
    && (   register_operand (operands[0], SImode)
        || register_operand (operands[1], SImode))"
@@ -262,22 +262,20 @@ (define_insn "*thumb2_movsi_insn"
     case 3: return \"mvn%?\\t%0, #%B1\";
     case 4: return \"movw%?\\t%0, %1\";
     case 5:
-    case 6:
       /* Cannot load it directly, split to load it via MOV / MOVT.  */
       if (!MEM_P (operands[1]) && arm_disable_literal_pool)
        return \"#\";
       return \"ldr%?\\t%0, %1\";
-    case 7:
-    case 8: return \"str%?\\t%1, %0\";
+    case 6: return \"str%?\\t%1, %0\";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load_4,load_4,store_4,store_4")
-   (set_attr "length" "2,4,2,4,4,4,4,4,4")
+  [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load_4,store_4")
+   (set_attr "length" "2,4,2,4,4,4,4")
    (set_attr "predicable" "yes")
-   (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no")
-   (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*")
-   (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")]
+   (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no")
+   (set_attr "pool_range" "*,*,*,*,*,4094,*")
+   (set_attr "neg_pool_range" "*,*,*,*,*,0,*")]
 )
 
 (define_insn "tls_load_dot_plus_four"
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index e0aaa7b00bb41c046da4531a293e123c94e8b9a4..b59dd6b71d228e042feda3a3a06d81dd01d200da 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -258,8 +258,8 @@ (define_insn "*arm_movsi_vfp"
 ;; is chosen with length 2 when the instruction is predicated for
 ;; arm_restrict_it.
 (define_insn "*thumb2_movsi_vfp"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t,  *Uv")
-       (match_operand:SI 1 "general_operand"      "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*UvTu,*t"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l*rk,m,*t, r,*t,*t,  *Uv")
+       (match_operand:SI 1 "general_operand"      "rk,I,Py,K,j,mi,l*rk, r,*t,*t,*UvTu,*t"))]
   "TARGET_THUMB2 && TARGET_HARD_FLOAT
    && (   s_register_operand (operands[0], SImode)
        || s_register_operand (operands[1], SImode))"
@@ -275,32 +275,30 @@ (define_insn "*thumb2_movsi_vfp"
     case 4:
       return \"movw%?\\t%0, %1\";
     case 5:
-    case 6:
       /* Cannot load it directly, split to load it via MOV / MOVT.  */
       if (!MEM_P (operands[1]) && arm_disable_literal_pool)
        return \"#\";
       return \"ldr%?\\t%0, %1\";
-    case 7:
-    case 8:
+    case 6:
       return \"str%?\\t%1, %0\";
-    case 9:
+    case 7:
       return \"vmov%?\\t%0, %1\\t%@ int\";
-    case 10:
+    case 8:
       return \"vmov%?\\t%0, %1\\t%@ int\";
-    case 11:
+    case 9:
       return \"vmov%?.f32\\t%0, %1\\t%@ int\";
-    case 12: case 13:
+    case 10: case 11:
       return output_move_vfp (operands);
     default:
       gcc_unreachable ();
     }
   "
   [(set_attr "predicable" "yes")
-   (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no")
-   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,load_4,store_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")
-   (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4")
-   (set_attr "pool_range"     "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
-   (set_attr "neg_pool_range" "*,*,*,*,*,   0,   0,*,*,*,*,*,1008,*")]
+   (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no")
+   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")
+   (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4")
+   (set_attr "pool_range"     "*,*,*,*,*,4094,*,*,*,*,1018,*")
+   (set_attr "neg_pool_range" "*,*,*,*,*,   0,*,*,*,*,1008,*")]
 )
 
 
