The Thumb-2 movsi patterns try to prefer low registers for loads and stores. However this is done incorrectly by using 2 separate variants with 'l' and 'h' register classes. The register allocator will only use low registers, and as a result we end up with significantly more spills and moves to high registers. Fix this by merging the alternatives and use 'l*r' to indicate preference for low registers. This saves ~400 instructions from the pr77308 testcase.
Bootstrap & regress OK on arm-none-linux-gnueabihf --with-cpu=cortex-a57 ChangeLog: 2019-07-24 Wilco Dijkstra <wdijk...@arm.com> * config/arm/thumb2.md (thumb2_movsi_insn): Fix load/store low reg. * config/arm/vfp.md (thumb2_movsi_vfp): Likewise. -- diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 78a6ea0b10dab97ed6651ce62e99cfd7a81722ab..c7000d0772a7e5887b6d05be188e8eb38c97217d 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -247,8 +247,8 @@ (define_insn "*thumb2_pop_single" ;; regs. The high register alternatives are not taken into account when ;; choosing register preferences in order to reflect their expense. (define_insn "*thumb2_movsi_insn" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m") - (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l*rk,m") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,l*rk"))] "TARGET_THUMB2 && !TARGET_IWMMXT && !TARGET_HARD_FLOAT && ( register_operand (operands[0], SImode) || register_operand (operands[1], SImode))" @@ -262,22 +262,20 @@ (define_insn "*thumb2_movsi_insn" case 3: return \"mvn%?\\t%0, #%B1\"; case 4: return \"movw%?\\t%0, %1\"; case 5: - case 6: /* Cannot load it directly, split to load it via MOV / MOVT. */ if (!MEM_P (operands[1]) && arm_disable_literal_pool) return \"#\"; return \"ldr%?\\t%0, %1\"; - case 7: - case 8: return \"str%?\\t%1, %0\"; + case 6: return \"str%?\\t%1, %0\"; default: gcc_unreachable (); } } - [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load_4,load_4,store_4,store_4") - (set_attr "length" "2,4,2,4,4,4,4,4,4") + [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load_4,store_4") + (set_attr "length" "2,4,2,4,4,4,4") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") - (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*") - (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")] + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no") + (set_attr "pool_range" "*,*,*,*,*,4094,*") + (set_attr "neg_pool_range" "*,*,*,*,*,0,*")] ) (define_insn "tls_load_dot_plus_four" diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index e0aaa7b00bb41c046da4531a293e123c94e8b9a4..b59dd6b71d228e042feda3a3a06d81dd01d200da 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -258,8 +258,8 @@ (define_insn "*arm_movsi_vfp" ;; is chosen with length 2 when the instruction is predicated for ;; arm_restrict_it. (define_insn "*thumb2_movsi_vfp" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") - (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*UvTu,*t"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l*rk,m,*t, r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,l*rk, r,*t,*t,*UvTu,*t"))] "TARGET_THUMB2 && TARGET_HARD_FLOAT && ( s_register_operand (operands[0], SImode) || s_register_operand (operands[1], SImode))" @@ -275,32 +275,30 @@ (define_insn "*thumb2_movsi_vfp" case 4: return \"movw%?\\t%0, %1\"; case 5: - case 6: /* Cannot load it directly, split to load it via MOV / MOVT. */ if (!MEM_P (operands[1]) && arm_disable_literal_pool) return \"#\"; return \"ldr%?\\t%0, %1\"; - case 7: - case 8: + case 6: return \"str%?\\t%1, %0\"; - case 9: + case 7: return \"vmov%?\\t%0, %1\\t%@ int\"; - case 10: + case 8: return \"vmov%?\\t%0, %1\\t%@ int\"; - case 11: + case 9: return \"vmov%?.f32\\t%0, %1\\t%@ int\"; - case 12: case 13: + case 10: case 11: return output_move_vfp (operands); default: gcc_unreachable (); } " [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") - (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,load_4,store_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores") - (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") - (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") - (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores") + (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4") + (set_attr "pool_range" "*,*,*,*,*,4094,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,*, 0,*,*,*,*,1008,*")] )