Re: [PATCH][AArch64] Improve Cortex-A53 integer scheduler

2016-07-06 Thread Richard Earnshaw (lists)
On 05/07/16 16:00, Wilco Dijkstra wrote:
> This patch improves the accuracy of the Cortex-A53 integer scheduler,
> resulting in performance gains across a wide range of benchmarks.
> 
> OK for commit?
> 

OK.

R.

> ChangeLog:
> 2016-07-05  Wilco Dijkstra  
> 
>   * config/arm/cortex-a53.md: Use final_presence_set for in-order.
>   (cortex_a53_shift): Add mov_shift.
>   (cortex_a53_shift_reg): Add new reservation for register shifts.
>   (cortex_a53_alu): Remove bfm.
>   (cortex_a53_alu_shift): Add bfm, remove mov_shift.
>   (cortex_a53_alu_extr): Add new reservation for EXTR.
>   (bypasses): Improve bypass modelling.
> 
> ---
> diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
> index fc60bc26c7caf7e94064d7f292b877b12f333fca..70c0f4daabe0ccb8e32808f1af51f5460e087a18 100644
> --- a/gcc/config/arm/cortex-a53.md
> +++ b/gcc/config/arm/cortex-a53.md
> @@ -30,6 +30,7 @@
>  
>  (define_cpu_unit "cortex_a53_slot0" "cortex_a53")
>  (define_cpu_unit "cortex_a53_slot1" "cortex_a53")
> +(final_presence_set "cortex_a53_slot1" "cortex_a53_slot0")
>  
>  (define_reservation "cortex_a53_slot_any"
>   "cortex_a53_slot0\
> @@ -71,41 +72,43 @@
>  
>  (define_insn_reservation "cortex_a53_shift" 2
>(and (eq_attr "tune" "cortexa53")
> -   (eq_attr "type" "adr,shift_imm,shift_reg,mov_imm,mvn_imm"))
> +   (eq_attr "type" "adr,shift_imm,mov_imm,mvn_imm,mov_shift"))
>"cortex_a53_slot_any")
>  
> -(define_insn_reservation "cortex_a53_alu_rotate_imm" 2
> +(define_insn_reservation "cortex_a53_shift_reg" 2
>(and (eq_attr "tune" "cortexa53")
> -   (eq_attr "type" "rotate_imm"))
> -  "(cortex_a53_slot1)
> -   | (cortex_a53_single_issue)")
> +   (eq_attr "type" "shift_reg,mov_shift_reg"))
> +  "cortex_a53_slot_any+cortex_a53_hazard")
>  
>  (define_insn_reservation "cortex_a53_alu" 3
>(and (eq_attr "tune" "cortexa53")
> (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,
>   alu_sreg,alus_sreg,logic_reg,logics_reg,
>   adc_imm,adcs_imm,adc_reg,adcs_reg,
> - bfm,csel,clz,rbit,rev,alu_dsp_reg,
> - mov_reg,mvn_reg,
> - mrs,multiple,no_insn"))
> + csel,clz,rbit,rev,alu_dsp_reg,
> + mov_reg,mvn_reg,mrs,multiple,no_insn"))
>"cortex_a53_slot_any")
>  
>  (define_insn_reservation "cortex_a53_alu_shift" 3
>(and (eq_attr "tune" "cortexa53")
> (eq_attr "type" "alu_shift_imm,alus_shift_imm,
>   crc,logic_shift_imm,logics_shift_imm,
> - alu_ext,alus_ext,
> - extend,mov_shift,mvn_shift"))
> + alu_ext,alus_ext,bfm,extend,mvn_shift"))
>"cortex_a53_slot_any")
>  
>  (define_insn_reservation "cortex_a53_alu_shift_reg" 3
>(and (eq_attr "tune" "cortexa53")
> (eq_attr "type" "alu_shift_reg,alus_shift_reg,
>   logic_shift_reg,logics_shift_reg,
> - mov_shift_reg,mvn_shift_reg"))
> + mvn_shift_reg"))
>"cortex_a53_slot_any+cortex_a53_hazard")
>  
> -(define_insn_reservation "cortex_a53_mul" 3
> +(define_insn_reservation "cortex_a53_alu_extr" 3
> +  (and (eq_attr "tune" "cortexa53")
> +   (eq_attr "type" "rotate_imm"))
> +  "cortex_a53_slot1|cortex_a53_single_issue")
> +
> +(define_insn_reservation "cortex_a53_mul" 4
>(and (eq_attr "tune" "cortexa53")
> (ior (eq_attr "mul32" "yes")
>   (eq_attr "mul64" "yes")))
> @@ -189,49 +192,43 @@
>  (define_insn_reservation "cortex_a53_branch" 0
>(and (eq_attr "tune" "cortexa53")
> (eq_attr "type" "branch,call"))
> -  "cortex_a53_slot_any,cortex_a53_branch")
> +  "cortex_a53_slot_any+cortex_a53_branch")
>  
>  
>  ;; General-purpose register bypasses
>  
>  
> -;; Model bypasses for unshifted operands to ALU instructions.
> +;; Model bypasses for ALU to ALU instructions.
>  
> -(define_bypass 1 "cortex_a53_shift"
> -  "cortex_a53_shift")
> +(define_bypass 0 "cortex_a53_shift*"
> +  "cortex_a53_alu")
>  
> -(define_bypass 1 "cortex_a53_alu,
> -   cortex_a53_alu_shift*,
> -   cortex_a53_alu_rotate_imm,
> -   cortex_a53_shift"
> +(define_bypass 1 "cortex_a53_shift*"
> +  "cortex_a53_shift*,cortex_a53_alu_*")
> +
> +(define_bypass 1 "cortex_a53_alu*"
>"cortex_a53_alu")
>  
> -(define_bypass 2 "cortex_a53_alu,
> -   cortex_a53_alu_shift*"
> +(define_bypass 1 "cortex_a53_alu*"
>"cortex_a53_alu_shift*"
>"aarch_forward_to_shift_is_not_shifted_reg")
>  
> -;; In our model, we allow any general-purpose register operation to
> -;; bypass to the accumulator operand of an integer MADD-like operation.

[PATCH][AArch64] Improve Cortex-A53 integer scheduler

2016-07-05 Thread Wilco Dijkstra
This patch improves the accuracy of the Cortex-A53 integer scheduler,
resulting in performance gains across a wide range of benchmarks.

OK for commit?

ChangeLog:
2016-07-05  Wilco Dijkstra  

* config/arm/cortex-a53.md: Use final_presence_set for in-order.
(cortex_a53_shift): Add mov_shift.
(cortex_a53_shift_reg): Add new reservation for register shifts.
(cortex_a53_alu): Remove bfm.
(cortex_a53_alu_shift): Add bfm, remove mov_shift.
(cortex_a53_alu_extr): Add new reservation for EXTR.
(bypasses): Improve bypass modelling.

---
diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
index fc60bc26c7caf7e94064d7f292b877b12f333fca..70c0f4daabe0ccb8e32808f1af51f5460e087a18 100644
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -30,6 +30,7 @@
 
 (define_cpu_unit "cortex_a53_slot0" "cortex_a53")
 (define_cpu_unit "cortex_a53_slot1" "cortex_a53")
+(final_presence_set "cortex_a53_slot1" "cortex_a53_slot0")
 
 (define_reservation "cortex_a53_slot_any"
"cortex_a53_slot0\
@@ -71,41 +72,43 @@
 
 (define_insn_reservation "cortex_a53_shift" 2
   (and (eq_attr "tune" "cortexa53")
-   (eq_attr "type" "adr,shift_imm,shift_reg,mov_imm,mvn_imm"))
+   (eq_attr "type" "adr,shift_imm,mov_imm,mvn_imm,mov_shift"))
   "cortex_a53_slot_any")
 
-(define_insn_reservation "cortex_a53_alu_rotate_imm" 2
+(define_insn_reservation "cortex_a53_shift_reg" 2
   (and (eq_attr "tune" "cortexa53")
-   (eq_attr "type" "rotate_imm"))
-  "(cortex_a53_slot1)
-   | (cortex_a53_single_issue)")
+   (eq_attr "type" "shift_reg,mov_shift_reg"))
+  "cortex_a53_slot_any+cortex_a53_hazard")
 
 (define_insn_reservation "cortex_a53_alu" 3
   (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,
alu_sreg,alus_sreg,logic_reg,logics_reg,
adc_imm,adcs_imm,adc_reg,adcs_reg,
-   bfm,csel,clz,rbit,rev,alu_dsp_reg,
-   mov_reg,mvn_reg,
-   mrs,multiple,no_insn"))
+   csel,clz,rbit,rev,alu_dsp_reg,
+   mov_reg,mvn_reg,mrs,multiple,no_insn"))
   "cortex_a53_slot_any")
 
 (define_insn_reservation "cortex_a53_alu_shift" 3
   (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "alu_shift_imm,alus_shift_imm,
crc,logic_shift_imm,logics_shift_imm,
-   alu_ext,alus_ext,
-   extend,mov_shift,mvn_shift"))
+   alu_ext,alus_ext,bfm,extend,mvn_shift"))
   "cortex_a53_slot_any")
 
 (define_insn_reservation "cortex_a53_alu_shift_reg" 3
   (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "alu_shift_reg,alus_shift_reg,
logic_shift_reg,logics_shift_reg,
-   mov_shift_reg,mvn_shift_reg"))
+   mvn_shift_reg"))
   "cortex_a53_slot_any+cortex_a53_hazard")
 
-(define_insn_reservation "cortex_a53_mul" 3
+(define_insn_reservation "cortex_a53_alu_extr" 3
+  (and (eq_attr "tune" "cortexa53")
+   (eq_attr "type" "rotate_imm"))
+  "cortex_a53_slot1|cortex_a53_single_issue")
+
+(define_insn_reservation "cortex_a53_mul" 4
   (and (eq_attr "tune" "cortexa53")
(ior (eq_attr "mul32" "yes")
(eq_attr "mul64" "yes")))
@@ -189,49 +192,43 @@
 (define_insn_reservation "cortex_a53_branch" 0
   (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "branch,call"))
-  "cortex_a53_slot_any,cortex_a53_branch")
+  "cortex_a53_slot_any+cortex_a53_branch")
 
 
 ;; General-purpose register bypasses
 
 
-;; Model bypasses for unshifted operands to ALU instructions.
+;; Model bypasses for ALU to ALU instructions.
 
-(define_bypass 1 "cortex_a53_shift"
-"cortex_a53_shift")
+(define_bypass 0 "cortex_a53_shift*"
+"cortex_a53_alu")
 
-(define_bypass 1 "cortex_a53_alu,
- cortex_a53_alu_shift*,
- cortex_a53_alu_rotate_imm,
- cortex_a53_shift"
+(define_bypass 1 "cortex_a53_shift*"
+"cortex_a53_shift*,cortex_a53_alu_*")
+
+(define_bypass 1 "cortex_a53_alu*"
 "cortex_a53_alu")
 
-(define_bypass 2 "cortex_a53_alu,
- cortex_a53_alu_shift*"
+(define_bypass 1 "cortex_a53_alu*"
 "cortex_a53_alu_shift*"
 "aarch_forward_to_shift_is_not_shifted_reg")
 
-;; In our model, we allow any general-purpose register operation to
-;; bypass to the accumulator operand of an integer MADD-like operation.
+(define_bypass 2 "cortex_a53_alu*"
+"cortex_a53_alu_*,cortex_a53_shift*")
 
-(define_bypass 1 "cortex_a53_alu*,
- cortex_a53_load*,
- cortex_a53_mul"
+;; Model a bypass