Now that we are in stage 1 again, here is an update to my earlier t99
scheduling file patch for thunderx2t99.md.  There were some instruction
types (mostly asimd) that did not have schedules and other types that had
duplicate schedules.  With this patch there should be one schedule for
every type and no duplicates.

I did some SPEC2017 runs on a T99 to see whether this had any significant
performance impact, but it did not appear to have any.  The performance
differences were small and within the range of results I had gotten
before.  I would still like to check this in, though, in order to have
a complete and correct schedule file for T99.

Steve Ellcey
sell...@cavium.com

2018-05-04  Steve Ellcey  <sell...@cavium.com>

        * config/aarch64/thunderx2t99.md (thunderx2t99_ls_both): Delete.
        (thunderx2t99_multiple): Delete pseudo-units from used cpus.
        Add untyped.
        (thunderx2t99_alu_shift): Remove alu_shift_reg, alus_shift_reg.
        Change logics_shift_reg to logics_shift_imm.
        (thunderx2t99_loadpair): Fix cpu unit ordering.
        (thunderx2t99_fp_loadpair_basic): Delete.
        (thunderx2t99_fp_storepair_basic): Delete.
        (thunderx2t99_asimd_int): Add neon_sub and neon_sub_q types.
        (thunderx2t99_asimd_polynomial): Delete.
        (thunderx2t99_asimd_fp_simple): Add neon_fp_mul_s_scalar_q
        and neon_fp_mul_d_scalar_q.
        (thunderx2t99_asimd_fp_conv): Add *int_to_fp* types.
        (thunderx2t99_asimd_misc): Delete neon_dup and neon_dup_q.
        (thunderx2t99_asimd_recip_step): Add missing *sqrt* types.
        (thunderx2t99_asimd_lut): Add missing tbl types.
        (thunderx2t99_asimd_ext): Delete.
        (thunderx2t99_asimd_load1_1_mult): Delete.
        (thunderx2t99_asimd_load1_2_mult): Delete.
        (thunderx2t99_asimd_load1_ldp): New.
        (thunderx2t99_asimd_load1): New.
        (thunderx2t99_asimd_load2): Add missing *load2* types.
        (thunderx2t99_asimd_load3): New.
        (thunderx2t99_asimd_load4): New.
        (thunderx2t99_asimd_store1_1_mult): Delete.
        (thunderx2t99_asimd_store1_2_mult): Delete.
        (thunderx2t99_asimd_store2_mult): Delete.
        (thunderx2t99_asimd_store2_onelane): Delete.
        (thunderx2t99_asimd_store_stp): New.
        (thunderx2t99_asimd_store1): New.
        (thunderx2t99_asimd_store2): New.
        (thunderx2t99_asimd_store3): New.
        (thunderx2t99_asimd_store4): New.
diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
index 589e564..eee2896 100644
--- a/gcc/config/aarch64/thunderx2t99.md
+++ b/gcc/config/aarch64/thunderx2t99.md
@@ -54,8 +54,6 @@
 (define_reservation "thunderx2t99_ls01" "thunderx2t99_ls0|thunderx2t99_ls1")
 (define_reservation "thunderx2t99_f01" "thunderx2t99_f0|thunderx2t99_f1")
 
-(define_reservation "thunderx2t99_ls_both" "thunderx2t99_ls0+thunderx2t99_ls1")
-
 ; A load with delay in the ls0/ls1 pipes.
 (define_reservation "thunderx2t99_l0delay" "thunderx2t99_ls0,\
 				      thunderx2t99_ls0d1,thunderx2t99_ls0d2,\
@@ -86,12 +84,10 @@
 
 (define_insn_reservation "thunderx2t99_multiple" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "multiple"))
+       (eq_attr "type" "multiple,untyped"))
   "thunderx2t99_i0+thunderx2t99_i1+thunderx2t99_i2+thunderx2t99_ls0+\
    thunderx2t99_ls1+thunderx2t99_sd+thunderx2t99_i1m1+thunderx2t99_i1m2+\
-   thunderx2t99_i1m3+thunderx2t99_ls0d1+thunderx2t99_ls0d2+thunderx2t99_ls0d3+\
-   thunderx2t99_ls1d1+thunderx2t99_ls1d2+thunderx2t99_ls1d3+thunderx2t99_f0+\
-   thunderx2t99_f1")
+   thunderx2t99_i1m3+thunderx2t99_f0+thunderx2t99_f1")
 
 ;; Integer arithmetic/logic instructions.
 
@@ -113,9 +109,9 @@
 
 (define_insn_reservation "thunderx2t99_alu_shift" 2
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "alu_shift_imm,alu_ext,alu_shift_reg,\
-			alus_shift_imm,alus_ext,alus_shift_reg,\
-			logic_shift_imm,logics_shift_reg"))
+       (eq_attr "type" "alu_shift_imm,alu_ext,\
+			alus_shift_imm,alus_ext,\
+			logic_shift_imm,logics_shift_imm"))
   "thunderx2t99_i012,thunderx2t99_i012")
 
 (define_insn_reservation "thunderx2t99_div" 13
@@ -150,7 +146,7 @@
 (define_insn_reservation "thunderx2t99_loadpair" 5
   (and (eq_attr "tune" "thunderx2t99")
        (eq_attr "type" "load_8,load_16"))
-  "thunderx2t99_i012,thunderx2t99_ls01")
+  "thunderx2t99_ls01,thunderx2t99_i012")
 
 (define_insn_reservation "thunderx2t99_store_basic" 1
   (and (eq_attr "tune" "thunderx2t99")
@@ -228,21 +224,11 @@
        (eq_attr "type" "f_loads,f_loadd"))
   "thunderx2t99_ls01")
 
-(define_insn_reservation "thunderx2t99_fp_loadpair_basic" 4
-  (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_load1_2reg"))
-  "thunderx2t99_ls01*2")
-
 (define_insn_reservation "thunderx2t99_fp_store_basic" 1
   (and (eq_attr "tune" "thunderx2t99")
        (eq_attr "type" "f_stores,f_stored"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
-(define_insn_reservation "thunderx2t99_fp_storepair_basic" 1
-  (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_store1_2reg"))
-  "thunderx2t99_ls01,(thunderx2t99_ls01+thunderx2t99_sd),thunderx2t99_sd")
-
 ;; ASIMD integer instructions.
 
 (define_insn_reservation "thunderx2t99_asimd_int" 7
@@ -251,6 +237,7 @@
 			neon_arith_acc,neon_arith_acc_q,\
 			neon_abs,neon_abs_q,\
 			neon_add,neon_add_q,\
+			neon_sub,neon_sub_q,\
 			neon_neg,neon_neg_q,\
 			neon_add_long,neon_add_widen,\
 			neon_add_halve,neon_add_halve_q,\
@@ -301,11 +288,6 @@
        (eq_attr "type" "neon_logic,neon_logic_q"))
   "thunderx2t99_f01")
 
-(define_insn_reservation "thunderx2t99_asimd_polynomial" 5
-  (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_mul_d_long"))
-  "thunderx2t99_f01")
-
 ;; ASIMD floating-point instructions.
 
 (define_insn_reservation "thunderx2t99_asimd_fp_simple" 5
@@ -332,6 +314,7 @@
 			neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q,\
 			neon_fp_mul_s,neon_fp_mul_d,\
 			neon_fp_mul_s_q,neon_fp_mul_d_q,\
+			neon_fp_mul_s_scalar_q,neon_fp_mul_d_scalar_q,\
 			neon_fp_mla_s,neon_fp_mla_d,\
 			neon_fp_mla_s_q,neon_fp_mla_d_q"))
   "thunderx2t99_f01")
@@ -341,6 +324,8 @@
        (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\
 			neon_fp_to_int_s,neon_fp_to_int_d,\
 			neon_fp_to_int_s_q,neon_fp_to_int_d_q,\
+			neon_int_to_fp_s,neon_int_to_fp_d,\
+			neon_int_to_fp_s_q,neon_int_to_fp_d_q,\
 			neon_fp_round_s,neon_fp_round_d,\
 			neon_fp_round_s_q,neon_fp_round_d_q"))
   "thunderx2t99_f01")
@@ -373,7 +358,6 @@
 			neon_fp_recpx_s,neon_fp_recpx_d,\
 			neon_fp_recpx_s_q,neon_fp_recpx_d_q,\
 			neon_rev,neon_rev_q,\
-			neon_dup,neon_dup_q,\
 			neon_permute,neon_permute_q"))
   "thunderx2t99_f01")
 
@@ -381,13 +365,18 @@
   (and (eq_attr "tune" "thunderx2t99")
        (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\
 			neon_fp_recps_d,neon_fp_recps_d_q,\
+			neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
+			neon_fp_sqrt_d,neon_fp_sqrt_d_q,\
+			neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
+			neon_fp_rsqrte_d, neon_fp_rsqrte_d_q,\
 			neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
 			neon_fp_rsqrts_d, neon_fp_rsqrts_d_q"))
   "thunderx2t99_f01")
 
 (define_insn_reservation "thunderx2t99_asimd_lut" 8
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_tbl1,neon_tbl1_q,neon_tbl2_q"))
+       (eq_attr "type" "neon_tbl1,neon_tbl1_q,neon_tbl2,neon_tbl2_q,\
+			neon_tbl3,neon_tbl3_q,neon_tbl4,neon_tbl4_q"))
   "thunderx2t99_f01")
 
 (define_insn_reservation "thunderx2t99_asimd_elt_to_gr" 6
@@ -395,26 +384,24 @@
        (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
   "thunderx2t99_f01")
 
-(define_insn_reservation "thunderx2t99_asimd_ext" 7
-  (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_shift_imm_narrow_q,neon_sat_shift_imm_narrow_q"))
-  "thunderx2t99_f01")
-
 ;; ASIMD load instructions.
 
 ; NOTE: These reservations attempt to model latency and throughput correctly,
 ; but the cycle timing of unit allocation is not necessarily accurate (because
 ; insns are split into uops, and those may be issued out-of-order).
 
-(define_insn_reservation "thunderx2t99_asimd_load1_1_mult" 4
+(define_insn_reservation "thunderx2t99_asimd_load1_ldp" 5
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q"))
-  "thunderx2t99_ls01")
+       (eq_attr "type" "neon_ldp,neon_ldp_q"))
+  "thunderx2t99_ls01,thunderx2t99_i012")
 
-(define_insn_reservation "thunderx2t99_asimd_load1_2_mult" 4
+(define_insn_reservation "thunderx2t99_asimd_load1" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
-  "thunderx2t99_ls_both")
+       (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
+			neon_load1_2reg,neon_load1_2reg_q,\
+			neon_load1_3reg,neon_load1_3reg_q,\
+			neon_load1_4reg,neon_load1_4reg_q"))
+  "thunderx2t99_ls01")
 
 (define_insn_reservation "thunderx2t99_asimd_load1_onelane" 5
   (and (eq_attr "tune" "thunderx2t99")
@@ -431,36 +418,59 @@
        (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\
 			neon_load2_one_lane,neon_load2_one_lane_q,\
 			neon_load2_all_lanes,neon_load2_all_lanes_q"))
-  "(thunderx2t99_l0delay,thunderx2t99_f01)|(thunderx2t99_l1delay,\
-    thunderx2t99_f01)")
+  "thunderx2t99_l01delay,thunderx2t99_f01")
+
+(define_insn_reservation "thunderx2t99_asimd_load3" 7
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+			neon_load3_one_lane,neon_load3_one_lane_q,\
+			neon_load3_all_lanes,neon_load3_all_lanes_q"))
+  "thunderx2t99_l01delay,thunderx2t99_f01")
+
+(define_insn_reservation "thunderx2t99_asimd_load4" 8
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q,\
+			neon_load4_one_lane,neon_load4_one_lane_q,\
+			neon_load4_all_lanes,neon_load4_all_lanes_q"))
+  "thunderx2t99_l01delay,thunderx2t99_f01")
 
 ;; ASIMD store instructions.
 
 ; Same note applies as for ASIMD load instructions.
 
-(define_insn_reservation "thunderx2t99_asimd_store1_1_mult" 1
+(define_insn_reservation "thunderx2t99_asimd_store_stp" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q"))
-  "thunderx2t99_ls01")
+       (eq_attr "type" "neon_stp,neon_stp_q"))
+  "thunderx2t99_ls01,thunderx2t99_sd")
 
-(define_insn_reservation "thunderx2t99_asimd_store1_2_mult" 1
+(define_insn_reservation "thunderx2t99_asimd_store1" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q"))
-  "thunderx2t99_ls_both")
+       (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,\
+			neon_store1_2reg,neon_store1_2reg_q,\
+			neon_store1_3reg,neon_store1_4reg"))
+  "thunderx2t99_ls01")
 
 (define_insn_reservation "thunderx2t99_asimd_store1_onelane" 1
   (and (eq_attr "tune" "thunderx2t99")
        (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q"))
   "thunderx2t99_ls01,thunderx2t99_f01")
 
-(define_insn_reservation "thunderx2t99_asimd_store2_mult" 1
+(define_insn_reservation "thunderx2t99_asimd_store2" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q"))
-  "thunderx2t99_ls_both,thunderx2t99_f01")
+       (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q,\
+			neon_store2_one_lane,neon_store2_one_lane_q"))
+  "thunderx2t99_ls01,thunderx2t99_f01")
+
+(define_insn_reservation "thunderx2t99_asimd_store3" 1
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q,\
+			neon_store3_one_lane,neon_store3_one_lane_q"))
+  "thunderx2t99_ls01,thunderx2t99_f01")
 
-(define_insn_reservation "thunderx2t99_asimd_store2_onelane" 1
+(define_insn_reservation "thunderx2t99_asimd_store4" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q"))
+       (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q,\
+			neon_store4_one_lane,neon_store4_one_lane_q"))
   "thunderx2t99_ls01,thunderx2t99_f01")
 
 ;; Crypto extensions.

Reply via email to