Re: Add znver2 scheduler model

2019-07-30 Thread Jan Hubicka
> Hi,
> this patch adds znver2 scheduler model. Znver2 is close enough to znver1
> that I have decided to implement it in one automaton.  The main
> difference is extra AGU unit that seems to be used for store only
> (according to CPU diagram), the fact that 256bit vector operations are
> no longer split (and thus they behave like 128bit) and reduced latency
> of fp multiply and conversion operations.
> 
> The patch seems to have very little effect on overall performance,
> since we do not model the out-of-order core and thus we think that the
> CPU is mostly starved by not having enough parallelism to execute.
> Still, it is better to be precise.
> 
> Bootstrapped/regtested x86_64-linux, committed.
> 
>   * common/config/i386/i386-common.c: Use PROCESSOR_ZNVER2 scheduler for
>   znver2.
>   * config/i386/znver1.md: Enable patterns for znver2 and add store
>   variants which use extra AGU unit.

Hello,
I have backported this patch to gcc 9 branch now too.

Honza


Add znver2 scheduler model

2019-07-23 Thread Jan Hubicka
Hi,
this patch adds znver2 scheduler model. Znver2 is close enough to znver1
that I have decided to implement it in one automaton.  The main
difference is extra AGU unit that seems to be used for store only
(according to CPU diagram), the fact that 256bit vector operations are
no longer split (and thus they behave like 128bit) and reduced latency
of fp multiply and conversion operations.

The patch seems to have very little effect on overall performance,
since we do not model the out-of-order core and thus we think that the
CPU is mostly starved by not having enough parallelism to execute.
Still, it is better to be precise.

Bootstrapped/regtested x86_64-linux, committed.

* common/config/i386/i386-common.c: Use PROCESSOR_ZNVER2 scheduler for
znver2.
* config/i386/znver1.md: Enable patterns for znver2 and add store
variants which use extra AGU unit.
Index: common/config/i386/i386-common.c
===
--- common/config/i386/i386-common.c   (revision 273727)
+++ common/config/i386/i386-common.c   (working copy)
@@ -1760,7 +1760,7 @@ const pta processor_alias_table[] =
   | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
   | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
   | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
-  {"znver2", PROCESSOR_ZNVER2, CPU_ZNVER1,
+  {"znver2", PROCESSOR_ZNVER2, CPU_ZNVER2,
 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
   | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
   | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
Index: config/i386/znver1.md
===
--- config/i386/znver1.md   (revision 273727)
+++ config/i386/znver1.md   (working copy)
@@ -17,10 +17,11 @@
 ;; .
 ;;
 
 (define_attr "znver1_decode" "direct,vector,double"
   (const_string "direct"))
 
-;; AMD znver1 Scheduling
+;; AMD znver1 and znver2 Scheduling
 ;; Modeling automatons for zen decoders, integer execution pipes,
 ;; AGU pipes and floating point execution units.
 (define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu")
@@ -51,13 +52,21 @@
 (define_cpu_unit "znver1-ieu3" "znver1_ieu")
 (define_reservation "znver1-ieu" "znver1-ieu0|znver1-ieu1|znver1-ieu2|znver1-ieu3")
 
-;; 2 AGU pipes.
+;; 2 AGU pipes in znver1 and 3 AGU pipes in znver2
+;; According to CPU diagram last AGU unit is used only for stores.
 (define_cpu_unit "znver1-agu0" "znver1_agu")
 (define_cpu_unit "znver1-agu1" "znver1_agu")
+(define_cpu_unit "znver2-agu2" "znver1_agu")
 (define_reservation "znver1-agu-reserve" "znver1-agu0|znver1-agu1")
+(define_reservation "znver2-store-agu-reserve" "znver1-agu0|znver1-agu1|znver2-agu2")
 
+;; Load is 4 cycles. We do not model reservation of load unit.
+;;(define_reservation "znver1-load" "znver1-agu-reserve, nothing, nothing, nothing")
 (define_reservation "znver1-load" "znver1-agu-reserve")
+;; Store operations differ between znver1 and znver2 because an extra AGU
+;; was added.
 (define_reservation "znver1-store" "znver1-agu-reserve")
+(define_reservation "znver2-store" "znver2-store-agu-reserve")
 
 ;; vectorpath (microcoded) instructions are single issue instructions.
 ;; So, they occupy all the integer units.
@@ -65,6 +74,9 @@
  +znver1-ieu2+znver1-ieu3
  +znver1-agu0+znver1-agu1")
 
+(define_reservation "znver2-ivector" "znver1-ieu0+znver1-ieu1
+ +znver1-ieu2+znver1-ieu3
+ +znver1-agu0+znver1-agu1+znver2-agu2")
 ;; Floating point unit 4 FP pipes.
 (define_cpu_unit "znver1-fp0" "znver1_fp")
 (define_cpu_unit "znver1-fp1" "znver1_fp")
@@ -76,6 +88,9 @@
 (define_reservation "znver1-fvector" "znver1-fp0+znver1-fp1
  +znver1-fp2+znver1-fp3
  +znver1-agu0+znver1-agu1")
+(define_reservation "znver2-fvector" "znver1-fp0+znver1-fp1
+ +znver1-fp2+znver1-fp3
+ +znver1-agu0+znver1-agu1+znver2-agu2")
 
 ;; Call instruction
 (define_insn_reservation "znver1_call" 1
@@ -83,27 +98,36 @@
  (eq_attr "type" "call,callv"))
 "znver1-double,znver1-store,znver1-ieu0|znver1-ieu3")
 
+(define_insn_reservation "znver2_call" 1
+(and (eq_attr "cpu" "znver2")
+ (eq_attr "type" "call,callv"))
+"znver1-double,znver2-store,znver1-ieu0|znver1-ieu3")
+
 ;; General instructions
 (define_insn_reservation "znver1_push" 1
 (and (eq_attr "cpu" "znver1")
  (and (eq_attr "type" "push")
-  (eq_attr "memory" "none,unknown")))
+  (eq_attr "memory" "store")))