Re: [PATCH] AArch64: Add cost table for Cortex-A76

2020-11-18 Thread Richard Earnshaw via Gcc-patches
On 18/11/2020 14:55, Wilco Dijkstra via Gcc-patches wrote:
> Add an initial cost table for Cortex-A76 - this is copied from
> cortexa57_extra_costs but updates it based on the Optimization Guide.
> Use the new cost table on all Neoverse tunings and ensure the tunings
> are consistent for all.  As a result more compact code is generated
> with more combined shift+alu operations. Eg. -mcpu=cortex-a76 will now
> merge the shifts in:
> 
> int f(int x, int y) { return (x & y << 3) * (x | y << 3); }
> 
> and  w2, w0, w1, lsl 3
> orr  w0, w0, w1, lsl 3
> mul  w0, w2, w0
> ret
> 
> SPEC2017 codesize improves by 0.02% and SPECINT2017 shows 0.24% gain.
> 
> Bootstrap OK, regress passes, OK for commit?
> 
> ChangeLog:
> 2020-11-18  Wilco Dijkstra  
> 
> * config/aarch64/aarch64.c (neoversen1_tunings): Use new
> cortexa76_extra_costs.
> (neoversev1_tunings): Likewise.
> (neoversen2_tunings): Likewise.
> * config/arm/aarch-cost-tables.h (cortexa76_extra_costs):
> add new costs.
> 
> ---
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> 6bf2f9aa344f9150dec72db660d951e50521285c..65ff49d2b4125013466f90a54ff698ae810580f0
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1312,7 +1312,7 @@ static const struct tune_params thunderx3t110_tunings =
>  
>  static const struct tune_params neoversen1_tunings =
>  {
> -  _extra_costs,
> +  &cortexa76_extra_costs,
>_addrcost_table,
>_regmove_cost,
>_vector_cost,
> @@ -1338,7 +1338,7 @@ static const struct tune_params neoversen1_tunings =
>  
>  static const struct tune_params neoversev1_tunings =
>  {
> -  _extra_costs,
> +  &cortexa76_extra_costs,
>_addrcost_table,
>_regmove_cost,
>_vector_cost,
> @@ -1364,7 +1364,7 @@ static const struct tune_params neoversev1_tunings =
>  
>  static const struct tune_params neoversen2_tunings =
>  {
> -  _extra_costs,
> +  &cortexa76_extra_costs,
>_addrcost_table,
>_regmove_cost,
>_vector_cost,
> diff --git a/gcc/config/arm/aarch-cost-tables.h 
> b/gcc/config/arm/aarch-cost-tables.h
> index 
> cf8186599018cc5e51cf44e4f2080a502d895e1d..1b9d53d07b54bddf1767121236b06d2b4581631c
>  100644
> --- a/gcc/config/arm/aarch-cost-tables.h
> +++ b/gcc/config/arm/aarch-cost-tables.h
> @@ -331,6 +331,109 @@ const struct cpu_cost_table cortexa57_extra_costs =
>}
>  };
>  
> +const struct cpu_cost_table cortexa76_extra_costs =
> +{
> +  /* ALU */
> +  {
> +0, /* arith.  */
> +0, /* logical.  */
> +0, /* shift.  */
> +0,  /* shift_reg.  */
> +COSTS_N_INSNS (1), /* arith_shift.  */
> +COSTS_N_INSNS (1), /* arith_shift_reg.  */
> +0,  /* log_shift.  */
> +COSTS_N_INSNS (1), /* log_shift_reg.  */
> +0, /* extend.  */
> +COSTS_N_INSNS (1), /* extend_arith.  */
> +COSTS_N_INSNS (1), /* bfi.  */
> +0, /* bfx.  */
> +0, /* clz.  */
> +0,  /* rev.  */
> +0, /* non_exec.  */
> +true   /* non_exec_costs_exec.  */
> +  },
> +  {
> +/* MULT SImode */
> +{
> +  COSTS_N_INSNS (1),   /* simple.  */
> +  COSTS_N_INSNS (2),   /* flag_setting.  */
> +  COSTS_N_INSNS (1),   /* extend.  */
> +  COSTS_N_INSNS (1),   /* add.  */
> +  COSTS_N_INSNS (1),   /* extend_add.  */
> +  COSTS_N_INSNS (6) /* idiv.  */
> +},
> +/* MULT DImode */
> +{
> +  COSTS_N_INSNS (3),   /* simple.  */
> +  0,   /* flag_setting (N/A).  */
> +  COSTS_N_INSNS (1),   /* extend.  */
> +  COSTS_N_INSNS (3),   /* add.  */
> +  COSTS_N_INSNS (1),   /* extend_add.  */
> +  COSTS_N_INSNS (10)   /* idiv.  */
> +}
> +  },
> +  /* LD/ST */
> +  {
> +COSTS_N_INSNS (3), /* load.  */
> +COSTS_N_INSNS (3), /* load_sign_extend.  */
> +COSTS_N_INSNS (3), /* ldrd.  */
> +COSTS_N_INSNS (2), /* ldm_1st.  */
> +1, /* ldm_regs_per_insn_1st.  */
> +2, /* ldm_regs_per_insn_subsequent.  */
> +COSTS_N_INSNS (4), /* loadf.  */
> +COSTS_N_INSNS (4), /* loadd.  */
> +COSTS_N_INSNS (5), /* load_unaligned.  */
> +0, /* store.  */
> +0, /* strd.  */
> +0, /* stm_1st.  */
> +1, /* stm_regs_per_insn_1st.  */
> +2, /* stm_regs_per_insn_subsequent.  */
> +0, /* storef.  */
> +0, /* stored.  */
> +COSTS_N_INSNS (1), /* store_unaligned.  */
> +COSTS_N_INSNS (1), /* loadv.  */
> +COSTS_N_INSNS (1)  /* storev.  */
> +  },
> +  {
> +/* FP SFmode */
> +{
> +  

[PATCH] AArch64: Add cost table for Cortex-A76

2020-11-18 Thread Wilco Dijkstra via Gcc-patches
Add an initial cost table for Cortex-A76 - this is copied from
cortexa57_extra_costs but updates it based on the Optimization Guide.
Use the new cost table on all Neoverse tunings and ensure the tunings
are consistent for all.  As a result more compact code is generated
with more combined shift+alu operations. Eg. -mcpu=cortex-a76 will now
merge the shifts in:

int f(int x, int y) { return (x & y << 3) * (x | y << 3); }

and  w2, w0, w1, lsl 3
orr  w0, w0, w1, lsl 3
mul  w0, w2, w0
ret

SPEC2017 codesize improves by 0.02% and SPECINT2017 shows 0.24% gain.

Bootstrap OK, regress passes, OK for commit?

ChangeLog:
2020-11-18  Wilco Dijkstra  

* config/aarch64/aarch64.c (neoversen1_tunings): Use new
cortexa76_extra_costs.
(neoversev1_tunings): Likewise.
(neoversen2_tunings): Likewise.
* config/arm/aarch-cost-tables.h (cortexa76_extra_costs):
add new costs.

---
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
6bf2f9aa344f9150dec72db660d951e50521285c..65ff49d2b4125013466f90a54ff698ae810580f0
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1312,7 +1312,7 @@ static const struct tune_params thunderx3t110_tunings =
 
 static const struct tune_params neoversen1_tunings =
 {
-  _extra_costs,
+  &cortexa76_extra_costs,
   _addrcost_table,
   _regmove_cost,
   _vector_cost,
@@ -1338,7 +1338,7 @@ static const struct tune_params neoversen1_tunings =
 
 static const struct tune_params neoversev1_tunings =
 {
-  _extra_costs,
+  &cortexa76_extra_costs,
   _addrcost_table,
   _regmove_cost,
   _vector_cost,
@@ -1364,7 +1364,7 @@ static const struct tune_params neoversev1_tunings =
 
 static const struct tune_params neoversen2_tunings =
 {
-  _extra_costs,
+  &cortexa76_extra_costs,
   _addrcost_table,
   _regmove_cost,
   _vector_cost,
diff --git a/gcc/config/arm/aarch-cost-tables.h 
b/gcc/config/arm/aarch-cost-tables.h
index 
cf8186599018cc5e51cf44e4f2080a502d895e1d..1b9d53d07b54bddf1767121236b06d2b4581631c
 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -331,6 +331,109 @@ const struct cpu_cost_table cortexa57_extra_costs =
   }
 };
 
+const struct cpu_cost_table cortexa76_extra_costs =
+{
+  /* ALU */
+  {
+0, /* arith.  */
+0, /* logical.  */
+0, /* shift.  */
+0,  /* shift_reg.  */
+COSTS_N_INSNS (1), /* arith_shift.  */
+COSTS_N_INSNS (1), /* arith_shift_reg.  */
+0,/* log_shift.  */
+COSTS_N_INSNS (1), /* log_shift_reg.  */
+0, /* extend.  */
+COSTS_N_INSNS (1), /* extend_arith.  */
+COSTS_N_INSNS (1), /* bfi.  */
+0, /* bfx.  */
+0, /* clz.  */
+0,  /* rev.  */
+0, /* non_exec.  */
+true   /* non_exec_costs_exec.  */
+  },
+  {
+/* MULT SImode */
+{
+  COSTS_N_INSNS (1),   /* simple.  */
+  COSTS_N_INSNS (2),   /* flag_setting.  */
+  COSTS_N_INSNS (1),   /* extend.  */
+  COSTS_N_INSNS (1),   /* add.  */
+  COSTS_N_INSNS (1),   /* extend_add.  */
+  COSTS_N_INSNS (6)   /* idiv.  */
+},
+/* MULT DImode */
+{
+  COSTS_N_INSNS (3),   /* simple.  */
+  0,   /* flag_setting (N/A).  */
+  COSTS_N_INSNS (1),   /* extend.  */
+  COSTS_N_INSNS (3),   /* add.  */
+  COSTS_N_INSNS (1),   /* extend_add.  */
+  COSTS_N_INSNS (10)   /* idiv.  */
+}
+  },
+  /* LD/ST */
+  {
+COSTS_N_INSNS (3), /* load.  */
+COSTS_N_INSNS (3), /* load_sign_extend.  */
+COSTS_N_INSNS (3), /* ldrd.  */
+COSTS_N_INSNS (2), /* ldm_1st.  */
+1, /* ldm_regs_per_insn_1st.  */
+2, /* ldm_regs_per_insn_subsequent.  */
+COSTS_N_INSNS (4), /* loadf.  */
+COSTS_N_INSNS (4), /* loadd.  */
+COSTS_N_INSNS (5), /* load_unaligned.  */
+0, /* store.  */
+0, /* strd.  */
+0, /* stm_1st.  */
+1, /* stm_regs_per_insn_1st.  */
+2, /* stm_regs_per_insn_subsequent.  */
+0, /* storef.  */
+0, /* stored.  */
+COSTS_N_INSNS (1), /* store_unaligned.  */
+COSTS_N_INSNS (1), /* loadv.  */
+COSTS_N_INSNS (1)  /* storev.  */
+  },
+  {
+/* FP SFmode */
+{
+  COSTS_N_INSNS (10),  /* div.  */
+  COSTS_N_INSNS (2),   /* mult.  */
+  COSTS_N_INSNS (3),   /* mult_addsub.  */
+  COSTS_N_INSNS (3),   /* fma.  */
+  COSTS_N_INSNS (1),   /* addsub.  */
+  0,   /* fpconst.  */
+  0,   /* neg.  */
+  0,