Re: [RFC] [aarch64] Add HiSilicon tsv110 CPU support.

Kyrill Tkachov Wed, 23 May 2018 01:10:39 -0700


On 23/05/18 05:54, Zhangshaokun wrote:

Hi Kyrill,


On 2018/5/22 18:52, Kyrill Tkachov wrote:

Hi Shaokun,

On 22/05/18 09:40, Shaokun Zhang wrote:

This patch adds an mcpu for HiSilicon's core: tsv110.

---
  gcc/ChangeLog                            |   9 +++
  gcc/config/aarch64/aarch64-cores.def     |   5 ++
  gcc/config/aarch64/aarch64-cost-tables.h | 103 +++++++++++++++++++++++++++++++
  gcc/config/aarch64/aarch64-tune.md       |   2 +-
  gcc/config/aarch64/aarch64.c             |  79 ++++++++++++++++++++++++
  gcc/doc/invoke.texi                      |   2 +-
  6 files changed, 198 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cec2892..5d44966 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2018-05-22  Shaokun Zhang <zhangshao...@hisilicon.com>
+            Bo Zhou  <zbo.z...@hisilicon.com>
+
+       * config/aarch64/aarch64-cores.def (tsv110): New CPU.
+       * config/aarch64/aarch64-tune.md: Regenerated.
+       * doc/invoke.texi (AArch61 Options/-mtune): Add "tsv110".

typo: AArch64.

Good catch, my mistake.

+       * gcc/config/aarch64/aarch64.c (tsv110_tunings): New tuning table.
+       * gcc/config/aarch64/aarch64-cost-tables.h: Add "tsv110" extra costs.

Please start the path with config/.

Sure, will remove gcc/ in the next version.

+
  2018-05-21  Michael Meissner <meiss...@linux.ibm.com>

          PR target/85657
diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 33b96ca..db7a412 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -91,6 +91,11 @@ AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2
  /* Qualcomm ('Q') cores. */
  AARCH64_CORE("saphira",     saphira,    falkor,    8_3A, 
AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   0x51, 0xC01, -1)

+/* ARMv8.4-A Architecture Processors.  */
+
+/* HiSilicon ('H') cores. */
+AARCH64_CORE("tsv110",     tsv110,    tsv110,    8_4A, AARCH64_FL_FOR_ARCH8_4 
| AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 
0xd01, -1)
+

The third field is the scheduler model to use when optimising.
Since there is no tsv110 scheduling model, using the name "tsv110"
in the third field will generally give pretty poor schedules.
I recommend you specify a scheduling model that most closely matches your core
for the time being. But I don't think it's required and I wouldn't let it hold

I checked it again: cortexa57 most closely matches tsv110. Thanks for your
suggestion.
If I choose cortexa57, can I still add the tsv110_tunings, which will use tsv110's
pipeline features, as in the rest of the patch below, or should I only use the
generic features?


If you use cortexa57 for the scheduling model (the 3rd field) you should still
use tsv110_tunings in the 6th field, as this will specify other important
parameters like instruction selection costs, fusion capabilities, alignment
requirements etc.

Thanks,
Kyrill

up the patch.

You'll need approval from an aarch64 maintainer (cc'ed some for you).

Good, thanks for your nice guidance.

Thanks,
Shaokun

Thanks,
Kyrill

  /* ARMv8-A big.LITTLE implementations.  */

  AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  
AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 
0xd03), -1)
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h 
b/gcc/config/aarch64/aarch64-cost-tables.h
index a455c62..b6890d6 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -334,4 +334,107 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
    }
  };

+const struct cpu_cost_table tsv110_extra_costs =
+{
+  /* ALU */
+  {
+    0,                 /* arith.  */
+    0,                 /* logical.  */
+    0,                 /* shift.  */
+    0,                 /* shift_reg.  */
+    COSTS_N_INSNS (1), /* arith_shift.  */
+    COSTS_N_INSNS (1), /* arith_shift_reg.  */
+    COSTS_N_INSNS (1), /* log_shift.  */
+    COSTS_N_INSNS (1), /* log_shift_reg.  */
+    0,                 /* extend.  */
+    COSTS_N_INSNS (1), /* extend_arith.  */
+    0,                 /* bfi.  */
+    0,                 /* bfx.  */
+    0,                 /* clz.  */
+    0,                /* rev.  */
+    0,                 /* non_exec.  */
+    true               /* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (2),       /* simple.  */
+      COSTS_N_INSNS (2),       /* flag_setting.  */
+      COSTS_N_INSNS (2),       /* extend.  */
+      COSTS_N_INSNS (2),       /* add.  */
+      COSTS_N_INSNS (2),       /* extend_add.  */
+      COSTS_N_INSNS (11)       /* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      COSTS_N_INSNS (3),       /* simple.  */
+      0,                       /* flag_setting (N/A).  */
+      COSTS_N_INSNS (3),       /* extend.  */
+      COSTS_N_INSNS (3),       /* add.  */
+      COSTS_N_INSNS (3),       /* extend_add.  */
+      COSTS_N_INSNS (19)       /* idiv.  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (3),         /* load.  */
+    COSTS_N_INSNS (4),         /* load_sign_extend.  */
+    COSTS_N_INSNS (3),         /* ldrd.  */
+    COSTS_N_INSNS (3),         /* ldm_1st.  */
+    1,                         /* ldm_regs_per_insn_1st. */
+    2,                         /* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (4),         /* loadf.  */
+    COSTS_N_INSNS (4),         /* loadd.  */
+    COSTS_N_INSNS (4),         /* load_unaligned.  */
+    0,                         /* store.  */
+    0,                         /* strd.  */
+    0,                         /* stm_1st.  */
+    1,                         /* stm_regs_per_insn_1st. */
+    2,                         /* stm_regs_per_insn_subsequent.  */
+    0,                         /* storef.  */
+    0,                         /* stored.  */
+    COSTS_N_INSNS (1),         /* store_unaligned.  */
+    COSTS_N_INSNS (4),         /* loadv.  */
+    COSTS_N_INSNS (4)          /* storev.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (10),      /* div.  */
+      COSTS_N_INSNS (4),       /* mult.  */
+      COSTS_N_INSNS (4),       /* mult_addsub.  */
+      COSTS_N_INSNS (4),       /* fma.  */
+      COSTS_N_INSNS (4),       /* addsub.  */
+      COSTS_N_INSNS (1),       /* fpconst.  */
+      COSTS_N_INSNS (1),       /* neg.  */
+      COSTS_N_INSNS (1),       /* compare.  */
+      COSTS_N_INSNS (2),       /* widen.  */
+      COSTS_N_INSNS (2),       /* narrow.  */
+      COSTS_N_INSNS (2),       /* toint.  */
+      COSTS_N_INSNS (1),       /* fromint.  */
+      COSTS_N_INSNS (2)        /* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (17),      /* div.  */
+      COSTS_N_INSNS (4),       /* mult.  */
+      COSTS_N_INSNS (6),       /* mult_addsub.  */
+      COSTS_N_INSNS (6),       /* fma.  */
+      COSTS_N_INSNS (3),       /* addsub.  */
+      COSTS_N_INSNS (1),       /* fpconst.  */
+      COSTS_N_INSNS (1),       /* neg.  */
+      COSTS_N_INSNS (1),       /* compare.  */
+      COSTS_N_INSNS (2),       /* widen.  */
+      COSTS_N_INSNS (2),       /* narrow.  */
+      COSTS_N_INSNS (2),       /* toint.  */
+      COSTS_N_INSNS (1),       /* fromint.  */
+      COSTS_N_INSNS (2)        /* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)  /* alu.  */
+  }
+};
+
  #endif
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 7b3a746..a10f2e7 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
  ;; -*- buffer-read-only: t -*-
  ;; Generated automatically by gentune.sh from aarch64-cores.def
  (define_attr "tune"
- 
"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
+ 
"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,tsv110,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
          (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6bf6c05..0788c14 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -266,6 +266,22 @@ static const struct cpu_addrcost_table 
generic_addrcost_table =
    0 /* imm_offset  */
  };

+static const struct cpu_addrcost_table tsv110_addrcost_table =
+{
+    {
+      1, /* hi  */
+      0, /* si  */
+      0, /* di  */
+      1, /* ti  */
+    },
+  0, /* pre_modify  */
+  0, /* post_modify  */
+  0, /* register_offset  */
+  1, /* register_sextend  */
+  1, /* register_zextend  */
+  0 /* imm_offset  */
+};
+
  static const struct cpu_addrcost_table exynosm1_addrcost_table =
  {
      {
@@ -344,6 +360,16 @@ static const struct cpu_regmove_cost 
cortexa53_regmove_cost =
    2 /* FP2FP  */
  };

+static const struct cpu_regmove_cost tsv110_regmove_cost =
+{
+  1, /* GP2GP  */
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+     their cost higher than memmov_cost.  */
+  2, /* GP2FP  */
+  3, /* FP2GP  */
+  2  /* FP2FP  */
+};
+
  static const struct cpu_regmove_cost exynosm1_regmove_cost =
  {
    1, /* GP2GP  */
@@ -450,6 +476,25 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
    1 /* cond_not_taken_branch_cost  */
  };

+static const struct cpu_vector_cost tsv110_vector_cost =
+{
+  1, /* scalar_int_stmt_cost  */
+  1, /* scalar_fp_stmt_cost  */
+  5, /* scalar_load_cost  */
+  1, /* scalar_store_cost  */
+  2, /* vec_int_stmt_cost  */
+  2, /* vec_fp_stmt_cost  */
+  2, /* vec_permute_cost  */
+  3, /* vec_to_scalar_cost  */
+  2, /* scalar_to_vec_cost  */
+  5, /* vec_align_load_cost  */
+  5, /* vec_unalign_load_cost  */
+  1, /* vec_unalign_store_cost  */
+  1, /* vec_store_cost  */
+  1, /* cond_taken_branch_cost  */
+  1 /* cond_not_taken_branch_cost  */
+};
+
  static const struct cpu_vector_cost exynosm1_vector_cost =
  {
    1, /* scalar_int_stmt_cost  */
@@ -550,6 +595,15 @@ static const cpu_prefetch_tune generic_prefetch_tune =
    -1                   /* default_opt_level  */
  };

+static const cpu_prefetch_tune tsv110_prefetch_tune =
+{
+  0,                   /* num_slots  */
+  64,                  /* l1_cache_size  */
+  64,                  /* l1_cache_line_size  */
+  512,                 /* l2_cache_size  */
+  -1                   /* default_opt_level  */
+};
+
  static const cpu_prefetch_tune exynosm1_prefetch_tune =
  {
    0,                   /* num_slots  */
@@ -751,6 +805,31 @@ static const struct tune_params cortexa73_tunings =
  };


+static const struct tune_params tsv110_tunings =
+{
+  &tsv110_extra_costs,
+  &tsv110_addrcost_table,
+  &tsv110_regmove_cost,
+  &tsv110_vector_cost,
+  &generic_branch_cost,
+  &generic_approx_modes,
+  4, /* memmov_cost  */
+  4, /* issue_rate  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
+   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
+  16,  /* function_align.  */
+  4,   /* jump_align.  */
+  8,   /* loop_align.  */
+  2,   /* int_reassoc_width.  */
+  4,   /* fp_reassoc_width.  */
+  1,   /* vec_reassoc_width.  */
+  2,   /* min_div_recip_mul_sf.  */
+  2,   /* min_div_recip_mul_df.  */
+  0,   /* max_case_values.  */
+  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
+  &tsv110_prefetch_tune
+};

  static const struct tune_params exynosm1_tunings =
  {
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index beba295..55fcd42 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14713,7 +14713,7 @@ performance of the code. Permissible values for this 
option are:
  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
  @samp{exynos-m1}, @samp{falkor}, @samp{qdf24xx}, @samp{saphira},
-@samp{xgene1}, @samp{vulcan}, @samp{thunderx},
+@samp{xgene1}, @samp{vulcan}, @samp{thunderx}, @samp{tsv110},
  @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
  @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
  @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},
--
2.7.4

Re: [RFC] [aarch64] Add HiSilicon tsv110 CPU support.

Reply via email to