I would like to submit this pipeline description, but I'm unsure whether
adding an option that only influences one specific -mtune value is
acceptable.

You could argue for creating separate mtunes for each option but I'm not
willing to do that because there will be more configurable parts in the
future (e.g. -mfpu).  That would result in a combinatorial explosion.

Making the option apply to all pipelines seems unnecessarily invasive.

Any input on this is appreciated!

---8<---

This patch introduces the pipeline description for the Synopsys RMX-100
series processor to the RISC-V GCC backend.  The RMX-100 has a short,
three-stage, in-order execution pipeline with configurable multiply
unit options.

The option -mmpy-option was added to control which version of the MPY
unit the core has and what the latency of multiply instructions should
be, similar to the equivalent option for ARCv2 cores (see
gcc/config/arc/arc.opt:60).

gcc/ChangeLog:

        * config/riscv/riscv-cores.def (RISCV_TUNE): Add
          arc-v-rmx-100-series.
        * config/riscv/riscv-opts.h (enum riscv_microarchitecture_type):
          Add arcv_rmx100.
        (enum arcv_mpy_option_enum): New enum for ARC-V multiply
        options.
        * config/riscv/riscv-protos.h (arcv_mpy_1c_bypass_p): New
          declaration.
        (arcv_mpy_2c_bypass_p): New declaration.
        (arcv_mpy_10c_bypass_p): New declaration.
        * config/riscv/riscv.cc (arcv_rmx100_tune_info): New tune
          parameters.
        (arcv_mpy_1c_bypass_p): New function.
        (arcv_mpy_2c_bypass_p): New function.
        (arcv_mpy_10c_bypass_p): New function.
        * config/riscv/riscv.md: Add arcv_rmx100.
        * config/riscv/riscv.opt: New option for RMX-100 multiply unit
          configuration.
        * doc/riscv-mtune.texi: Document arc-v-rmx-100-series.
        * config/riscv/arcv-rmx100.md: New file.

Authored-by: Artemiy Volkov <[email protected]>
Co-authored-by: Michiel Derhaeg <[email protected]>
Signed-off-by: Luis Silva <[email protected]>
---
 gcc/config/riscv/arcv-rmx100.md  | 104 +++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv-cores.def |   1 +
 gcc/config/riscv/riscv-opts.h    |   8 +++
 gcc/config/riscv/riscv-protos.h  |   4 ++
 gcc/config/riscv/riscv.cc        |  49 +++++++++++++++
 gcc/config/riscv/riscv.md        |   4 +-
 gcc/config/riscv/riscv.opt       |  17 +++++
 gcc/doc/riscv-mtune.texi         |   2 +
 8 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 gcc/config/riscv/arcv-rmx100.md

diff --git a/gcc/config/riscv/arcv-rmx100.md b/gcc/config/riscv/arcv-rmx100.md
new file mode 100644
index 00000000000..bd6423b9a28
--- /dev/null
+++ b/gcc/config/riscv/arcv-rmx100.md
@@ -0,0 +1,104 @@
+;; DFA scheduling description of the Synopsys RMX-100 cpu
+;; for GNU C compiler
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "arcv_rmx100")
+
+(define_cpu_unit "arcv_rmx100_ALU"    "arcv_rmx100")
+(define_cpu_unit "arcv_rmx100_FPU"    "arcv_rmx100")
+(define_cpu_unit "arcv_rmx100_MPY"    "arcv_rmx100")
+(define_cpu_unit "arcv_rmx100_DIV"    "arcv_rmx100")
+(define_cpu_unit "arcv_rmx100_DMP"    "arcv_rmx100")
+
+;; Instruction reservation for arithmetic instructions.
+(define_insn_reservation "arcv_rmx100_alu_arith" 1
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "unknown, const, arith, shift, slt, multi, auipc, nop,
+                       logical, move, atomic, mvpair, bitmanip, clz, ctz, cpop,
+                       zicond, condmove, clmul, min, max, minu, maxu, rotate"))
+  "arcv_rmx100_ALU")
+
+(define_insn_reservation "arcv_rmx100_jmp_insn" 1
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "branch, jump, call, jalr, ret, trap"))
+  "arcv_rmx100_ALU")
+
+; DIV insn: latency may be overridden by a define_bypass
+(define_insn_reservation "arcv_rmx100_div_insn" 35
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "idiv"))
+  "arcv_rmx100_DIV*35")
+
+; MPY insn: latency may be overridden by a define_bypass
+(define_insn_reservation "arcv_rmx100_mpy32_insn" 9
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "imul"))
+  "arcv_rmx100_MPY")
+
+(define_insn_reservation "arcv_rmx100_load_insn" 3
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "load"))
+  "arcv_rmx100_DMP,nothing*2")
+
+(define_insn_reservation "arcv_rmx100_store_insn" 1
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "store,fpstore"))
+  "arcv_rmx100_DMP")
+
+;; FPU scheduling.  FIXME: This is based on the "fast" unit for now, the "slow"
+;; option remains to be implemented later (together with the -mfpu flag).
+
+(define_insn_reservation "arcv_rmx100_fpload_insn" 3
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "fpload"))
+  "arcv_rmx100_DMP,nothing*2")
+
+(define_insn_reservation "arcv_rmx100_farith_insn" 2
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "fadd,fcmp"))
+  "arcv_rmx100_FPU,nothing")
+
+(define_insn_reservation "arcv_rmx100_xfer" 1
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "fmove,mtc,mfc,fcvt,fcvt_f2i,fcvt_i2f"))
+   "arcv_rmx100_FPU")
+
+(define_insn_reservation "arcv_rmx100_fmul_insn" 2
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "fmul"))
+  "arcv_rmx100_FPU,nothing")
+
+(define_insn_reservation "arcv_rmx100_fmac_insn" 2
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "fmadd"))
+  "arcv_rmx100_FPU,nothing")
+
+(define_insn_reservation "arcv_rmx100_fdiv_insn" 10
+  (and (eq_attr "tune" "arcv_rmx100")
+       (eq_attr "type" "fdiv,fsqrt"))
+  "arcv_rmx100_FPU")
+
+
+(define_bypass 1 "arcv_rmx100_mpy32_insn"
+  "arcv_rmx100_*" "arcv_mpy_1c_bypass_p")
+(define_bypass 2 "arcv_rmx100_mpy32_insn"
+  "arcv_rmx100_*" "arcv_mpy_2c_bypass_p")
+
+(define_bypass 9 "arcv_rmx100_div_insn" "arcv_rmx100_*" "arcv_mpy_1c_bypass_p")
+(define_bypass 9 "arcv_rmx100_div_insn" "arcv_rmx100_*" "arcv_mpy_2c_bypass_p")
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index 7266b5eac11..e1287121101 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -51,6 +51,7 @@ RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info)
 RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info)
 RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info)
 RISCV_TUNE("spacemit-x60", spacemit_x60, spacemit_x60_tune_info)
+RISCV_TUNE("arc-v-rmx-100-series", arcv_rmx100, arcv_rmx100_tune_info)
 RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info)
 RISCV_TUNE("size", generic, optimize_size_tune_info)
 RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info)
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 9b92a965e27..bcfc7a642bc 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -65,6 +65,7 @@ enum riscv_microarchitecture_type {
   andes_23_series,
   andes_45_series,
   spacemit_x60,
+  arcv_rmx100,
 };
 extern enum riscv_microarchitecture_type riscv_microarchitecture;
 
@@ -89,6 +90,13 @@ enum rvv_max_lmul_enum {
   RVV_DYNAMIC = 9
 };
 
+/* ARC-V multiply option.  */
+enum arcv_mpy_option_enum {
+  ARCV_MPY_OPTION_1C = 1,
+  ARCV_MPY_OPTION_2C = 2,
+  ARCV_MPY_OPTION_10C = 8,
+};
+
 enum riscv_multilib_select_kind {
   /* Select multilib by builtin way.  */
   select_by_builtin,
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index abf9df77891..fa5d906d205 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -839,6 +839,10 @@ extern const char *th_output_move (rtx, rtx);
 extern bool th_print_operand_address (FILE *, machine_mode, rtx);
 #endif
 
+extern bool arcv_mpy_1c_bypass_p (rtx_insn *, rtx_insn *);
+extern bool arcv_mpy_2c_bypass_p (rtx_insn *, rtx_insn *);
+extern bool arcv_mpy_10c_bypass_p (rtx_insn *, rtx_insn *);
+
 extern bool strided_load_broadcast_p (void);
 extern bool riscv_prefer_agnostic_p (void);
 extern bool riscv_use_divmod_expander (void);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 96519c96a2b..ca29db4ffee 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -834,6 +834,31 @@ static const struct riscv_tune_param andes_45_tune_info = {
   true,                                                /* prefer-agnostic.  */
 };
 
+/* Costs to use when optimizing for Synopsys RMX-100.  */
+static const struct riscv_tune_param arcv_rmx100_tune_info = {
+  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},      /* fp_add */
+  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},      /* fp_mul */
+  {COSTS_N_INSNS (17), COSTS_N_INSNS (17)},    /* fp_div */
+  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)},      /* int_mul */
+  {COSTS_N_INSNS (17), COSTS_N_INSNS (17)},    /* int_div */
+  1,                                           /* issue_rate */
+  4,                                           /* branch_cost */
+  2,                                           /* memory_cost */
+  4,                                           /* fmv_cost */
+  false,                                       /* slow_unaligned_access */
+  false,                                       /* vector_unaligned_access */
+  false,                                       /* use_divmod_expansion */
+  false,                                       /* overlap_op_by_pieces */
+  true,                                                /* use_zero_stride_load */
+  false,                                       /* speculative_sched_vsetvl */
+  RISCV_FUSE_NOTHING,                          /* fusible_ops */
+  NULL,                                                /* vector cost */
+  NULL,                                                /* function_align */
+  NULL,                                                /* jump_align */
+  NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
+};
+
 static bool riscv_avoid_shrink_wrapping_separate ();
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
 static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
@@ -10583,6 +10608,30 @@ riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
   return store_data_bypass_p (out_insn, in_insn);
 }
 
+/* Implement one boolean function for each of the values of the
+   arcv_mpy_option enum, for the needs of arcv-rmx100.md.  */
+
+bool
+arcv_mpy_1c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED,
+                      rtx_insn *in_insn ATTRIBUTE_UNUSED)
+{
+  return arcv_mpy_option == ARCV_MPY_OPTION_1C;
+}
+
+bool
+arcv_mpy_2c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED,
+                      rtx_insn *in_insn ATTRIBUTE_UNUSED)
+{
+  return arcv_mpy_option == ARCV_MPY_OPTION_2C;
+}
+
+bool
+arcv_mpy_10c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED,
+                       rtx_insn *in_insn ATTRIBUTE_UNUSED)
+{
+  return arcv_mpy_option == ARCV_MPY_OPTION_10C;
+}
+
 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
 
    When floating-point registers are wider than integer ones, moves between
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 6f8cd26e5c9..8cc5dbd8efa 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -674,7 +674,8 @@
 ;; Keep this in sync with enum riscv_microarchitecture.
 (define_attr "tune"
   "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,
-   tt_ascalon_d8,andes_25_series,andes_23_series,andes_45_series,spacemit_x60"
+   tt_ascalon_d8,andes_25_series,andes_23_series,andes_45_series,spacemit_x60,
+   arcv_rmx100"
   (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
 
 ;; Describe a user's asm statement.
@@ -4993,3 +4994,4 @@
 (include "andes-25-series.md")
 (include "andes-45-series.md")
 (include "spacemit-x60.md")
+(include "arcv-rmx100.md")
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 452062c6500..5ed35b7fb49 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -412,3 +412,20 @@ Specifies whether the fence.tso instruction should be used.
 mautovec-segment
 Target Integer Var(riscv_mautovec_segment) Init(1)
 Enable (default) or disable generation of vector segment load/store instructions.
+
+Enum
+Name(arcv_mpy_option) Type(enum arcv_mpy_option_enum)
+Valid arguments to -mmpy-option=:
+
+EnumValue
+Enum(arcv_mpy_option) String(1c) Value(ARCV_MPY_OPTION_1C)
+
+EnumValue
+Enum(arcv_mpy_option) String(2c) Value(ARCV_MPY_OPTION_2C)
+
+EnumValue
+Enum(arcv_mpy_option) String(10c) Value(ARCV_MPY_OPTION_10C)
+
+mmpy-option=
+Target RejectNegative Joined Enum(arcv_mpy_option) Var(arcv_mpy_option) Init(ARCV_MPY_OPTION_2C)
+The type of MPY unit used by the RMX-100 core (to be used in combination with -mtune=arc-v-rmx-100-series) (default: 2c).
diff --git a/gcc/doc/riscv-mtune.texi b/gcc/doc/riscv-mtune.texi
index 3e61d11462a..c9c2fa62dd3 100644
--- a/gcc/doc/riscv-mtune.texi
+++ b/gcc/doc/riscv-mtune.texi
@@ -52,6 +52,8 @@ particular CPU name.  Permissible values for this option are:
 
 @samp{spacemit-x60},
 
+@samp{arc-v-rmx-100-series},
+
 @samp{generic-ooo},
 
 @samp{size},
-- 
2.34.0

Reply via email to