Add mtune and mcpu options for the ARC HS4x release 3.10a type CPU. The
mtune=release31a option is designed to be used as an alternative to the
mcpu=hs4x_rel31 option.
ARC HS4x release 3.10a uses DSP instructions which are implemented a bit
differently than mpy9. Hence, use the safer mpy2 option.

gcc/
        * config/arc/arc-arch.h (arc_tune_attr): Add
        ARC_TUNE_ARCHS4X_REL31A variant.
        * config/arc/arc.cc (arc_override_options): Tune options for
        release 310a.
        (arc_sched_issue_rate): Use correct enum.
        (arc600_corereg_hazard): Textual change.
        (arc_hazard): Add release 310a tuning.
        * config/arc/arc.md (tune): Update and take into consideration new
        tune option.
        (tune_dspmpy): Likewise.
        (tune_store): New attribute.
        * config/arc/arc.opt (mtune): New tune option.
        * config/arc/arcHS4x.md (hs4x_brcc0, hs4x_brcc1): New cpu units.
        (hs4x_brcc_op): New instruction reservation.
        (hs4x_data_store_1_op): Likewise.
        * config/arc/arc-cpus.def (hs4x_rel31): New cpu variant.
        * config/arc/arc-tables.opt: Regenerate.
        * config/arc/t-multilib: Likewise.
        * doc/invoke.texi (ARC): Update mcpu and tune sections.

Signed-off-by: Claudiu Zissulescu <claz...@gmail.com>
---
 gcc/config/arc/arc-arch.h     |   3 +-
 gcc/config/arc/arc-cpus.def   |   1 +
 gcc/config/arc/arc-tables.opt |   3 +
 gcc/config/arc/arc.cc         | 192 +++++++++++++++++++++-------------
 gcc/config/arc/arc.md         |  32 +++---
 gcc/config/arc/arc.opt        |   3 +
 gcc/config/arc/arcHS4x.md     |  17 ++-
 gcc/config/arc/t-multilib     |   4 +-
 gcc/doc/invoke.texi           |  16 +++
 9 files changed, 181 insertions(+), 90 deletions(-)

diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h
index 4c728a87453..83b156ee34a 100644
--- a/gcc/config/arc/arc-arch.h
+++ b/gcc/config/arc/arc-arch.h
@@ -77,7 +77,8 @@ enum arc_tune_attr
     ARC_TUNE_CORE_3,
     ARC_TUNE_ARCHS4X,
     ARC_TUNE_ARCHS4XD,
-    ARC_TUNE_ARCHS4XD_SLOW
+    ARC_TUNE_ARCHS4XD_SLOW,
+    ARC_TUNE_ARCHS4X_REL31A
   };
 
 /* Extra options for a processor template to hold any CPU specific
diff --git a/gcc/config/arc/arc-cpus.def b/gcc/config/arc/arc-cpus.def
index baf61db02ed..5668b0fbf19 100644
--- a/gcc/config/arc/arc-cpus.def
+++ b/gcc/config/arc/arc-cpus.def
@@ -64,6 +64,7 @@ ARC_CPU (hs38,             hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, NONE)
 ARC_CPU (hs38_linux, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64|FL_FPU_FPUD_ALL, NONE, NONE)
 ARC_CPU (hs4x,  hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4X)
 ARC_CPU (hs4xd, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4XD)
+ARC_CPU (hs4x_rel31, hs, FL_MPYOPT_2|FL_DIVREM|FL_LL64, NONE, ARCHS4X_REL31A)
 
 ARC_CPU (arc600,         6xx, FL_BS, NONE, ARC600)
 ARC_CPU (arc600_norm,    6xx, FL_BS|FL_NORM, NONE, ARC600)
diff --git a/gcc/config/arc/arc-tables.opt b/gcc/config/arc/arc-tables.opt
index 8cc5135205d..0a0d354db60 100644
--- a/gcc/config/arc/arc-tables.opt
+++ b/gcc/config/arc/arc-tables.opt
@@ -69,6 +69,9 @@ Enum(processor_type) String(hs4x) Value(PROCESSOR_hs4x)
 EnumValue
 Enum(processor_type) String(hs4xd) Value(PROCESSOR_hs4xd)
 
+EnumValue
+Enum(processor_type) String(hs4x_rel31) Value(PROCESSOR_hs4x_rel31)
+
 EnumValue
 Enum(processor_type) String(arc600) Value(PROCESSOR_arc600)
 
diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
index 77730c88e55..064790bf396 100644
--- a/gcc/config/arc/arc.cc
+++ b/gcc/config/arc/arc.cc
@@ -646,8 +646,8 @@ arc_sched_issue_rate (void)
 {
   switch (arc_tune)
     {
-    case TUNE_ARCHS4X:
-    case TUNE_ARCHS4XD:
+    case ARC_TUNE_ARCHS4X:
+    case ARC_TUNE_ARCHS4XD:
       return 3;
     default:
       break;
@@ -1458,6 +1458,12 @@ arc_override_options (void)
   if (!OPTION_SET_P (unaligned_access) && TARGET_HS)
     unaligned_access = 1;
 
+  if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A))
+    {
+      TARGET_CODE_DENSITY_FRAME = 0;
+      flag_delayed_branch = 0;
+    }
+
   /* These need to be done at start up.  It's convenient to do them here.  */
   arc_init ();
 }
@@ -7817,6 +7823,115 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
   return arc_store_addr_hazard_internal_p (producer, consumer);
 }
 
+/* Return length adjustment for INSN.
+   For ARC600:
+   A write to a core reg greater or equal to 32 must not be immediately
+   followed by a use.  Anticipate the length requirement to insert a nop
+   between PRED and SUCC to prevent a hazard.  */
+
+static int
+arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
+{
+  if (!TARGET_ARC600)
+    return 0;
+  if (GET_CODE (PATTERN (pred)) == SEQUENCE)
+    pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
+  if (GET_CODE (PATTERN (succ)) == SEQUENCE)
+    succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
+  if (recog_memoized (pred) == CODE_FOR_mulsi_600
+      || recog_memoized (pred) == CODE_FOR_umul_600
+      || recog_memoized (pred) == CODE_FOR_mac_600
+      || recog_memoized (pred) == CODE_FOR_mul64_600
+      || recog_memoized (pred) == CODE_FOR_mac64_600
+      || recog_memoized (pred) == CODE_FOR_umul64_600
+      || recog_memoized (pred) == CODE_FOR_umac64_600)
+    return 0;
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
+    {
+      const_rtx x = *iter;
+      switch (GET_CODE (x))
+       {
+       case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
+         break;
+       default:
+         /* This is also fine for PRE/POST_MODIFY, because they
+            contain a SET.  */
+         continue;
+       }
+      rtx dest = XEXP (x, 0);
+      /* Check if this sets an extension register.  N.B. we use 61 for the
+        condition codes, which is definitely not an extension register.  */
+      if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
+         /* Check if the same register is used by the PAT.  */
+         && (refers_to_regno_p
+             (REGNO (dest),
+              REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
+              PATTERN (succ), 0)))
+       return 4;
+    }
+  return 0;
+}
+
+/* For ARC600:
+   A write to a core reg greater or equal to 32 must not be immediately
+   followed by a use.  Anticipate the length requirement to insert a nop
+   between PRED and SUCC to prevent a hazard.  */
+
+int
+arc_hazard (rtx_insn *pred, rtx_insn *succ)
+{
+  if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
+    return 0;
+
+  if (TARGET_ARC600)
+    return arc600_corereg_hazard (pred, succ);
+
+  return 0;
+}
+
+/* When compiling for release 310a, insert a nop between a store and an
+   adjacent brcc or loop_end instruction, whichever comes first.  */
+
+static int
+arc_check_release31a (rtx_insn *pred, rtx_insn *succ)
+{
+  if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
+    return 0;
+
+  if (!JUMP_P (pred) && !single_set (pred))
+    return 0;
+
+  if (!JUMP_P (succ) && !single_set (succ))
+    return 0;
+
+  if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A))
+    switch (get_attr_type (pred))
+      {
+      case TYPE_STORE:
+       switch (get_attr_type (succ))
+         {
+         case TYPE_BRCC:
+         case TYPE_BRCC_NO_DELAY_SLOT:
+         case TYPE_LOOP_END:
+           return 1;
+         default:
+           break;
+         }
+       break;
+      case TYPE_BRCC:
+      case TYPE_BRCC_NO_DELAY_SLOT:
+      case TYPE_LOOP_END:
+       if (get_attr_type (succ) == TYPE_STORE)
+         return 1;
+       break;
+      default:
+       break;
+      }
+
+  return 0;
+}
+
 /* The same functionality as arc_hazard.  It is called in machine
    reorg before any other optimization.  Hence, the NOP size is taken
    into account when doing branch shortening.  */
@@ -7830,10 +7945,8 @@ workaround_arc_anomaly (void)
   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
     {
       succ0 = next_real_insn (insn);
-      if (arc_hazard (insn, succ0))
-       {
-         emit_insn_before (gen_nopv (), succ0);
-       }
+      if (arc_hazard (insn, succ0) || arc_check_release31a (insn, succ0))
+       emit_insn_before (gen_nopv (), succ0);
     }
 
   if (!TARGET_ARC700)
@@ -9324,56 +9437,6 @@ disi_highpart (rtx in)
   return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
 }
 
-/* Return length adjustment for INSN.
-   For ARC600:
-   A write to a core reg greater or equal to 32 must not be immediately
-   followed by a use.  Anticipate the length requirement to insert a nop
-   between PRED and SUCC to prevent a hazard.  */
-
-static int
-arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
-{
-  if (!TARGET_ARC600)
-    return 0;
-  if (GET_CODE (PATTERN (pred)) == SEQUENCE)
-    pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
-  if (GET_CODE (PATTERN (succ)) == SEQUENCE)
-    succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
-  if (recog_memoized (pred) == CODE_FOR_mulsi_600
-      || recog_memoized (pred) == CODE_FOR_umul_600
-      || recog_memoized (pred) == CODE_FOR_mac_600
-      || recog_memoized (pred) == CODE_FOR_mul64_600
-      || recog_memoized (pred) == CODE_FOR_mac64_600
-      || recog_memoized (pred) == CODE_FOR_umul64_600
-      || recog_memoized (pred) == CODE_FOR_umac64_600)
-    return 0;
-  subrtx_iterator::array_type array;
-  FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
-    {
-      const_rtx x = *iter;
-      switch (GET_CODE (x))
-       {
-       case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
-         break;
-       default:
-         /* This is also fine for PRE/POST_MODIFY, because they
-            contain a SET.  */
-         continue;
-       }
-      rtx dest = XEXP (x, 0);
-      /* Check if this sets an extension register.  N.B. we use 61 for the
-        condition codes, which is definitely not an extension register.  */
-      if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
-         /* Check if the same register is used by the PAT.  */
-         && (refers_to_regno_p
-             (REGNO (dest),
-              REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
-              PATTERN (succ), 0)))
-       return 4;
-    }
-  return 0;
-}
-
 /* Given a rtx, check if it is an assembly instruction or not.  */
 
 static int
@@ -9408,23 +9471,6 @@ arc_asm_insn_p (rtx x)
   return 0;
 }
 
-/* For ARC600:
-   A write to a core reg greater or equal to 32 must not be immediately
-   followed by a use.  Anticipate the length requirement to insert a nop
-   between PRED and SUCC to prevent a hazard.  */
-
-int
-arc_hazard (rtx_insn *pred, rtx_insn *succ)
-{
-  if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
-    return 0;
-
-  if (TARGET_ARC600)
-    return arc600_corereg_hazard (pred, succ);
-
-  return 0;
-}
-
 /* Return length adjustment for INSN.  */
 
 int
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 39b358052c1..7170445309f 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -645,22 +645,21 @@ (define_delay (eq_attr "type" "sfunc")
 ;;   is made that makes conditional execution required.
 
 (define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \
-core_3, archs4x, archs4xd, archs4xd_slow"
+archs4x, archs4xd"
   (const
-   (cond [(symbol_ref "arc_tune == TUNE_ARC600")
+   (cond [(symbol_ref "arc_tune == ARC_TUNE_ARC600")
          (const_string "arc600")
          (symbol_ref "arc_tune == ARC_TUNE_ARC7XX")
          (const_string "arc7xx")
-         (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD")
+         (symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_STD")
          (const_string "arc700_4_2_std")
-         (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
+         (symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_XMAC")
          (const_string "arc700_4_2_xmac")
-         (symbol_ref "arc_tune == ARC_TUNE_CORE_3")
-         (const_string "core_3")
-         (symbol_ref "arc_tune == TUNE_ARCHS4X")
+         (ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X")
+              (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A"))
          (const_string "archs4x")
-         (ior (symbol_ref "arc_tune == TUNE_ARCHS4XD")
-              (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW"))
+         (ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD")
+              (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW"))
          (const_string "archs4xd")]
         (const_string "none"))))
 
@@ -671,13 +670,22 @@ (define_attr "tune_arc700" "false,true"
 
 (define_attr "tune_dspmpy" "none, slow, fast"
   (const
-  (cond [(ior (symbol_ref "arc_tune == TUNE_ARCHS4X")
-             (symbol_ref "arc_tune == TUNE_ARCHS4XD"))
+  (cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X")
+             (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD"))
         (const_string "fast")
-        (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW")
+        (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW")
         (const_string "slow")]
        (const_string "none"))))
 
+(define_attr "tune_store" "none, normal, rel31a"
+  (const
+  (cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X")
+             (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD"))
+        (const_string "normal")
+        (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A")
+        (const_string "rel31a")]
+       (const_string "none"))))
+
 ;; Move instructions.
 (define_expand "movqi"
   [(set (match_operand:QI 0 "move_dest_operand" "")
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index eb85f49b283..0add5a2a21f 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -276,6 +276,9 @@ Enum(arc_tune_attr) String(arc750d) Value(ARC_TUNE_ARC700_4_2_XMAC)
 EnumValue
 Enum(arc_tune_attr) String(core3) Value(ARC_TUNE_CORE_3)
 
+EnumValue
+Enum(arc_tune_attr) String(release31a) Value(ARC_TUNE_ARCHS4X_REL31A)
+
 mindexed-loads
 Target Var(TARGET_INDEXED_LOADS) Init(TARGET_INDEXED_LOADS_DEFAULT)
 Enable the use of indexed loads.
diff --git a/gcc/config/arc/arcHS4x.md b/gcc/config/arc/arcHS4x.md
index 5136eba2b98..1009833d400 100644
--- a/gcc/config/arc/arcHS4x.md
+++ b/gcc/config/arc/arcHS4x.md
@@ -27,14 +27,21 @@ (define_cpu_unit "hs4x_divrem" "ARCHS4x")
 (define_cpu_unit "hs4x_mult" "ARCHS4x")
 (define_cpu_unit "hs4x_x1, hs4x_x2" "ARCHS4x")
 (define_cpu_unit "hs4x_y1, hs4x_y2" "ARCHS4x")
+(define_cpu_unit "hs4x_brcc0, hs4x_brcc1" "ARCHS4x")
 
 (define_insn_reservation "hs4x_brj_op" 1
   (and (match_test "TARGET_HS")
        (eq_attr "tune" "archs4x, archs4xd")
        (eq_attr "type" "call, call_no_delay_slot, uncond_branch, jump, \
-branch, brcc,brcc_no_delay_slot, sfunc"))
+branch, sfunc"))
   "hs4x_issue0")
 
+(define_insn_reservation "hs4x_brcc_op" 1
+  (and (match_test "TARGET_HS")
+       (eq_attr "tune" "archs4x, archs4xd")
+       (eq_attr "type" "brcc,brcc_no_delay_slot,loop_end"))
+  "hs4x_issue0 + hs4x_brcc0 + hs4x_brcc1")
+
 (define_insn_reservation "hs4x_data_load_op" 4
   (and (match_test "TARGET_HS")
        (eq_attr "tune" "archs4x, archs4xd")
@@ -43,10 +50,16 @@ (define_insn_reservation "hs4x_data_load_op" 4
 
 (define_insn_reservation "hs4x_data_store_op" 1
   (and (match_test "TARGET_HS")
-       (eq_attr "tune" "archs4x, archs4xd")
+       (eq_attr "tune_store" "normal")
        (eq_attr "type" "store"))
   "hs4x_issue1 + hs4x_ld_st")
 
+(define_insn_reservation "hs4x_data_store_1_op" 2
+  (and (match_test "TARGET_HS")
+       (eq_attr "tune_store" "rel31a")
+       (eq_attr "type" "store"))
+  "hs4x_issue1 + hs4x_ld_st + hs4x_brcc0, hs4x_brcc1")
+
 ;; Advanced ALU
 (define_insn_reservation "hs4x_adv_alue_op" 4
   (and (match_test "TARGET_HS")
diff --git a/gcc/config/arc/t-multilib b/gcc/config/arc/t-multilib
index 8d97ad1deea..921945eba44 100644
--- a/gcc/config/arc/t-multilib
+++ b/gcc/config/arc/t-multilib
@@ -21,9 +21,9 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 
-MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400
+MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=hs4x_rel31/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400
 
-MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400
+MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd hs4x_rel31 arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400
 
 # Aliases:
 MULTILIB_MATCHES  = mcpu?arc600=mcpu?ARC600
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 84d6f0f9860..94fe57aa4e2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20053,6 +20053,15 @@ Compile for ARC HS38 CPU.
 @item hs38_linux
 Compile for ARC HS38 CPU with all hardware extensions on.
 
+@item hs4x
+Compile for ARC HS4x CPU.
+
+@item hs4xd
+Compile for ARC HS4xD CPU.
+
+@item hs4x_rel31
+Compile for ARC HS4x CPU release 3.10a.
+
 @item arc600_norm
 Compile for ARC 600 CPU with @code{norm} instructions enabled.
 
@@ -20662,6 +20671,13 @@ Tune for ARC725D CPU.
 @item ARC750D
 Tune for ARC750D CPU.
 
+@item core3
+Tune for ARCv2 core3 type CPU.  This option enables usage of the
+@code{dbnz} instruction.
+
+@item release31a
+Tune for ARC HS4x CPU release 3.10a.
+
 @end table
 
 @item -mmultcost=@var{num}
-- 
2.30.2

Reply via email to