ping

Various CPUs have max_cond_insns set to 5 due to historical reasons.
Benchmarking shows that max_cond_insns=2 is fastest on modern Cortex-A
cores, so change it to 2 for all Cortex-A cores.  Set max_cond_insns
to 4 on Thumb-2 architectures given it's already limited to that by
MAX_INSN_PER_IT_BLOCK.  Also use the CPU tuning setting when a CPU/tune
is selected if -mrestrict-it is not explicitly set.

On Cortex-A57 this gives 1.1% performance gain on SPECINT2006 as well
as a 0.4% codesize reduction.

Bootstrapped on armhf. OK for commit?

ChangeLog:

2019-08-19  Wilco Dijkstra  <wdijk...@arm.com>

        * gcc/config/arm/arm.c (arm_option_override_internal):
        Use max_cond_insns from CPU tuning unless -mrestrict-it is used.
        (arm_v6t2_tune): set max_cond_insns to 4.
        (arm_cortex_tune): set max_cond_insns to 2.
        (arm_cortex_a8_tune): Likewise.
        (arm_cortex_a7_tune): Likewise.
        (arm_cortex_a35_tune): Likewise.
        (arm_cortex_a53_tune): Likewise.
        (arm_cortex_a5_tune): Likewise.
        (arm_cortex_a9_tune): Likewise.
        (arm_v6m_tune): set max_cond_insns to 4.
---

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 
628cf02f23fb29392a63d87f561c3ee2fb73a515..38ac16ad1def91ca78ccfa98fd1679b2b5114851
 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1943,7 +1943,7 @@ const struct tune_params arm_v6t2_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  4,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   1,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1968,7 +1968,7 @@ const struct tune_params arm_cortex_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1991,7 +1991,7 @@ const struct tune_params arm_cortex_a8_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2014,7 +2014,7 @@ const struct tune_params arm_cortex_a7_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2060,7 +2060,7 @@ const struct tune_params arm_cortex_a35_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   1,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2083,7 +2083,7 @@ const struct tune_params arm_cortex_a53_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2167,9 +2167,6 @@ const struct tune_params arm_xgene1_tune =
   tune_params::SCHED_AUTOPREF_OFF
 };
 
-/* Branches can be dual-issued on Cortex-A5, so conditional execution is
-   less appealing.  Set max_insns_skipped to a low value.  */
-
 const struct tune_params arm_cortex_a5_tune =
 {
   &cortexa5_extra_costs,
@@ -2178,7 +2175,7 @@ const struct tune_params arm_cortex_a5_tune =
   arm_cortex_a5_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  1,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2201,7 +2198,7 @@ const struct tune_params arm_cortex_a9_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_BENEFICIAL(4,32,32),
@@ -2328,7 +2325,7 @@ const struct tune_params arm_v6m_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  4,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   1,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -3050,6 +3047,11 @@ arm_option_override_internal (struct gcc_options *opts,
   if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
     opts->x_arm_restrict_it = 0;
 
+  /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
+  if (!opts_set->x_arm_restrict_it
+      && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
+    opts->x_arm_restrict_it = 0;
+
   /* Enable -munaligned-access by default for
      - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
      i.e. Thumb2 and ARM state only.

Reply via email to