Hi Kyrill,

> Hmm, I'm not too confident on that. I'd support such a change for the 
> generic arm_cortex_tune, definitely, and the Armv8-a based ones, but I 
> don't think the argument is as strong for Cortex-A7, Cortex-A8, Cortex-A9.
>
> So let's make the change for the Armv8-A-based cores now. If you get 
> benchmarking data for the older ones (such systems may or may not be 
> easy to get a hold of) we can update those separately.

I ran some experiments on Cortex-A53, and they show that the difference
between 2, 3 and 4 is smaller than on out-of-order cores (which clearly
prefer 2). So it seems alright to set it to 4 for the older in-order cores -
see the updated patch below.

>>   Set max_cond_insns
>> to 4 on Thumb-2 architectures given it's already limited to that by
>> MAX_INSN_PER_IT_BLOCK.  Also use the CPU tuning setting when a CPU/tune
>> is selected if -mrestrict-it is not explicitly set.
>
> This can go in as a separate patch from the rest, thanks.

Sure, I'll split that part off into a separate patch.

Cheers,
Wilco

[PATCH v2][ARM] Improve max_cond_insns setting for Cortex cores

Various CPUs have max_cond_insns set to 5 due to historical reasons.
Benchmarking shows that max_cond_insns=2 is fastest on modern Cortex-A
cores, so change it to 2. Set it to 4 on older in-order cores as that is
the MAX_INSN_PER_IT_BLOCK limit for Thumb-2.

Bootstrapped on armhf. OK for commit?

ChangeLog:

2019-12-03  Wilco Dijkstra  <wdijk...@arm.com>

        * config/arm/arm.c (arm_v6t2_tune): Set max_cond_insns to 4.
        (arm_cortex_tune): Set max_cond_insns to 2.
        (arm_cortex_a8_tune): Set max_cond_insns to 4.
        (arm_cortex_a7_tune): Likewise.
        (arm_cortex_a35_tune): Set max_cond_insns to 2.
        (arm_cortex_a53_tune): Likewise.
        (arm_cortex_a5_tune): Set max_cond_insns to 4.
        (arm_cortex_a9_tune): Likewise.
        (arm_v6m_tune): Likewise.
--

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index a6b401b7f2e3738ff68316bd83d6e5a2bcf0e7d7..daebe76352d62ad94556762b4e3bc3d0532ad411 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1947,7 +1947,7 @@ const struct tune_params arm_v6t2_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  4,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   1,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1971,7 +1971,7 @@ const struct tune_params arm_cortex_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1993,7 +1993,7 @@ const struct tune_params arm_cortex_a8_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  4,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2015,7 +2015,7 @@ const struct tune_params arm_cortex_a7_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  4,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2059,7 +2059,7 @@ const struct tune_params arm_cortex_a35_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   1,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2081,7 +2081,7 @@ const struct tune_params arm_cortex_a53_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  2,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2161,9 +2161,6 @@ const struct tune_params arm_xgene1_tune =
   tune_params::SCHED_AUTOPREF_OFF
 };
 
-/* Branches can be dual-issued on Cortex-A5, so conditional execution is
-   less appealing.  Set max_insns_skipped to a low value.  */
-
 const struct tune_params arm_cortex_a5_tune =
 {
   &cortexa5_extra_costs,
@@ -2172,7 +2169,7 @@ const struct tune_params arm_cortex_a5_tune =
   arm_cortex_a5_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  1,                                           /* Max cond insns.  */
+  4,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2194,7 +2191,7 @@ const struct tune_params arm_cortex_a9_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  4,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   2,                                           /* Issue rate.  */
   ARM_PREFETCH_BENEFICIAL(4,32,32),
@@ -2316,7 +2313,7 @@ const struct tune_params arm_v6m_tune =
   arm_default_branch_cost,
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
   1,                                           /* Constant limit.  */
-  5,                                           /* Max cond insns.  */
+  4,                                           /* Max cond insns.  */
   8,                                           /* Memset max inline.  */
   1,                                           /* Issue rate.  */
   ARM_PREFETCH_NOT_BENEFICIAL,

Reply via email to