> Richard Earnshaw wrote:
> If all cores seem to benefit from FP reassociation set to 4, then it
> seems odd that 4 is not also the default for generic.
> 
> Andrew, you may need to pick a target-specific value for ThunderX; I
> think Wilco has just picked something that seems plausible because he
> needs to put a real value in there.
> 
> What happens if the integer and vector numbers are bumped up?  I'd have
> thought that integer numbers >1 would be appropriate on all dual-issue
> or greater cores.

I tried int and vector as well. Setting int to 2 did give an improvement,
but vector had no effect, so I'll leave it at 1 for now. The patch is the
same as last time, except that it sets integer to 2 and uses the same
settings for all CPUs.

OK for commit?

ChangeLog:
2014-11-24  Wilco Dijkstra  <wdijk...@arm.com>

        * gcc/config/aarch64/aarch64-protos.h (struct tune_params):
        Add reassociation tuning parameters.
        * gcc/config/aarch64/aarch64.c (TARGET_SCHED_REASSOCIATION_WIDTH):
        Define.  (aarch64_reassociation_width): New function.
        (generic_tunings): Add reassociation tuning parameters.
        (cortexa53_tunings): Likewise.
        (cortexa57_tunings): Likewise.
        (thunderx_tunings): Likewise.

---
 gcc/config/aarch64/aarch64-protos.h |  3 +++
 gcc/config/aarch64/aarch64.c        | 36 ++++++++++++++++++++++++++++++++----
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index a9985b5..ac3487b 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -170,6 +170,9 @@ struct tune_params
   const struct cpu_vector_cost *const vec_costs;
   const int memmov_cost;
   const int issue_rate;
+  const int int_reassoc_width;
+  const int fp_reassoc_width;
+  const int vec_reassoc_width;
 };
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3832123..e543161 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -314,7 +314,10 @@ static const struct tune_params generic_tunings =
   &generic_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  2,   /* int_reassoc_width.  */
+  4,   /* fp_reassoc_width.  */
+  1    /* vec_reassoc_width.  */
 };
 
 static const struct tune_params cortexa53_tunings =
@@ -324,7 +327,10 @@ static const struct tune_params cortexa53_tunings =
   &cortexa53_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  2,   /* int_reassoc_width.  */
+  4,   /* fp_reassoc_width.  */
+  1    /* vec_reassoc_width.  */
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -334,7 +340,10 @@ static const struct tune_params cortexa57_tunings =
   &cortexa57_regmove_cost,
   &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 3)
+  NAMED_PARAM (issue_rate, 3),
+  2,   /* int_reassoc_width.  */
+  4,   /* fp_reassoc_width.  */
+  1    /* vec_reassoc_width.  */
 };
 
 static const struct tune_params thunderx_tunings =
@@ -344,7 +353,10 @@ static const struct tune_params thunderx_tunings =
   &thunderx_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 6),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  2,   /* int_reassoc_width.  */
+  4,   /* fp_reassoc_width.  */
+  1    /* vec_reassoc_width.  */
 };
 
 /* A processor implementing AArch64.  */
@@ -437,6 +449,19 @@ static const char * const aarch64_condition_codes[] =
   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 };
 
+static int
+aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
+                            enum machine_mode mode)
+{
+  if (VECTOR_MODE_P (mode))
+    return aarch64_tune_params->vec_reassoc_width;
+  if (INTEGRAL_MODE_P (mode))
+    return aarch64_tune_params->int_reassoc_width;
+  if (FLOAT_MODE_P (mode))
+    return aarch64_tune_params->fp_reassoc_width;
+  return 1;
+}
+
 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 unsigned
 aarch64_dbx_register_number (unsigned regno)
@@ -10499,6 +10524,9 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code)
 #undef TARGET_PREFERRED_RELOAD_CLASS
 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
 
+#undef TARGET_SCHED_REASSOCIATION_WIDTH
+#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
+
 #undef TARGET_SECONDARY_RELOAD
 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
 
-- 
1.9.1



Reply via email to