> Richard Earnshaw wrote: > If all cores seem to benefit from FP reassociation set to 4, then it > seems odd that 4 is not also the default for generic. > > Andrew, you may need to pick a target-specific value for ThunderX; I > think Wilco has just picked something that seems plausible because he > needs to put a real value in there. > > What happens if the integer and vector numbers are bumped up? I'd have > thought that integer numbers >1 would be appropriate on all dual-issue > or greater cores.
I tried int and vector as well, and setting int to 2 did give an improvement, but vector had no effect, so I'll leave it at 1 for now. The patch is the same as last time; it just sets integer to 2, and uses the same settings for all CPUs. OK for commit? ChangeLog: 2014-11-24 Wilco Dijkstra <wdijk...@arm.com> * gcc/config/aarch64/aarch64-protos.h (tune_params): Add reassociation tuning parameters. * gcc/config/aarch64/aarch64.c (TARGET_SCHED_REASSOCIATION_WIDTH): Define. (aarch64_reassociation_width): New function. (generic_tunings): Add reassociation tuning parameters. (cortexa53_tunings): Likewise. (cortexa57_tunings): Likewise. (thunderx_tunings): Likewise. --- gcc/config/aarch64/aarch64-protos.h | 3 +++ gcc/config/aarch64/aarch64.c | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index a9985b5..ac3487b 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -170,6 +170,9 @@ struct tune_params const struct cpu_vector_cost *const vec_costs; const int memmov_cost; const int issue_rate; + const int int_reassoc_width; + const int fp_reassoc_width; + const int vec_reassoc_width; }; HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 3832123..e543161 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -314,7 +314,10 @@ static const struct tune_params generic_tunings = &generic_regmove_cost, &generic_vector_cost, NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 2) + NAMED_PARAM (issue_rate, 2), + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 1 /* vec_reassoc_width. 
*/ }; static const struct tune_params cortexa53_tunings = @@ -324,7 +327,10 @@ static const struct tune_params cortexa53_tunings = &cortexa53_regmove_cost, &generic_vector_cost, NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 2) + NAMED_PARAM (issue_rate, 2), + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 1 /* vec_reassoc_width. */ }; static const struct tune_params cortexa57_tunings = @@ -334,7 +340,10 @@ static const struct tune_params cortexa57_tunings = &cortexa57_regmove_cost, &cortexa57_vector_cost, NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 3) + NAMED_PARAM (issue_rate, 3), + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 1 /* vec_reassoc_width. */ }; static const struct tune_params thunderx_tunings = @@ -344,7 +353,10 @@ static const struct tune_params thunderx_tunings = &thunderx_regmove_cost, &generic_vector_cost, NAMED_PARAM (memmov_cost, 6), - NAMED_PARAM (issue_rate, 2) + NAMED_PARAM (issue_rate, 2), + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 1 /* vec_reassoc_width. */ }; /* A processor implementing AArch64. */ @@ -437,6 +449,19 @@ static const char * const aarch64_condition_codes[] = "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" }; +static int +aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + if (VECTOR_MODE_P (mode)) + return aarch64_tune_params->vec_reassoc_width; + if (INTEGRAL_MODE_P (mode)) + return aarch64_tune_params->int_reassoc_width; + if (FLOAT_MODE_P (mode)) + return aarch64_tune_params->fp_reassoc_width; + return 1; +} + /* Provide a mapping from gcc register numbers to dwarf register numbers. 
*/ unsigned aarch64_dbx_register_number (unsigned regno) @@ -10499,6 +10524,9 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code) #undef TARGET_PREFERRED_RELOAD_CLASS #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class +#undef TARGET_SCHED_REASSOCIATION_WIDTH +#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width + #undef TARGET_SECONDARY_RELOAD #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload -- 1.9.1