>From 6376f11ae2ff95d992d93efee9a6b6506835477e Mon Sep 17 00:00:00 2001
From: Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
Date: Thu, 15 Dec 2016 12:42:54 +0000
Subject: [PATCH 3/6] Port prefetch configuration from aarch32 to aarch64
 backend.

	* config/aarch64/aarch64-protos.h (struct tune_params):
	Replace cache_line_size with struct { } prefetch -- same as in
	arm-protos.h.
	* config/aarch64/aarch64.c (AARCH64_PREFETCH_NOT_BENEFICIAL,)
	(AARCH64_PREFETCH_BENEFICIAL): New macros.
	(tune_params *_tunings): Update all tunings (no functional change).
	(aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES,
	PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE
	from tunings structures.

Change-Id: I1ddbac1863dcf078a2e5b14dd904debc76a7da94
---
 gcc/config/aarch64/aarch64-protos.h | 11 +++++---
 gcc/config/aarch64/aarch64.c        | 54 +++++++++++++++++++++++++++----------
 2 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ac91865..edca3e2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -224,9 +224,14 @@ struct tune_params
   int min_div_recip_mul_df;
   /* Value for aarch64_case_values_threshold; or 0 for the default.  */
   unsigned int max_case_values;
-  /* Value for PARAM_L1_CACHE_LINE_SIZE; or 0 to use the default.  */
-  unsigned int cache_line_size;
-
+  /* Explicit prefetch data.  */
+  struct
+    {
+      int num_slots;
+      int l1_cache_size;
+      int l1_cache_line_size;
+      int l2_cache_size;
+    } prefetch;
 /* An enum specifying how to take into account CPU autoprefetch capabilities
    during instruction scheduling:
    - AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5e546bf..a87b216 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -526,6 +526,15 @@ static const cpu_approx_modes xgene1_approx_modes =
   AARCH64_APPROX_ALL	/* recip_sqrt  */
 };
 
+#define AARCH64_PREFETCH_NOT_BENEFICIAL { 0, -1, -1, -1 }
+#define AARCH64_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size,l2_size) \
+  {									\
+    num_slots,								\
+    l1_size,								\
+    l1_line_size,							\
+    l2_size								\
+  }
+
 static const struct tune_params generic_tunings =
 {
   &cortexa57_extra_costs,
@@ -546,7 +555,7 @@ static const struct tune_params generic_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
+  AARCH64_PREFETCH_NOT_BENEFICIAL,
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -572,7 +581,7 @@ static const struct tune_params cortexa35_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
+  AARCH64_PREFETCH_NOT_BENEFICIAL,
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -598,7 +607,7 @@ static const struct tune_params cortexa53_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
+  AARCH64_PREFETCH_NOT_BENEFICIAL,
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -624,7 +633,7 @@ static const struct tune_params cortexa57_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
+  AARCH64_PREFETCH_NOT_BENEFICIAL,
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)	/* tune_flags.  */
 };
@@ -650,7 +659,7 @@ static const struct tune_params cortexa72_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
+  AARCH64_PREFETCH_NOT_BENEFICIAL,
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -676,7 +685,7 @@ static const struct tune_params cortexa73_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
+  AARCH64_PREFETCH_NOT_BENEFICIAL,
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -701,7 +710,7 @@ static const struct tune_params exynosm1_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   48,	/* max_case_values.  */
-  64,	/* cache_line_size.  */
+  AARCH64_PREFETCH_BENEFICIAL (0, -1, 64, -1),
   tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
 };
@@ -726,7 +735,7 @@ static const struct tune_params thunderx_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
+  AARCH64_PREFETCH_NOT_BENEFICIAL,
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)	/* tune_flags.  */
 };
@@ -751,7 +760,7 @@ static const struct tune_params xgene1_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
+  AARCH64_PREFETCH_NOT_BENEFICIAL,
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -777,7 +786,7 @@ static const struct tune_params qdf24xx_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  64,	/* cache_line_size.  */
+  AARCH64_PREFETCH_BENEFICIAL (0, -1, 64, -1),
   tune_params::AUTOPREFETCHER_STRONG,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)		/* tune_flags.  */
 };
@@ -802,7 +811,7 @@ static const struct tune_params thunderx2t99_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  64,	/* cache_line_size.  */
+  AARCH64_PREFETCH_BENEFICIAL (0, -1, 64, -1),
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -8749,10 +8758,27 @@ aarch64_override_options_internal (struct gcc_options *opts)
 			 opts->x_param_values,
 			 global_options_set.x_param_values);
 
-  /* Set the L1 cache line size.  */
-  if (selected_cpu->tune->cache_line_size != 0)
+  /* Set up parameters to be used in prefetching algorithm.  Do not
+     override the defaults unless we are tuning for a core we have
+     researched values for.  */
+  if (aarch64_tune_params.prefetch.num_slots > 0)
+    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+			   aarch64_tune_params.prefetch.num_slots,
+			   opts->x_param_values,
+			   global_options_set.x_param_values);
+  if (aarch64_tune_params.prefetch.l1_cache_size >= 0)
+    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
+			   aarch64_tune_params.prefetch.l1_cache_size,
+			   opts->x_param_values,
+			   global_options_set.x_param_values);
+  if (aarch64_tune_params.prefetch.l1_cache_line_size >= 0)
     maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
-			   selected_cpu->tune->cache_line_size,
+			   aarch64_tune_params.prefetch.l1_cache_line_size,
+			   opts->x_param_values,
+			   global_options_set.x_param_values);
+  if (aarch64_tune_params.prefetch.l2_cache_size >= 0)
+    maybe_set_param_value (PARAM_L2_CACHE_SIZE,
+			   aarch64_tune_params.prefetch.l2_cache_size,
 			   opts->x_param_values,
 			   global_options_set.x_param_values);
 
-- 
2.7.4

