From: Dhruv Chawla <[email protected]>

This patch implements summary support in auto-profile, similar to LLVM.
The summary records aggregate information about the profile being read,
such as the number of functions, the maximum sample count, the total
number of samples and so on.

It also adds a section called the "detailed summary", which records, for
each of a set of percentile cutoffs, the histogram-derived minimum
execution count a sample needs in order to belong to that percentile.
This is used to decide the hot count threshold, which can be controlled
with the afdo-profile-summary-cutoff-hot parameter.  By default, any
sample belonging to the 99th percentile is marked as hot.

The corresponding AutoFDO pull request is
https://github.com/google/autofdo/pull/251.

Signed-off-by: Dhruv Chawla <[email protected]>

gcc/ChangeLog:

        * auto-profile.cc (INCLUDE_ALGORITHM): Add <algorithm> include.
        (AUTO_PROFILE_VERSION): Update to 3 from 2.
        (maybe_hot_afdo_count_p): Fix typo
        afdo_hot_bb_threshod -> afdo_hot_bb_threshold.
        (struct summary_info): New struct.
        (summary_info::read): New function.
        (summary_info::get_threshold_count): Likewise.
        (function_instance::read_function_instance): Do not accumulate
        afdo_profile_info->sum_max while reading counts.
        (autofdo_source_profile::read): Read afdo_profile_info->sum_max
        directly from summary info.  Set afdo_hot_bb_threshold from
        param_afdo_profile_summary_cutoff_hot.
        (read_profile): Call summary_info->read.
        (end_auto_profile): Free afdo_summary_info.
        (min_afdo_hot_count): New function.
        (max_afdo_cold_count): Likewise.
        * auto-profile.h (min_afdo_hot_count): Declare.
        (max_afdo_cold_count): Likewise.
        * doc/invoke.texi: Document the new parameters.
        * gcov-io.h (GCOV_TAG_AFDO_SUMMARY): New define.
        * params.opt: New parameters.

gcc/c/ChangeLog:

        * Make-lang.in: Update GCOV version to 3.

gcc/cp/ChangeLog:

        * Make-lang.in: Update GCOV version to 3.

gcc/lto/ChangeLog:

        * Make-lang.in: Update GCOV version to 3.

gcc/testsuite/ChangeLog:

        * lib/profopt.exp: Update GCOV version to 3.
---
 gcc/auto-profile.cc           | 157 +++++++++++++++++++++++++++++-----
 gcc/auto-profile.h            |   6 ++
 gcc/c/Make-lang.in            |   4 +-
 gcc/cp/Make-lang.in           |   4 +-
 gcc/doc/invoke.texi           |  10 +++
 gcc/gcov-io.h                 |   2 +
 gcc/lto/Make-lang.in          |   4 +-
 gcc/params.opt                |   8 ++
 gcc/testsuite/lib/profopt.exp |   2 +-
 9 files changed, 169 insertions(+), 28 deletions(-)

diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 6971204ddf5..2271cab4864 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
 #include "config.h"
+#define INCLUDE_ALGORITHM
 #define INCLUDE_MAP
 #define INCLUDE_SET
 #include "system.h"
@@ -122,18 +123,18 @@ along with GCC; see the file COPYING3.  If not see
 */
 
 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
-#define AUTO_PROFILE_VERSION 2
+#define AUTO_PROFILE_VERSION 3
 
 /* profile counts determined by AFDO smaller than afdo_hot_bb_threshold are
    considered cols.  */
-gcov_type afdo_hot_bb_threshod = -1;
+gcov_type afdo_hot_bb_threshold = -1;
 
 /* Return true if COUNT is possibly hot.  */
 bool
 maybe_hot_afdo_count_p (profile_count count)
 {
   gcc_checking_assert (count.ipa ().initialized_p ());
-  return count.ipa ().to_gcov_type () >= afdo_hot_bb_threshod;
+  return count.ipa ().to_gcov_type () >= afdo_hot_bb_threshold;
 }
 
 /* Return true if location of STMT may be expressed by debug info.  */
@@ -228,6 +229,31 @@ struct string_compare
   }
 };
 
+/* Store the summary information for the profile.  */
+struct summary_info
+{
+  struct detailed_summary
+  {
+    uint32_t cutoff;
+    uint64_t min_count;
+    uint64_t num_counts;
+  };
+
+  uint64_t total_count;
+  uint64_t max_count;
+  uint64_t max_function_count;
+  uint64_t num_counts;
+  uint64_t num_functions;
+  auto_vec<detailed_summary> detailed_summaries;
+  std::map<uint32_t, unsigned> percentile_idx_map;
+
+  /* Read profile, return TRUE on success.  */
+  bool read ();
+
+  /* Get the minimum count required for percentile CUTOFF.  */
+  uint64_t get_threshold_count (uint32_t cutoff);
+};
+
 /* Store a string array, indexed by string position in the array.  */
 class string_table
 {
@@ -592,6 +618,9 @@ private:
   auto_vec <function_instance *> duplicate_functions_;
 };
 
+/* Store the summary information from the GCOV file.  */
+static summary_info *afdo_summary_info;
+
 /* Store the strings read from the profile data file.  */
 static string_table *afdo_string_table;
 
@@ -826,6 +855,66 @@ get_relative_location_for_stmt (tree fn, gimple *stmt)
           gimple_location (stmt));
 }
 
+/* Member functions for summary_info.  */
+
+bool
+summary_info::read ()
+{
+  if (gcov_read_unsigned () != GCOV_TAG_AFDO_SUMMARY)
+    return false;
+
+  total_count = gcov_read_counter ();
+  max_count = gcov_read_counter ();
+  max_function_count = gcov_read_counter ();
+  num_counts = gcov_read_counter ();
+  num_functions = gcov_read_counter ();
+  uint64_t num_detailed_summaries = gcov_read_counter ();
+  detailed_summaries.reserve (num_detailed_summaries);
+  for (uint64_t i = 0; i < num_detailed_summaries; i++)
+    {
+      detailed_summary detailed;
+      detailed.cutoff = gcov_read_unsigned ();
+      detailed.min_count = gcov_read_counter ();
+      detailed.num_counts = gcov_read_counter ();
+      detailed_summaries.quick_push (detailed);
+      percentile_idx_map[detailed.cutoff] = i;
+    }
+
+  return !gcov_is_error ();
+}
+
+/* Get the minimum count required for percentile CUTOFF.  */
+
+uint64_t
+summary_info::get_threshold_count (uint32_t cutoff)
+{
+  auto it = percentile_idx_map.find (cutoff);
+  if (it != percentile_idx_map.end ())
+    return detailed_summaries[it->second].min_count;
+
+  /* The cutoffs stored in the GCOV are fractions multiplied by 1,000,000.  */
+  gcc_checking_assert (cutoff < 1'000'000);
+
+  auto cutoff_it
+    = std::partition_point (detailed_summaries.begin (),
+                           detailed_summaries.end (),
+                           [cutoff] (const detailed_summary &summary) {
+                             return summary.cutoff <= cutoff;
+                           });
+
+  unsigned idx = std::distance (detailed_summaries.begin (), cutoff_it);
+  /* If the cutoff is too high, clamp it to the highest.  */
+  if (cutoff_it == detailed_summaries.end ())
+    {
+      gcc_checking_assert (idx == detailed_summaries.length ());
+      idx = detailed_summaries.length () - 1;
+    }
+
+  /* Cache the result of the lookup.  */
+  percentile_idx_map[cutoff] = idx;
+  return detailed_summaries[idx].min_count;
+}
+
 /* Member functions for string_table.  */
 
 /* Deconstructor.  */
@@ -2365,8 +2454,6 @@ function_instance::read_function_instance (function_instance_stack *stack,
       unsigned num_targets = gcov_read_unsigned ();
       gcov_type count = gcov_read_counter ();
       s->pos_counts[offset].count = count;
-      afdo_profile_info->sum_max = std::max (afdo_profile_info->sum_max,
-                                            count);
 
       for (unsigned j = 0; j < stack->length (); j++)
         (*stack)[j]->total_count_ += count;
@@ -2644,7 +2731,7 @@ autofdo_source_profile::read ()
                     "auto-profile contains duplicated function instance %s",
                     afdo_string_table->get_name (s->name ()));
     }
-  int hot_frac = param_hot_bb_count_fraction;
+  afdo_profile_info->sum_max = afdo_summary_info->max_count;
   /* Scale up the profile, but leave some bits in case some counts gets
      bigger than sum_max eventually.  */
   if (afdo_profile_info->sum_max)
@@ -2652,22 +2739,25 @@ autofdo_source_profile::read ()
       = MAX (((gcov_type)1 << (profile_count::n_bits - 10))
             / afdo_profile_info->sum_max, 1);
   afdo_profile_info->cutoff *= afdo_count_scale;
-  afdo_hot_bb_threshod
-    = hot_frac
-      ? afdo_profile_info->sum_max * afdo_count_scale / hot_frac
-      : (gcov_type)profile_count::max_count;
-  set_hot_bb_threshold (afdo_hot_bb_threshod);
+  /* Derive the hot count threshold from the profile summary.  */
+  afdo_hot_bb_threshold
+    = min_afdo_hot_count (param_afdo_profile_summary_cutoff_hot)
+      * afdo_count_scale;
+  set_hot_bb_threshold (afdo_hot_bb_threshold);
   if (dump_file)
-    fprintf (dump_file, "Max count in profile %" PRIu64 "\n"
-                       "Setting scale %" PRIu64 "\n"
-                       "Scaled max count %" PRIu64 "\n"
-                       "Cutoff %" PRIu64 "\n"
-                       "Hot count threshold %" PRIu64 "\n\n",
-            (int64_t)afdo_profile_info->sum_max,
-            (int64_t)afdo_count_scale,
-            (int64_t)(afdo_profile_info->sum_max * afdo_count_scale),
-            (int64_t)afdo_profile_info->cutoff,
-            (int64_t)afdo_hot_bb_threshod);
+    fprintf (dump_file,
+            "Max count in profile %" PRIu64 "\n"
+            "Setting scale %" PRIu64 "\n"
+            "Scaled max count %" PRIu64 "\n"
+            "Cutoff %" PRIu64 "\n"
+            "Unscaled hot count threshold %" PRIu64 "\n"
+            "Hot count threshold %" PRIu64 "\n\n",
+            (int64_t) afdo_profile_info->sum_max, (int64_t) afdo_count_scale,
+            (int64_t) (afdo_profile_info->sum_max * afdo_count_scale),
+            (int64_t) afdo_profile_info->cutoff,
+            (int64_t) min_afdo_hot_count (
+              param_afdo_profile_summary_cutoff_hot),
+            (int64_t) afdo_hot_bb_threshold);
   afdo_profile_info->sum_max *= afdo_count_scale;
   return true;
 }
@@ -2761,6 +2851,14 @@ read_profile (void)
   /* Skip the empty integer.  */
   gcov_read_unsigned ();
 
+  /* summary_info.  */
+  afdo_summary_info = new summary_info ();
+  if (!afdo_summary_info->read ())
+    {
+      error ("cannot read summary information from %s", auto_profile_file);
+      return;
+    }
+
   /* string_table.  */
   afdo_string_table = new string_table ();
   if (!afdo_string_table->read ())
@@ -4199,6 +4297,7 @@ end_auto_profile (void)
 {
   delete autofdo::afdo_source_profile;
   delete autofdo::afdo_string_table;
+  delete autofdo::afdo_summary_info;
   profile_info = NULL;
 }
 
@@ -4303,6 +4402,22 @@ remove_afdo_speculative_target (cgraph_edge *e)
   autofdo::afdo_source_profile->remove_icall_target (e);
 }
 
+/* Get the minimum count needed to achieve the hot threshold percentile.  */
+
+uint64_t
+min_afdo_hot_count (uint32_t cutoff)
+{
+  return autofdo::afdo_summary_info->get_threshold_count (cutoff);
+}
+
+/* Get the maximum count needed to be considered cold for the percentile.  */
+
+uint64_t
+max_afdo_cold_count (uint32_t cutoff)
+{
+  return autofdo::afdo_summary_info->get_threshold_count (cutoff);
+}
+
 namespace
 {
 
diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h
index 3cce5f2152c..66ef20419e8 100644
--- a/gcc/auto-profile.h
+++ b/gcc/auto-profile.h
@@ -42,4 +42,10 @@ extern gcov_type afdo_hot_bb_threshold;
 /* Return true if COUNT is possibly hot.  */
 extern bool maybe_hot_afdo_count_p (profile_count count);
 
+/* Get the minimum count needed to achieve the hot threshold percentile.  */
+extern uint64_t min_afdo_hot_count (uint32_t cutoff);
+
+/* Get the maximum count needed to be considered cold for the percentile.  */
+extern uint64_t max_afdo_cold_count (uint32_t cutoff);
+
 #endif /* AUTO_PROFILE_H */
diff --git a/gcc/c/Make-lang.in b/gcc/c/Make-lang.in
index f09fc99467b..95a7dad8350 100644
--- a/gcc/c/Make-lang.in
+++ b/gcc/c/Make-lang.in
@@ -102,7 +102,7 @@ create_fdas_for_cc1: ../stage1-gcc/cc1$(exeext) ../prev-gcc/$(PERF_DATA)
          echo $$perf_path; \
          if [ -f $$perf_path ]; then \
            profile_name=cc1_$$component_in_prev.fda; \
-           $(CREATE_GCOV) -binary ../stage1-gcc/cc1$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 2 || exit 1; \
+           $(CREATE_GCOV) -binary ../stage1-gcc/cc1$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 3 || exit 1; \
          fi; \
        done;
 
@@ -112,7 +112,7 @@ create_fdas_for_cc1: ../stage1-gcc/cc1$(exeext) ../prev-gcc/$(PERF_DATA)
          echo $$perf_path; \
          if [ -f $$perf_path ]; then \
            profile_name=cc1_$$component_in_prev_target.fda; \
-           $(CREATE_GCOV) -binary ../prev-gcc/cc1$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 2 || exit 1; \
+           $(CREATE_GCOV) -binary ../prev-gcc/cc1$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 3 || exit 1; \
          fi; \
        done;
 
diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in
index 70cfe2b1663..20ae96c18ed 100644
--- a/gcc/cp/Make-lang.in
+++ b/gcc/cp/Make-lang.in
@@ -201,7 +201,7 @@ create_fdas_for_cc1plus: ../stage1-gcc/cc1plus$(exeext) ../prev-gcc/$(PERF_DATA)
          echo $$perf_path; \
          if [ -f $$perf_path ]; then \
            profile_name=cc1plus_$$component_in_prev.fda; \
-           $(CREATE_GCOV) -binary ../stage1-gcc/cc1plus$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 2 || exit 1; \
+           $(CREATE_GCOV) -binary ../stage1-gcc/cc1plus$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 3 || exit 1; \
          fi; \
        done;
 
@@ -211,7 +211,7 @@ create_fdas_for_cc1plus: ../stage1-gcc/cc1plus$(exeext) ../prev-gcc/$(PERF_DATA)
          echo $$perf_path; \
          if [ -f $$perf_path ]; then \
            profile_name=cc1plus_$$component_in_prev_target.fda; \
-           $(CREATE_GCOV) -binary ../prev-gcc/cc1plus$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 2 || exit 1; \
+           $(CREATE_GCOV) -binary ../prev-gcc/cc1plus$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 3 || exit 1; \
          fi; \
        done;
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f93fe43733d..c9715b202d7 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -16773,6 +16773,16 @@ eligible for inlining.
 Select fraction of the maximal frequency of executions of a basic block in
 a function to align the basic block.
 
+@item afdo-profile-summary-cutoff-hot
+
+The percentile (expressed as a fraction multiplied by 1000000) from which the
+minimum count required for a sample to be considered hot is calculated.
+
+@item afdo-profile-summary-cutoff-cold
+
+The percentile (expressed as a fraction multiplied by 1000000) from which the
+maximum count for a sample to be considered cold is calculated.
+
 @item align-loop-iterations
 
 A loop expected to iterate at least the selected number of iterations is
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index 313c15cecbb..7b1d93f961d 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -274,6 +274,8 @@ typedef uint64_t gcov_type_unsigned;
 #define GCOV_TAG_OBJECT_SUMMARY  ((gcov_unsigned_t)0xa1000000)
 #define GCOV_TAG_OBJECT_SUMMARY_LENGTH (2 * GCOV_WORD_SIZE)
 #define GCOV_TAG_PROGRAM_SUMMARY ((gcov_unsigned_t)0xa3000000) /* Obsolete */
+
+#define GCOV_TAG_AFDO_SUMMARY    ((gcov_unsigned_t)0xa8000000)
 #define GCOV_TAG_AFDO_FILE_NAMES ((gcov_unsigned_t)0xaa000000)
 #define GCOV_TAG_AFDO_FUNCTION ((gcov_unsigned_t)0xac000000)
 #define GCOV_TAG_AFDO_WORKING_SET ((gcov_unsigned_t)0xaf000000)
diff --git a/gcc/lto/Make-lang.in b/gcc/lto/Make-lang.in
index 2af8bba44ca..a24fa059a7a 100644
--- a/gcc/lto/Make-lang.in
+++ b/gcc/lto/Make-lang.in
@@ -115,7 +115,7 @@ create_fdas_for_lto1: ../stage1-gcc/lto1$(exeext) ../prev-gcc/$(PERF_DATA)
          echo $$perf_path; \
          if [ -f $$perf_path ]; then \
            profile_name=lto1_$$component_in_prev.fda; \
-           $(CREATE_GCOV) -binary ../stage1-gcc/lto1$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 2 || exit 1; \
+           $(CREATE_GCOV) -binary ../stage1-gcc/lto1$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 3 || exit 1; \
          fi; \
        done;
 
@@ -125,7 +125,7 @@ create_fdas_for_lto1: ../stage1-gcc/lto1$(exeext) ../prev-gcc/$(PERF_DATA)
          echo $$perf_path; \
          if [ -f $$perf_path ]; then \
            profile_name=lto1_$$component_in_prev_target.fda; \
-           $(CREATE_GCOV) -binary ../prev-gcc/lto1$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 2 || exit 1; \
+           $(CREATE_GCOV) -binary ../prev-gcc/lto1$(exeext) -gcov $$profile_name -profile $$perf_path -gcov_version 3 || exit 1; \
          fi; \
        done;
 
diff --git a/gcc/params.opt b/gcc/params.opt
index 1f6297de163..56e13649d17 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -22,6 +22,14 @@
 
 ; Please try to keep this file in ASCII collating order.
 
+-param=afdo-profile-summary-cutoff-hot=
+Common Joined RejectNegative UInteger Var(param_afdo_profile_summary_cutoff_hot) Init(990000) IntegerRange(0, 1000000) Param Optimization
+Consider a count hot if it exceeds the minimum count needed to reach this percentile (expressed as a fraction times 1000000) of counts.
+
+-param=afdo-profile-summary-cutoff-cold=
+Common Joined RejectNegative UInteger Var(param_afdo_profile_summary_cutoff_cold) Init(999999) IntegerRange(0, 1000000) Param Optimization
+Consider a count cold if it falls below the minimum count needed to reach this percentile (expressed as a fraction times 1000000) of counts.
+
 -param=align-loop-iterations=
 Common Joined UInteger Var(param_align_loop_iterations) Init(4) Param Optimization
 Loops iterating at least selected number of iterations will get loop alignment.
diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp
index 81d86c632d1..0001f6798dd 100644
--- a/gcc/testsuite/lib/profopt.exp
+++ b/gcc/testsuite/lib/profopt.exp
@@ -452,7 +452,7 @@ proc profopt-execute { src } {
            # convert profile
            if { $run_autofdo == 1 } {
                 set bprefix "afdo."
-               set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data -gcov_version=2 --gcov=$tmpdir/$bprefix$base.$ext"
+               set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data --gcov_version=3 --gcov=$tmpdir/$bprefix$base.$ext"
                verbose "Running $cmd"
                set id [remote_spawn "" $cmd]
                if { $id < 0 } {
-- 
2.44.0

Reply via email to