Hi,
this patch adds -fauto-profile-inlining, which can be used to control
auto-profile directed inlining.  The feature is quite interesting,
but it may also trigger unexpected code size growth or prevent useful
IPA inlining in the profiled binary.

Bootstrapped/regtested x86_64. Plan to commit it tomorrow.

gcc/ChangeLog:

        * common.opt (fauto-profile-inlining): New.
        * doc/invoke.texi (-fauto-profile-inlining): Document.
        * ipa-inline.cc (inline_functions_by_afdo): Check
        flag_auto_profile_inlining.
        (early_inliner): Also do inline_functions_by_afdo with
        !flag_early_inlining.

diff --git a/gcc/common.opt b/gcc/common.opt
index 0e50305dde8..9f26dedec89 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1187,6 +1187,11 @@ Common Joined RejectNegative Var(auto_profile_file)
 Use sample profile information for call graph node weights. The profile
 file is specified in the argument.
 
+fauto-profile-inlining
+Common Var(flag_auto_profile_inlining) Init(1) Optimization
+Perform inlining using auto-profile.
+
+
 ; -fcheck-bounds causes gcc to generate array bounds checks.
 ; For C, C++ and ObjC: defaults off.
 ; For Java: defaults to on.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 693bd57691e..4dd00e2de70 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -573,7 +573,7 @@ Objective-C and Objective-C++ Dialects}.
 -fmin-function-alignment=[@var{n}]
 -fno-allocation-dce -fallow-store-data-races
 -fassociative-math  -fauto-profile  -fauto-profile[=@var{path}]
--fauto-inc-dec  -fbranch-probabilities
+-fauto-profile-inlining -fauto-inc-dec  -fbranch-probabilities
 -fcaller-saves
 -fcombine-stack-adjustments  -fconserve-stack
 -ffold-mem-offsets
@@ -15508,6 +15508,12 @@ The following options control compiler behavior regarding floating-point
 arithmetic.  These options trade off between speed and
 correctness.  All must be specifically enabled.
 
+@opindex fauto-profile-inlining
+@item -fauto-profile-inlining
+When auto-profile is available, inline all relevant functions which were
+inlined in the train run before reading the profile feedback.  This improves
+context sensitivity of the profile.  Enabled by default.
+
 @table @gcctabopt
 @opindex fexcess-precision
 @item -fexcess-precision=@var{style}
diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc
index a960d55b661..ca605b027dc 100644
--- a/gcc/ipa-inline.cc
+++ b/gcc/ipa-inline.cc
@@ -3120,7 +3120,7 @@ early_inline_small_functions (struct cgraph_node *node)
 static bool
 inline_functions_by_afdo (struct cgraph_node *node, bool *speculative_calls)
 {
-  if (!flag_auto_profile)
+  if (!flag_auto_profile || !flag_auto_profile_inlining)
     return false;
   struct cgraph_edge *e;
   bool inlined = false;
@@ -3320,6 +3320,25 @@ early_inliner (function *fun)
        fprintf (dump_file, "Iterations: %i\n", iterations);
     }
 
+  /* do AFDO inlining in case it was not done as part of early inlining.  */
+  if (optimize
+      && !flag_no_inline
+      && !flag_early_inlining
+      && flag_auto_profile_inlining)
+    {
+      bool speculative_calls = false;
+      inlined |= inline_functions_by_afdo (node, &speculative_calls);
+      if (speculative_calls)
+       {
+         cgraph_edge *next;
+         for (cgraph_edge *e = node->callees; e; e = next)
+           {
+             next = e->next_callee;
+             cgraph_edge::redirect_call_stmt_to_callee (e);
+           }
+       }
+    }
+
   if (inlined)
     {
       timevar_push (TV_INTEGRATION);

Reply via email to