https://gcc.gnu.org/g:b0d50cbb42ab2ce5fab8a832cb82fc54b371c914

commit r16-1346-gb0d50cbb42ab2ce5fab8a832cb82fc54b371c914
Author: Jan Hubicka <hubi...@ucw.cz>
Date:   Fri Jun 6 17:57:00 2025 +0200

    Fix profile updating in ipa-cp
    
    Bootstrapping with autoprofiledbootstrap, LTO and checking enabled
    results in ICEs in WPA because we end up mixing local and IPA counts
    in ipa-cp.cc:update_specialized_profile.  This is because of a
    missing call to profile_count::adjust_for_ipa_scaling.  While looking
    into that, I noticed that the function also forgets to update
    indirect call edges.  This made me commonize the logic which
    currently exists in four copies: clone_inlined_nodes,
    update_specialized_profile, update_profiling_info and
    update_counts_for_self_gen_clones.
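
    As an illustration, the commonized helper reduces each of those four
    copies to a single call (a minimal usage sketch, assuming a
    cgraph_node *node and a profile_count new_count; the real
    implementation is the new cgraph_node::apply_scale in the diff
    below):

        /* Rescale NODE (and its inlined clones) by NEW_COUNT/NODE->count.
           apply_scale first calls profile_count::adjust_for_ipa_scaling,
           so local and IPA counts are never mixed, and it also walks
           indirect call edges, which update_specialized_profile
           previously missed.  */
        node->apply_scale (new_count, node->count);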
    
    While testing it I noticed that we also ICE when linking with
    -fdump-ipa-all-details-blocks, since IPA and local counts are
    temporarily mixed during the IPA transformation stage.  I therefore
    added a check to profile_count::dump so it does not crash, and added
    a verifier to gimple_verify_flow_info.
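
    A minimal sketch of the resulting defensive dump pattern (simplified
    from the profile-count.cc hunk below; f, fun and count stand for the
    output file, function and count being dumped):

        if (count.compatible_p (ENTRY_BLOCK_PTR_FOR_FN (fun)->count))
          /* Same scale, so a frequency relative to the entry block
             count is meaningful.  */
          fprintf (f, "freq %.4f",
                   count.to_sreal_scale
                     (ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
        else
          /* Local and IPA counts are temporarily mixed; report it
             instead of ICEing.  */
          fprintf (f, "incompatible with entry block count");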
    
    Another problem I noticed is that while the profile updates done by
    the inliner (via cgraph_node::clone) correctly use global0 profiles
    instead of erasing the profile completely when the IPA count drops
    to 0, the scaling in ipa-cp does not, so we lose information and
    possibly some code quality.  I will fix that incrementally.
    Similarly, ipa-split, when offlining a region with a 0 entry count,
    may re-do frequency propagation to get something useful.
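
    To illustrate the difference (a sketch, assuming the profile_count
    API as used in the diff below):

        /* Inliner-style update: keep the local profile shape and mark
           only the IPA (global) part as zero, so relative BB
           frequencies remain usable for later local optimization.  */
        node->count = node->count.global0adjusted ();

        /* Erasing instead drops the shape entirely; this is the
           information loss to be fixed incrementally in ipa-cp.  */
        node->count = profile_count::adjusted_zero ();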
    
    gcc/ChangeLog:
    
            * cgraph.cc (cgraph_node::apply_scale): New member function.
            * cgraph.h (struct cgraph_node): Declare apply_scale.
            * ipa-cp.cc (update_counts_for_self_gen_clones):
            Use cgraph_node::apply_scale.
            (update_profiling_info): Do not overwrite the local
            profile when the global profile drops to 0.
            (update_specialized_profile): Likewise.
            * ipa-inline-transform.cc (update_noncloned_counts): Remove.
            (can_remove_node_now_p_1): Fix formatting.
            (clone_inlined_nodes): Use cgraph_node::apply_scale.
            * profile-count.cc (profile_count::dump): Do not ICE
            when count is not compatible with entry block count.
            * tree-cfg.cc (gimple_verify_flow_info): Check
            compatibility of count and entry block count.

Diff:
---
 gcc/cgraph.cc               | 20 +++++++++++++
 gcc/cgraph.h                |  3 ++
 gcc/ipa-cp.cc               | 73 ++++++++++++++-------------------------------
 gcc/ipa-inline-transform.cc | 25 ++--------------
 gcc/profile-count.cc        | 13 ++++++--
 gcc/tree-cfg.cc             | 12 ++++++++
 6 files changed, 70 insertions(+), 76 deletions(-)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index 3f95ca1fa85c..4a037a7bab10 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -179,6 +179,26 @@ cgraph_node::function_version (void)
   return cgraph_fnver_htab->find (&key);
 }
 
+/* Scale profile by NUM/DEN.  Walk into inlined clones.  */
+
+void
+cgraph_node::apply_scale (profile_count num, profile_count den)
+{
+  struct cgraph_edge *e;
+
+  profile_count::adjust_for_ipa_scaling (&num, &den);
+
+  for (e = callees; e; e = e->next_callee)
+    {
+      if (!e->inline_failed)
+       e->callee->apply_scale (num, den);
+      e->count = e->count.apply_scale (num, den);
+    }
+  for (e = indirect_calls; e; e = e->next_callee)
+    e->count = e->count.apply_scale (num, den);
+  count = count.apply_scale (num, den);
+}
+
 /* Insert a new cgraph_function_version_info node into cgraph_fnver_htab
    corresponding to cgraph_node NODE.  */
 cgraph_function_version_info *
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 8dbe36eac09d..ba9a8a25e396 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1256,6 +1256,9 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
      it is not used in any other non-standard way.  */
   bool only_called_directly_p (void);
 
+  /* Scale profile by NUM/DEN.  Walk into inlined clones.  */
+  void apply_scale (profile_count num, profile_count den);
+
   /* Return true when function is only called directly or it has alias.
      i.e. it is not externally visible, address was not taken and
      it is not used in any other non-standard way.  */
diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index 73cf9040fad7..92e234e67162 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -4666,19 +4666,12 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
   unsigned i = 0;
   for (cgraph_node *n : self_gen_clones)
     {
-      profile_count orig_count = n->count;
       profile_count new_count
        = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
       new_count = lenient_count_portion_handling (new_count, orig_node);
-      n->count = new_count;
-      profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
+      n->apply_scale (new_count, n->count);
       for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
-       {
-         cs->count = cs->count.apply_scale (new_count, orig_count);
-         processed_edges.add (cs);
-       }
-      for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
-       cs->count = cs->count.apply_scale (new_count, orig_count);
+       processed_edges.add (cs);
 
       i++;
     }
@@ -4811,20 +4804,24 @@ update_profiling_info (struct cgraph_node *orig_node,
              /* The NEW_NODE count and counts of all its outgoing edges
                 are still unmodified copies of ORIG_NODE's.  Just clear
                 the latter and bail out.  */
-             profile_count zero;
               if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
-                zero = profile_count::zero ().guessed_local ();
+               orig_node->count = orig_node->count.guessed_local ();
              else
-               zero = profile_count::adjusted_zero ();
-             orig_node->count = zero;
+               orig_node->count = orig_node->count.global0adjusted ();
              for (cgraph_edge *cs = orig_node->callees;
                   cs;
                   cs = cs->next_callee)
-               cs->count = zero;
+               if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
+                 cs->count = orig_node->count.guessed_local ();
+               else
+                 cs->count = orig_node->count.global0adjusted ();
              for (cgraph_edge *cs = orig_node->indirect_calls;
                   cs;
                   cs = cs->next_callee)
-               cs->count = zero;
+               if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
+                 cs->count = orig_node->count.guessed_local ();
+               else
+                 cs->count = orig_node->count.global0adjusted ();
              return;
            }
        }
@@ -4874,26 +4871,12 @@ update_profiling_info (struct cgraph_node *orig_node,
                                                orig_node);
 
   new_sum = orig_node_count.combine_with_ipa_count (new_sum);
-  new_node->count = new_sum;
   orig_node->count = remainder;
 
-  profile_count orig_new_node_count = orig_node_count;
-  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
-  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
-    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
-  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
-    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
+  new_node->apply_scale (new_sum, new_node->count);
 
   if (!orig_edges_processed)
-    {
-      profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
-      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
-       cs->count = cs->count.apply_scale (remainder, orig_node_count);
-      for (cgraph_edge *cs = orig_node->indirect_calls;
-          cs;
-          cs = cs->next_callee)
-       cs->count = cs->count.apply_scale (remainder, orig_node_count);
-    }
+    orig_node->apply_scale (remainder, orig_node->count);
 
   if (dump_file)
     {
@@ -4911,35 +4894,25 @@ update_specialized_profile (struct cgraph_node *new_node,
                            struct cgraph_node *orig_node,
                            profile_count redirected_sum)
 {
-  struct cgraph_edge *cs;
-  profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
-
   if (dump_file)
     {
       fprintf (dump_file, "    the sum of counts of redirected  edges is ");
       redirected_sum.dump (dump_file);
       fprintf (dump_file, "\n    old ipa count of the original node is ");
-      orig_node_count.dump (dump_file);
+      orig_node->count.dump (dump_file);
       fprintf (dump_file, "\n");
     }
-  if (!orig_node_count.nonzero_p ())
+  if (!orig_node->count.ipa ().nonzero_p ()
+      || !redirected_sum.nonzero_p ())
     return;
 
-  new_node_count = new_node->count;
-  new_node->count += redirected_sum;
-  orig_node->count
-    = lenient_count_portion_handling (orig_node->count - redirected_sum,
-                                     orig_node);
+  orig_node->apply_scale
+    (lenient_count_portion_handling (orig_node->count.ipa () - redirected_sum,
+                                    orig_node),
+     orig_node->count);
 
-  for (cs = new_node->callees; cs; cs = cs->next_callee)
-    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
-
-  for (cs = orig_node->callees; cs; cs = cs->next_callee)
-    {
-      profile_count dec = cs->count.apply_scale (redirected_sum,
-                                                orig_node_count);
-      cs->count -= dec;
-    }
+  new_node->apply_scale (new_node->count.ipa () + redirected_sum,
+                        new_node->count);
 
   if (dump_file)
     {
diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc
index 46b8e5bb6790..3c6a84570b7f 100644
--- a/gcc/ipa-inline-transform.cc
+++ b/gcc/ipa-inline-transform.cc
@@ -58,27 +58,6 @@ along with GCC; see the file COPYING3.  If not see
 int ncalls_inlined;
 int nfunctions_inlined;
 
-/* Scale counts of NODE edges by NUM/DEN.  */
-
-static void
-update_noncloned_counts (struct cgraph_node *node,
-                        profile_count num, profile_count den)
-{
-  struct cgraph_edge *e;
-
-  profile_count::adjust_for_ipa_scaling (&num, &den);
-
-  for (e = node->callees; e; e = e->next_callee)
-    {
-      if (!e->inline_failed)
-        update_noncloned_counts (e->callee, num, den);
-      e->count = e->count.apply_scale (num, den);
-    }
-  for (e = node->indirect_calls; e; e = e->next_callee)
-    e->count = e->count.apply_scale (num, den);
-  node->count = node->count.apply_scale (num, den);
-}
-
 /* We removed or are going to remove the last call to NODE.
    Return true if we can and want proactively remove the NODE now.
    This is important to do, since we want inliner to know when offline
@@ -93,7 +72,7 @@ can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
     {
       cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
       if ((alias->callers && alias->callers != e)
-          || !can_remove_node_now_p_1 (alias, e))
+         || !can_remove_node_now_p_1 (alias, e))
        return false;
     }
   /* FIXME: When address is taken of DECL_EXTERNAL function we still
@@ -212,7 +191,7 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
            }
          duplicate = false;
          e->callee->externally_visible = false;
-          update_noncloned_counts (e->callee, e->count, e->callee->count);
+         e->callee->apply_scale (e->count, e->callee->count);
 
          dump_callgraph_transformation (e->callee, inlining_into,
                                         "inlining to");
diff --git a/gcc/profile-count.cc b/gcc/profile-count.cc
index 22c109ab528c..e857cddea7ee 100644
--- a/gcc/profile-count.cc
+++ b/gcc/profile-count.cc
@@ -94,9 +94,16 @@ profile_count::dump (FILE *f, struct function *fun) const
   else if (fun && initialized_p ()
           && fun->cfg
           && ENTRY_BLOCK_PTR_FOR_FN (fun)->count.initialized_p ())
-    fprintf (f, "%" PRId64 " (%s, freq %.4f)", m_val,
-            profile_quality_display_names[m_quality],
-            to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
+    {
+      if (compatible_p (ENTRY_BLOCK_PTR_FOR_FN (fun)->count))
+       fprintf (f, "%" PRId64 " (%s, freq %.4f)", m_val,
+                profile_quality_display_names[m_quality],
+                to_sreal_scale
+                  (ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
+      else
+       fprintf (f, "%" PRId64 " (%s, incompatible with entry block count)",
+                m_val, profile_quality_display_names[m_quality]);
+    }
   else
     fprintf (f, "%" PRId64 " (%s)", m_val,
             profile_quality_display_names[m_quality]);
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index b342b147716a..fad308e7f7b7 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -5757,6 +5757,12 @@ gimple_verify_flow_info (void)
       error ("probability of edge from entry block not initialized");
       err = true;
     }
+  if (!EXIT_BLOCK_PTR_FOR_FN (cfun)
+       ->count.compatible_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count))
+    {
+      error ("exit block count is not compoatible with entry block count");
+      err = true;
+    }
 
 
   FOR_EACH_BB_FN (bb, cfun)
@@ -5780,6 +5786,12 @@ gimple_verify_flow_info (void)
                err = true;
              }
         }
+      if (!bb->count.compatible_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count))
+       {
+         error ("count of bb %d is not compoatible with entry block count",
+                bb->index);
+         err = true;
+       }
 
       /* Skip labels on the start of basic block.  */
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
