Hi,
just for those who are interested, this is a quick & dirty patch adding another
round of local optimization passes at WPA time.  I've added the early inliner
and IPA-SRA because I was curious how many optimization opportunities we are
missing by limiting those to the early pass.

With early inlining the gain seems to be very small: we inline one extra call
when building Mozilla in LTO mode.

IPA SRA is a different story.  While we do 579 IPA SRA clones in the early pass,
the late pass produces 13014 clones (22 times more ;) suggesting that the pass
might be interesting at the IPA level after all.

There are 78686 functions after inlining in Mozilla, so one out of 7 functions
is touched.

The size difference of libxul is not great, about a 100Kb reduction. I will try
benchmarking it eventually, too.

Honza


Index: cgraph.c
===================================================================
*** cgraph.c    (revision 175350)
--- cgraph.c    (working copy)
*************** cgraph_release_function_body (struct cgr
*** 1389,1396 ****
        }
        if (cfun->cfg)
        {
!         gcc_assert (dom_computed[0] == DOM_NONE);
!         gcc_assert (dom_computed[1] == DOM_NONE);
          clear_edges ();
        }
        if (cfun->value_histograms)
--- 1393,1403 ----
        }
        if (cfun->cfg)
        {
!         /*gcc_assert (dom_computed[0] == DOM_NONE);
!         gcc_assert (dom_computed[1] == DOM_NONE);*/
! free_dominance_info (CDI_DOMINATORS);
! free_dominance_info (CDI_POST_DOMINATORS);
! 
          clear_edges ();
        }
        if (cfun->value_histograms)
Index: tree-pass.h
===================================================================
*** tree-pass.h (revision 175350)
--- tree-pass.h (working copy)
*************** extern struct simple_ipa_opt_pass pass_i
*** 452,458 ****
  extern struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility;
  extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
  
! extern struct simple_ipa_opt_pass pass_early_local_passes;
  
  extern struct ipa_opt_pass_d pass_ipa_whole_program_visibility;
  extern struct ipa_opt_pass_d pass_ipa_lto_gimple_out;
--- 452,458 ----
  extern struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility;
  extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
  
! extern struct simple_ipa_opt_pass pass_early_local_passes, pass_late_local_passes, pass_late_local_passes2;
  
  extern struct ipa_opt_pass_d pass_ipa_whole_program_visibility;
  extern struct ipa_opt_pass_d pass_ipa_lto_gimple_out;
Index: ipa-inline-analysis.c
===================================================================
*** ipa-inline-analysis.c       (revision 175350)
--- ipa-inline-analysis.c       (working copy)
*************** estimate_function_body_sizes (struct cgr
*** 1535,1542 ****
                  edge->call_stmt_cannot_inline_p = true;
                  gimple_call_set_cannot_inline (stmt, true);
                }
!             else
!               gcc_assert (!gimple_call_cannot_inline_p (stmt));
            }
  
          /* TODO: When conditional jump or swithc is known to be constant, but
--- 1535,1542 ----
                  edge->call_stmt_cannot_inline_p = true;
                  gimple_call_set_cannot_inline (stmt, true);
                }
!             /*else
!               gcc_assert (!gimple_call_cannot_inline_p (stmt));*/
            }
  
          /* TODO: When conditional jump or swithc is known to be constant, but
Index: tree-inline.c
===================================================================
*** tree-inline.c       (revision 175350)
--- tree-inline.c       (working copy)
*************** expand_call_inline (basic_block bb, gimp
*** 3891,3897 ****
    id->src_cfun = DECL_STRUCT_FUNCTION (fn);
    id->gimple_call = stmt;
  
!   gcc_assert (!id->src_cfun->after_inlining);
  
    id->entry_bb = bb;
    if (lookup_attribute ("cold", DECL_ATTRIBUTES (fn)))
--- 3891,3897 ----
    id->src_cfun = DECL_STRUCT_FUNCTION (fn);
    id->gimple_call = stmt;
  
!   /*gcc_assert (!id->src_cfun->after_inlining);*/
  
    id->entry_bb = bb;
    if (lookup_attribute ("cold", DECL_ATTRIBUTES (fn)))
Index: tree-optimize.c
===================================================================
*** tree-optimize.c     (revision 175350)
--- tree-optimize.c     (working copy)
*************** struct simple_ipa_opt_pass pass_early_lo
*** 123,128 ****
--- 123,189 ----
  /* Gate: execute, or not, all of the non-trivial optimizations.  */
  
  static bool
+ gate_all_late_local_passes (void)
+ {
+         /* Don't bother doing anything if the program has errors.  */
+   return (!seen_error () && optimize);
+ }
+ 
+ static unsigned int
+ execute_all_late_local_passes (void)
+ {
+   /* Once this pass (and its sub-passes) are complete, all functions
+      will be in SSA form.  Technically this state change is happening
+      a tad late, since the sub-passes have not yet run, but since
+      none of the sub-passes are IPA passes and do not create new
+      functions, this is ok.  We're setting this value for the benefit
+      of IPA passes that follow.  */
+   if (cgraph_state < CGRAPH_STATE_IPA_SSA)
+     cgraph_state = CGRAPH_STATE_IPA_SSA;
+   return 0;
+ }
+ 
+ struct simple_ipa_opt_pass pass_late_local_passes =
+ {
+  {
+   SIMPLE_IPA_PASS,
+   "late_local_cleanups",              /* name */
+   gate_all_late_local_passes,         /* gate */
+   execute_all_late_local_passes,      /* execute */
+   NULL,                                       /* sub */
+   NULL,                                       /* next */
+   0,                                  /* static_pass_number */
+   TV_EARLY_LOCAL,                     /* tv_id */
+   0,                                  /* properties_required */
+   0,                                  /* properties_provided */
+   0,                                  /* properties_destroyed */
+   0,                                  /* todo_flags_start */
+   TODO_remove_functions                       /* todo_flags_finish */
+  }
+ };
+ 
+ struct simple_ipa_opt_pass pass_late_local_passes2 =
+ {
+  {
+   SIMPLE_IPA_PASS,
+   "late_local_cleanups2",             /* name */
+   gate_all_late_local_passes,         /* gate */
+   execute_all_late_local_passes,      /* execute */
+   NULL,                                       /* sub */
+   NULL,                                       /* next */
+   0,                                  /* static_pass_number */
+   TV_EARLY_LOCAL,                     /* tv_id */
+   0,                                  /* properties_required */
+   0,                                  /* properties_provided */
+   0,                                  /* properties_destroyed */
+   0,                                  /* todo_flags_start */
+   TODO_remove_functions                       /* todo_flags_finish */
+  }
+ };
+ 
+ /* Gate: execute, or not, all of the non-trivial optimizations.  */
+ 
+ static bool
  gate_all_early_optimizations (void)
  {
    return (optimize >= 1
Index: passes.c
===================================================================
*** passes.c    (revision 175350)
--- passes.c    (working copy)
*************** init_optimization_passes (void)
*** 1263,1268 ****
--- 1263,1288 ----
       passes are executed after partitioning and thus see just parts of the
       compiled unit.  */
    p = &all_late_ipa_passes;
+   NEXT_PASS (pass_late_local_passes);
+     {
+       struct opt_pass **p = &pass_late_local_passes.pass.sub;
+       NEXT_PASS (pass_inline_parameters);
+       NEXT_PASS (pass_release_ssa_names);
+     }
+   NEXT_PASS (pass_late_local_passes2);
+     {
+       struct opt_pass **p = &pass_late_local_passes2.pass.sub;
+       NEXT_PASS (pass_early_inline);
+       NEXT_PASS (pass_remove_cgraph_callee_edges);
+       NEXT_PASS (pass_ccp);
+       NEXT_PASS (pass_forwprop);
+       NEXT_PASS (pass_fre);
+       NEXT_PASS (pass_cd_dce);
+       NEXT_PASS (pass_early_ipa_sra);
+       NEXT_PASS (pass_release_ssa_names);
+       NEXT_PASS (pass_rebuild_cgraph_edges);
+       NEXT_PASS (pass_inline_parameters);
+     }
    NEXT_PASS (pass_ipa_pta);
    *p = NULL;
    /* These passes are run after IPA passes on every function that is being
Index: statistics.c
===================================================================
*** statistics.c        (revision 175350)
--- statistics.c        (working copy)
*************** statistics_fini_pass_3 (void **slot, voi
*** 171,176 ****
--- 171,178 ----
  void
  statistics_fini_pass (void)
  {
+   if (!current_pass)
+     return;
    if (current_pass->static_pass_number == -1)
      return;
  

Reply via email to