On Fri, Oct 11, 2019 at 11:03 AM Jan Hubicka <hubi...@ucw.cz> wrote: > > Hi, > this patch prevents tree creation druing WPA stream out (to avoid > touching pages and triggering COW). It fixes the following > - gimple streamer produces MEM_REF wrappings for global decls. > This is to preserve the type of access and is not necessary for > WPA->LTRANS streaming when decls ar eno longer going to be merged. > - we renumber stmt uids during streaming WPA summaries > - loop optimizer is initialized in output_function. > After testing the patch I noticed that output_function does one extra > renumbering of stmts. This seems quite broken and I will fix it > incrementally. > > Bootstrapped/regtested x86_64-linux, comitted.
Huh. Why do we stream function bodies at WPA time at all? We should already have input sections we can copy/remap? That is, why does gcc_assert (!flag_wpa) in output_function trip? Richard. > > * gimple-streamer-out.c (output_gimple_stmt): Add explicit function > parameter. > * lto-streamer-out.c: Include tree-dfa.h. > (output_cfg): Do not use cfun. > (lto_prepare_function_for_streaming): New. > (output_function): Do not push cfun; do not initialize loop optimizer. > * lto-streamer.h (lto_prepare_function_for_streaming): Declare. > * passes.c (ipa_write_summaries): Use it. > (ipa_write_optimization_summaries): Do not modify bodies. > * tree-dfa.c (renumber_gimple_stmt_uids): Add function parameter. > * tree.dfa.h (renumber_gimple_stmt_uids): Update prototype. > * tree-ssa-dse.c (pass_dse::execute): Update use of > renumber_gimple_stmt_uids. > * tree-ssa-math-opts.c (pass_optimize_widening_mul::execute): > Likewise. > > * lto.c (lto_wpa_write_files): Prepare all bodies for streaming. > Index: gimple-streamer-out.c > =================================================================== > --- gimple-streamer-out.c (revision 276850) > +++ gimple-streamer-out.c (working copy) > @@ -57,7 +57,7 @@ output_phi (struct output_block *ob, gph > /* Emit statement STMT on the main stream of output block OB. */ > > static void > -output_gimple_stmt (struct output_block *ob, gimple *stmt) > +output_gimple_stmt (struct output_block *ob, struct function *fn, gimple > *stmt) > { > unsigned i; > enum gimple_code code; > @@ -80,7 +80,7 @@ output_gimple_stmt (struct output_block > as_a <gassign *> (stmt)), > 1); > bp_pack_value (&bp, gimple_has_volatile_ops (stmt), 1); > - hist = gimple_histogram_value (cfun, stmt); > + hist = gimple_histogram_value (fn, stmt); > bp_pack_value (&bp, hist != NULL, 1); > bp_pack_var_len_unsigned (&bp, stmt->subcode); > > @@ -139,7 +139,7 @@ output_gimple_stmt (struct output_block > so that we do not have to deal with type mismatches on > merged symbols during IL read in. The first operand > of GIMPLE_DEBUG must be a decl, not MEM_REF, though. */ > - if (op && (i || !is_gimple_debug (stmt))) > + if (!flag_wpa && op && (i || !is_gimple_debug (stmt))) > { > basep = &op; > if (TREE_CODE (*basep) == ADDR_EXPR) > @@ -147,7 +147,7 @@ output_gimple_stmt (struct output_block > while (handled_component_p (*basep)) > basep = &TREE_OPERAND (*basep, 0); > if (VAR_P (*basep) > - && !auto_var_in_fn_p (*basep, current_function_decl) > + && !auto_var_in_fn_p (*basep, fn->decl) > && !DECL_REGISTER (*basep)) > { > bool volatilep = TREE_THIS_VOLATILE (*basep); > @@ -228,7 +228,7 @@ output_bb (struct output_block *ob, basi > print_gimple_stmt (streamer_dump_file, stmt, 0, TDF_SLIM); > } > > - output_gimple_stmt (ob, stmt); > + output_gimple_stmt (ob, fn, stmt); > > /* Emit the EH region holding STMT. */ > region = lookup_stmt_eh_lp_fn (fn, stmt); > Index: lto/lto.c > =================================================================== > --- lto/lto.c (revision 276850) > +++ lto/lto.c (working copy) > @@ -304,6 +304,13 @@ lto_wpa_write_files (void) > > timevar_push (TV_WHOPR_WPA_IO); > > + cgraph_node *node; > + /* Do body modifications needed for streaming before we fork out > + worker processes. */ > + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) > + if (gimple_has_body_p (node->decl)) > + lto_prepare_function_for_streaming (node); > + > /* Generate a prefix for the LTRANS unit files. */ > blen = strlen (ltrans_output_list); > temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o")); > Index: lto-streamer-out.c > =================================================================== > --- lto-streamer-out.c (revision 276850) > +++ lto-streamer-out.c (working copy) > @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. > #include "debug.h" > #include "omp-offload.h" > #include "print-tree.h" > +#include "tree-dfa.h" > > > static void lto_write_tree (struct output_block*, tree, bool); > @@ -1893,7 +1894,7 @@ output_cfg (struct output_block *ob, str > > streamer_write_hwi (ob, -1); > > - bb = ENTRY_BLOCK_PTR_FOR_FN (cfun); > + bb = ENTRY_BLOCK_PTR_FOR_FN (fn); > while (bb->next_bb) > { > streamer_write_hwi (ob, bb->next_bb->index); > @@ -1902,9 +1903,6 @@ output_cfg (struct output_block *ob, str > > streamer_write_hwi (ob, -1); > > - /* ??? The cfgloop interface is tied to cfun. */ > - gcc_assert (cfun == fn); > - > /* Output the number of loops. */ > streamer_write_uhwi (ob, number_of_loops (fn)); > > @@ -2062,6 +2060,22 @@ collect_block_tree_leafs (tree root, vec > collect_block_tree_leafs (BLOCK_SUBBLOCKS (root), leafs); > } > > +/* This performs function body modifications that are needed for streaming > + to work. */ > + > +void > +lto_prepare_function_for_streaming (struct cgraph_node *node) > +{ > + if (number_of_loops (DECL_STRUCT_FUNCTION (node->decl))) > + { > + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); > + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); > + loop_optimizer_finalize (); > + pop_cfun (); > + } > + renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (node->decl)); > +} > + > /* Output the body of function NODE->DECL. */ > > static void > @@ -2085,9 +2099,6 @@ output_function (struct cgraph_node *nod > > gcc_assert (current_function_decl == NULL_TREE && cfun == NULL); > > - /* Set current_function_decl and cfun. */ > - push_cfun (fn); > - > /* Make string 0 be a NULL string. */ > streamer_write_char_stream (ob->string_stream, 0); > > @@ -2124,9 +2135,6 @@ output_function (struct cgraph_node *nod > debug info. */ > if (gimple_has_body_p (function)) > { > - /* Fixup loops if required to match discovery done in the reader. */ > - loop_optimizer_init (AVOID_CFG_MODIFICATIONS); > - > streamer_write_uhwi (ob, 1); > output_struct_function_base (ob, fn); > > @@ -2143,8 +2151,8 @@ output_function (struct cgraph_node *nod > statement numbers. We do not assign UIDs to PHIs here because > virtual PHIs get re-computed on-the-fly which would make numbers > inconsistent. */ > - set_gimple_stmt_max_uid (cfun, 0); > - FOR_ALL_BB_FN (bb, cfun) > + set_gimple_stmt_max_uid (fn, 0); > + FOR_ALL_BB_FN (bb, fn) > { > for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); > gsi_next (&gsi)) > @@ -2153,25 +2161,25 @@ output_function (struct cgraph_node *nod > > /* Virtual PHIs are not going to be streamed. */ > if (!virtual_operand_p (gimple_phi_result (stmt))) > - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun)); > + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn)); > } > for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); > gsi_next (&gsi)) > { > gimple *stmt = gsi_stmt (gsi); > - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun)); > + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn)); > } > } > /* To avoid keeping duplicate gimple IDs in the statements, renumber > virtual phis now. */ > - FOR_ALL_BB_FN (bb, cfun) > + FOR_ALL_BB_FN (bb, fn) > { > for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); > gsi_next (&gsi)) > { > gphi *stmt = gsi.phi (); > if (virtual_operand_p (gimple_phi_result (stmt))) > - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun)); > + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn)); > } > } > > @@ -2183,9 +2191,6 @@ output_function (struct cgraph_node *nod > streamer_write_record_start (ob, LTO_null); > > output_cfg (ob, fn); > - > - loop_optimizer_finalize (); > - pop_cfun (); > } > else > streamer_write_uhwi (ob, 0); > Index: lto-streamer.h > =================================================================== > --- lto-streamer.h (revision 276850) > +++ lto-streamer.h (working copy) > @@ -909,6 +909,7 @@ void lto_output_decl_state_refs (struct > struct lto_out_decl_state *); > void lto_output_location (struct output_block *, struct bitpack_d *, > location_t); > void lto_output_init_mode_table (void); > +void lto_prepare_function_for_streaming (cgraph_node *); > > > /* In lto-cgraph.c */ > Index: passes.c > =================================================================== > --- passes.c (revision 276850) > +++ passes.c (working copy) > @@ -2705,20 +2705,12 @@ ipa_write_summaries (void) > { > struct cgraph_node *node = order[i]; > > - if (gimple_has_body_p (node->decl)) > + if (node->definition && node->need_lto_streaming) > { > - /* When streaming out references to statements as part of some IPA > - pass summary, the statements need to have uids assigned and the > - following does that for all the IPA passes here. Naturally, this > - ordering then matches the one IPA-passes get in their stmt_fixup > - hooks. */ > - > - push_cfun (DECL_STRUCT_FUNCTION (node->decl)); > - renumber_gimple_stmt_uids (); > - pop_cfun (); > + if (gimple_has_body_p (node->decl)) > + lto_prepare_function_for_streaming (node); > + lto_set_symtab_encoder_in_partition (encoder, node); > } > - if (node->definition && node->need_lto_streaming) > - lto_set_symtab_encoder_in_partition (encoder, node); > } > > FOR_EACH_DEFINED_FUNCTION (node) > @@ -2786,28 +2778,13 @@ void > ipa_write_optimization_summaries (lto_symtab_encoder_t encoder) > { > struct lto_out_decl_state *state = lto_new_out_decl_state (); > - lto_symtab_encoder_iterator lsei; > state->symtab_node_encoder = encoder; > > lto_output_init_mode_table (); > lto_push_out_decl_state (state); > - for (lsei = lsei_start_function_in_partition (encoder); > - !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei)) > - { > - struct cgraph_node *node = lsei_cgraph_node (lsei); > - /* When streaming out references to statements as part of some IPA > - pass summary, the statements need to have uids assigned. > - > - For functions newly born at WPA stage we need to initialize > - the uids here. */ > - if (node->definition > - && gimple_has_body_p (node->decl)) > - { > - push_cfun (DECL_STRUCT_FUNCTION (node->decl)); > - renumber_gimple_stmt_uids (); > - pop_cfun (); > - } > - } > + > + /* Be sure that we did not forget to renumber stmt uids. */ > + gcc_checking_assert (flag_wpa); > > gcc_assert (flag_wpa); > pass_manager *passes = g->get_passes (); > Index: tree-dfa.c > =================================================================== > --- tree-dfa.c (revision 276850) > +++ tree-dfa.c (working copy) > @@ -61,23 +61,23 @@ static void collect_dfa_stats (struct df > /* Renumber all of the gimple stmt uids. */ > > void > -renumber_gimple_stmt_uids (void) > +renumber_gimple_stmt_uids (struct function *fun) > { > basic_block bb; > > - set_gimple_stmt_max_uid (cfun, 0); > - FOR_ALL_BB_FN (bb, cfun) > + set_gimple_stmt_max_uid (fun, 0); > + FOR_ALL_BB_FN (bb, fun) > { > gimple_stmt_iterator bsi; > for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) > { > gimple *stmt = gsi_stmt (bsi); > - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun)); > + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun)); > } > for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) > { > gimple *stmt = gsi_stmt (bsi); > - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun)); > + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun)); > } > } > } > Index: tree-dfa.h > =================================================================== > --- tree-dfa.h (revision 276850) > +++ tree-dfa.h (working copy) > @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. > #ifndef GCC_TREE_DFA_H > #define GCC_TREE_DFA_H > > -extern void renumber_gimple_stmt_uids (void); > +extern void renumber_gimple_stmt_uids (struct function *); > extern void renumber_gimple_stmt_uids_in_blocks (basic_block *, int); > extern void dump_variable (FILE *, tree); > extern void debug_variable (tree); > Index: tree-ssa-dse.c > =================================================================== > --- tree-ssa-dse.c (revision 276850) > +++ tree-ssa-dse.c (working copy) > @@ -1113,7 +1113,7 @@ pass_dse::execute (function *fun) > { > need_eh_cleanup = BITMAP_ALLOC (NULL); > > - renumber_gimple_stmt_uids (); > + renumber_gimple_stmt_uids (cfun); > > /* We might consider making this a property of each pass so that it > can be [re]computed on an as-needed basis. Particularly since > Index: tree-ssa-math-opts.c > =================================================================== > --- tree-ssa-math-opts.c (revision 276850) > +++ tree-ssa-math-opts.c (working copy) > @@ -3850,7 +3850,7 @@ pass_optimize_widening_mul::execute (fun > > memset (&widen_mul_stats, 0, sizeof (widen_mul_stats)); > calculate_dominance_info (CDI_DOMINATORS); > - renumber_gimple_stmt_uids (); > + renumber_gimple_stmt_uids (cfun); > > math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun)); >