> This patch actually adds the analysis bits of IPA-SRA - both the > function summary generation and the interprocedural analysis and > decision stage. The transformation itself then happens in the call > graph cloning infrastructure changes which are in the previous patch. > Please see the cover letter of the whole patch-set for more > information. > > This is mostly only a rebase on the current trunk of the earlier > submission, the only functional change is that the pass does not clone > when all the work (unused parameter removal) has already been done by > IPA-CP. > > Martin > > 2019-08-20 Martin Jambor <mjam...@suse.cz> > > * Makefile.in (GTFILES): Added ipa-sra.c. > (OBJS): Added ipa-sra.o. > * dbgcnt.def: Added ipa_sra_params and ipa_sra_retvalues. > * doc/invoke.texi (ipa-sra-max-replacements): New. > * ipa-sra.c: New file. > * lto-section-in.c (lto_section_name): Added ipa-sra section. > * lto-streamer.h (lto_section_type): Likewise. > * params.def (PARAM_IPA_SRA_MAX_REPLACEMENTS): New. > * passes.def: Add new IPA-SRA. > * tree-pass.h (make_pass_ipa_sra): Declare. OK > +#define IPA_SRA_MAX_PARAM_FLOW_LEN 7 I would move this to the place ISRA_ARG_SIZE_LIMIT_BITS is defined so they appaer at same place. Perhaps C++ way would be to use constant member variable? > + > +/* Structure to describe which formal parameters feed into a particular > actual > + arguments. */ > + > +struct isra_param_flow > +{ > + /* Number of elements in array inputs that contain valid data. */ > + char length; > + /* Indices of formal parameters that feed into the described actual > argument. > + If aggregate_pass_through or pointer_pass_through below are true, it > must > + contain exactly one element which is passed through from a formal > + parameter if the given number. Otherwise, the array contains indices of > + collee's formal parameters which are used to calculate value of this > + actual argument. */ > + unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN]; > + > + /* Offset within the formal parameter. */ > + unsigned unit_offset; > + /* Size of the portion of the formal parameter that is being passed. */ > + unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
> + > + /* True when the value of this actual copy is a portion of a formal > + parameter. */ > + unsigned aggregate_pass_through : 1; > + /* True when the value of this actual copy is a verbatim pass through of an > + obtained pointer. */ > + unsigned pointer_pass_through : 1; > + /* True when it is safe to copy access candidates here from the callee, > which > + would mean introducing dereferences into callers of the caller. */ > + unsigned safe_to_import_accesses : 1; > +}; > + > +/* Strucutre used to convey information about calls from the intra-procedurwl > + analysis stage to inter-procedural one. */ > + > +class isra_call_summary > +{ > +public: > + isra_call_summary () > + : m_arg_flow (), m_return_ignored (false), m_return_returned (false), > + m_bit_aligned_arg (false) > + {} > + > + void init_inputs (unsigned arg_count); > + void dump (FILE *f); > + > + /* Information about what formal parameters of the caller are used to > compute > + indivisual actual arguments of this call. */ > + auto_vec <isra_param_flow> m_arg_flow; > + > + /* Set to true if the call statement does not have a LHS. */ > + unsigned m_return_ignored : 1; > + > + /* Set to true if the LHS of call statement is only used to construct the > + return value of the caller. */ > + unsigned m_return_returned : 1; > + > + /* Set when any of the call arguments are not byte-aligned. */ > + unsigned m_bit_aligned_arg : 1; > +}; > + > +/* Class to manage function summaries. */ > + > +class GTY((user)) ipa_sra_function_summaries > + : public function_summary <isra_func_summary *> > +{ > +public: > + ipa_sra_function_summaries (symbol_table *table, bool ggc): > + function_summary<isra_func_summary *> (table, ggc) { } > + > + virtual void duplicate (cgraph_node *, cgraph_node *, > + isra_func_summary *old_sum, > + isra_func_summary *new_sum); > +}; > + > +/* Hook that is called by summary when a node is duplicated. */ > + > +void > +ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *, > + isra_func_summary *old_sum, > + isra_func_summary *new_sum) > +{ > + /* TODO: Somehow stop copying when ISRA is doing the cloning, it is > + useless. */ > + new_sum->m_candidate = old_sum->m_candidate; > + new_sum->m_returns_value = old_sum->m_returns_value; > + new_sum->m_return_ignored = old_sum->m_return_ignored; > + gcc_assert (!old_sum->m_queued); > + new_sum->m_queued = false; > + > + unsigned param_count = vec_safe_length (old_sum->m_parameters); > + if (!param_count) > + return; > + vec_safe_reserve_exact (new_sum->m_parameters, param_count); > + new_sum->m_parameters->quick_grow_cleared (param_count); > + for (unsigned i = 0; i < param_count; i++) > + { > + isra_param_desc *s = &(*old_sum->m_parameters)[i]; > + isra_param_desc *d = &(*new_sum->m_parameters)[i]; > + > + d->param_size_limit = s->param_size_limit; > + d->size_reached = s->size_reached; > + d->locally_unused = s->locally_unused; > + d->split_candidate = s->split_candidate; > + d->by_ref = s->by_ref; > + > + unsigned acc_count = vec_safe_length (s->accesses); > + vec_safe_reserve_exact (d->accesses, acc_count); > + for (unsigned j = 0; j < acc_count; j++) > + { > + param_access *from = (*s->accesses)[j]; > + param_access *to = ggc_cleared_alloc<param_access> (); > + to->type = from->type; > + to->alias_ptr_type = from->alias_ptr_type; > + to->unit_offset = from->unit_offset; > + to->unit_size = from->unit_size; > + to->certain = from->certain; > + d->accesses->quick_push (to); > + } > + } > +} > + > +/* Pointer to the pass function summary holder. */ > + > +static GTY(()) ipa_sra_function_summaries *func_sums; > + > +/* Class to manage call summaries. */ > + > +class ipa_sra_call_summaries: public call_summary <isra_call_summary *> > +{ > +public: > + ipa_sra_call_summaries (symbol_table *table): > + call_summary<isra_call_summary *> (table) { } > + > + /* Duplicate info when an edge is cloned. */ > + virtual void duplicate (cgraph_edge *, cgraph_edge *, > + isra_call_summary *old_sum, > + isra_call_summary *new_sum); > +}; > + > +static ipa_sra_call_summaries *call_sums; > + > + > +/* Initialize m_arg_flow of a particular instance of isra_call_summary. > + ARG_COUNT is the number of actual arguments passed. */ > + > +void > +isra_call_summary::init_inputs (unsigned arg_count) > +{ > + if (arg_count == 0) > + { > + gcc_checking_assert (m_arg_flow.length () == 0); > + return; > + } > + if (m_arg_flow.length () == 0) > + { > + m_arg_flow.reserve_exact (arg_count); > + m_arg_flow.quick_grow_cleared (arg_count); > + } > + else > + gcc_checking_assert (arg_count == m_arg_flow.length ()); > +} > + > +/* Dump all information in call summary to F. */ > + > +void > +isra_call_summary::dump (FILE *f) > +{ > + if (m_return_ignored) > + fprintf (f, " return value ignored\n"); > + if (m_return_returned) > + fprintf (f, " return value used only to compute caller return > value\n"); > + for (unsigned i = 0; i < m_arg_flow.length (); i++) > + { > + fprintf (f, " Parameter %u:\n", i); > + isra_param_flow *ipf = &m_arg_flow[i]; > + > + if (ipf->length) > + { > + bool first = true; > + fprintf (f, " Scalar param sources: "); > + for (int j = 0; j < ipf->length; j++) > + { > + if (!first) > + fprintf (f, ", "); > + else > + first = false; > + fprintf (f, "%i", (int) ipf->inputs[j]); > + } > + fprintf (f, "\n"); > + } > + if (ipf->aggregate_pass_through) > + fprintf (f, " Aggregate pass through from the param given above, " > + "unit offset: %u , unit size: %u\n", > + ipf->unit_offset, ipf->unit_size); > + if (ipf->pointer_pass_through) > + fprintf (f, " Pointer pass through from the param given above, " > + "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses); > + } > +} > + > +/* Duplicate edge summare when an edge is cloned. */ > + > +void > +ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *, > + isra_call_summary *old_sum, > + isra_call_summary *new_sum) > +{ > + unsigned arg_count = old_sum->m_arg_flow.length (); > + new_sum->init_inputs (arg_count); > + for (unsigned i = 0; i < arg_count; i++) > + new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i]; > + > + new_sum->m_return_ignored = old_sum->m_return_ignored; > + new_sum->m_return_returned = old_sum->m_return_returned; > + new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg; > +} > + > + > +/* With all GTY stuff done, we can move to anonymous namespace. */ > +namespace { > + > +/* Return false the function is apparently unsuitable for IPA-SRA based on > it's > + attributes, return true otherwise. NODE is the cgraph node of the current > + function. */ > + > +static bool > +ipa_sra_preliminary_function_checks (cgraph_node *node) > +{ > + if (!node->local.can_change_signature) > + { > + if (dump_file) > + fprintf (dump_file, "Function cannot change signature.\n"); > + return false; > + } > + > + if (!tree_versionable_function_p (node->decl)) > + { > + if (dump_file) > + fprintf (dump_file, "Function is not versionable.\n"); > + return false; > + } > + > + if (!opt_for_fn (node->decl, optimize) > + || !opt_for_fn (node->decl, flag_ipa_sra)) > + { > + if (dump_file) > + fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this " > + "function.\n"); > + return false; > + } > + > + if (DECL_VIRTUAL_P (node->decl)) > + { > + if (dump_file) > + fprintf (dump_file, "Function is a virtual method.\n"); > + return false; > + } > + > + struct function *fun = DECL_STRUCT_FUNCTION (node->decl); > + if (fun->stdarg) > + { > + if (dump_file) > + fprintf (dump_file, "Function uses stdarg. \n"); > + return false; > + } > + > + if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))) > + { > + if (dump_file) > + fprintf (dump_file, "Function type has attributes. \n"); > + return false; > + } > + > + if (DECL_DISREGARD_INLINE_LIMITS (node->decl)) > + { > + if (dump_file) > + fprintf (dump_file, "Always inline function will be inlined " > + "anyway. \n"); > + return false; > + } > + > + return true; > +} > + > +/* Quick mapping from a decl to its param descriptor. */ > + > +static hash_map<tree, gensum_param_desc *> *decl2desc; > + > +/* Countdown of allowe Alias analysis steps during summary building. */ > + > +static int aa_walking_limit; > + > +/* This is a table in which for each basic block and parameter there is a > + distance (offset + size) in that parameter which is dereferenced and > + accessed in that BB. */ > +static HOST_WIDE_INT *bb_dereferences = NULL; > +/* How many by-reference parameters there are in the current function. */ > +static int by_ref_count; > + > +/* Bitmap of BBs that can cause the function to "stop" progressing by > + returning, throwing externally, looping infinitely or calling a function > + which might abort etc.. */ > +static bitmap final_bbs; > + > +/* Obstack to allocate various small structures required only when generating > + summary for a function. */ > +static struct obstack gensum_obstack; Perhaps place the static vars at the beginig of the namespace. I think "static" should not be used when declaring vars in a namespace. > +/* Scan body function described by NODE and FUN and create access trees for > + parameters. */ > + > +static void > +scan_function (cgraph_node *node, struct function *fun) > +{ > + basic_block bb; > + > + FOR_EACH_BB_FN (bb, fun) > + { > + gimple_stmt_iterator gsi; > + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + > + if (stmt_can_throw_external (fun, stmt)) > + bitmap_set_bit (final_bbs, bb->index); > + switch (gimple_code (stmt)) > + { > + case GIMPLE_RETURN: > + { > + tree t = gimple_return_retval (as_a <greturn *> (stmt)); > + if (t != NULL_TREE) > + scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb); > + bitmap_set_bit (final_bbs, bb->index); > + } > + break; > + > + case GIMPLE_ASSIGN: > + if (gimple_assign_single_p (stmt) > + && !gimple_clobber_p (stmt)) > + { > + tree rhs = gimple_assign_rhs1 (stmt); > + scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb); > + tree lhs = gimple_assign_lhs (stmt); > + scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb); > + } > + break; > + > + case GIMPLE_CALL: > + { > + unsigned argument_count = gimple_call_num_args (stmt); > + scan_call_info call_info; > + call_info.cs = node->get_edge (stmt); > + call_info.argument_count = argument_count; > + > + for (unsigned i = 0; i < argument_count; i++) > + { > + call_info.arg_idx = i; > + scan_expr_access (gimple_call_arg (stmt, i), stmt, > + ISRA_CTX_ARG, bb, &call_info); > + } > + > + tree lhs = gimple_call_lhs (stmt); > + if (lhs) > + scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb); > + int flags = gimple_call_flags (stmt); > + if ((flags & (ECF_CONST | ECF_PURE)) == 0) > + bitmap_set_bit (final_bbs, bb->index); > + } > + break; > + > + case GIMPLE_ASM: > + { > + gasm *asm_stmt = as_a <gasm *> (stmt); > + walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL, > + asm_visit_addr); > + bitmap_set_bit (final_bbs, bb->index); > + > + for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++) > + { > + tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i)); > + scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb); > + } > + for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++) > + { > + tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i)); > + scan_expr_access (t, stmt, ISRA_CTX_STORE, bb); > + } > + } Can't this be done by walk_stmt_load_store_addr_ops? > +/* Write intraproceural analysis information about NODE and all of its > outgoing > + edges into a stream for LTO WPA. */ > + > +static void > +isra_write_node_summary (output_block *ob, cgraph_node *node) > +{ > + isra_func_summary *ifs = func_sums->get (node); > + lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder; > + int node_ref = lto_symtab_encoder_encode (encoder, node); > + streamer_write_uhwi (ob, node_ref); > + > + unsigned param_desc_count = vec_safe_length (ifs->m_parameters); > + streamer_write_uhwi (ob, param_desc_count); > + for (unsigned i = 0; i < param_desc_count; i++) > + { > + isra_param_desc *desc = &(*ifs->m_parameters)[i]; > + unsigned access_count = vec_safe_length (desc->accesses); > + streamer_write_uhwi (ob, access_count); > + for (unsigned j = 0; j < access_count; j++) > + { > + param_access *acc = (*desc->accesses)[j]; > + stream_write_tree (ob, acc->type, true); > + stream_write_tree (ob, acc->alias_ptr_type, true); I wonder how things will work when type contains VLA and thus is treamed in the local stream? I believe Richi already commented on the gimple analysis bits. Honza