> This patch actually adds the analysis bits of IPA-SRA - both the
> function summary generation and the interprocedural analysis and
> decision stage.  The transformation itself then happens in the call
> graph cloning infrastructure changes which are in the previous patch.
> Please see the cover letter of the whole patch-set for more
> information.
> 
> This is mostly only a rebase on the current trunk of the earlier
> submission, the only functional change is that the pass does not clone
> when all the work (unused parameter removal) has already been done by
> IPA-CP.
> 
> Martin
> 
> 2019-08-20  Martin Jambor  <mjam...@suse.cz>
> 
>         * Makefile.in (GTFILES): Added ipa-sra.c.
>         (OBJS): Added ipa-sra.o.
>         * dbgcnt.def: Added ipa_sra_params and ipa_sra_retvalues.
>         * doc/invoke.texi (ipa-sra-max-replacements): New.
>         * ipa-sra.c: New file.
>         * lto-section-in.c (lto_section_name): Added ipa-sra section.
>         * lto-streamer.h (lto_section_type): Likewise.
>         * params.def (PARAM_IPA_SRA_MAX_REPLACEMENTS): New.
>         * passes.def: Add new IPA-SRA.
>         * tree-pass.h (make_pass_ipa_sra): Declare.
OK
> +#define IPA_SRA_MAX_PARAM_FLOW_LEN 7
I would move this to the place ISRA_ARG_SIZE_LIMIT_BITS is defined so
they appaer at same place.  Perhaps C++ way would be to use constant
member variable?
> +
> +/* Structure to describe which formal parameters feed into a particular 
> actual
> +   arguments.  */
> +
> +struct isra_param_flow
> +{
> +  /* Number of elements in array inputs that contain valid data.  */
> +  char length;
> +  /* Indices of formal parameters that feed into the described actual 
> argument.
> +     If aggregate_pass_through or pointer_pass_through below are true, it 
> must
> +     contain exactly one element which is passed through from a formal
> +     parameter if the given number.  Otherwise, the array contains indices of
> +     collee's formal parameters which are used to calculate value of this
> +     actual argument. */
> +  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];
> +
> +  /* Offset within the formal parameter.  */
> +  unsigned unit_offset;
> +  /* Size of the portion of the formal parameter that is being passed.  */
> +  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

> +
> +  /* True when the value of this actual copy is a portion of a formal
> +     parameter.  */
> +  unsigned aggregate_pass_through : 1;
> +  /* True when the value of this actual copy is a verbatim pass through of an
> +     obtained pointer.  */
> +  unsigned pointer_pass_through : 1;
> +  /* True when it is safe to copy access candidates here from the callee, 
> which
> +     would mean introducing dereferences into callers of the caller.  */
> +  unsigned safe_to_import_accesses : 1;
> +};
> +
> +/* Strucutre used to convey information about calls from the intra-procedurwl
> +   analysis stage to inter-procedural one.  */
> +
> +class isra_call_summary
> +{
> +public:
> +  isra_call_summary ()
> +    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
> +      m_bit_aligned_arg (false)
> +  {}
> +
> +  void init_inputs (unsigned arg_count);
> +  void dump (FILE *f);
> +
> +  /* Information about what formal parameters of the caller are used to 
> compute
> +     indivisual actual arguments of this call.  */
> +  auto_vec <isra_param_flow> m_arg_flow;
> +
> +  /* Set to true if the call statement does not have a LHS.  */
> +  unsigned m_return_ignored : 1;
> +
> +  /* Set to true if the LHS of call statement is only used to construct the
> +     return value of the caller.  */
> +  unsigned m_return_returned : 1;
> +
> +  /* Set when any of the call arguments are not byte-aligned.  */
> +  unsigned m_bit_aligned_arg : 1;
> +};
> +
> +/* Class to manage function summaries.  */
> +
> +class GTY((user)) ipa_sra_function_summaries
> +  : public function_summary <isra_func_summary *>
> +{
> +public:
> +  ipa_sra_function_summaries (symbol_table *table, bool ggc):
> +    function_summary<isra_func_summary *> (table, ggc) { }
> +
> +  virtual void duplicate (cgraph_node *, cgraph_node *,
> +                       isra_func_summary *old_sum,
> +                       isra_func_summary *new_sum);
> +};
> +
> +/* Hook that is called by summary when a node is duplicated.  */
> +
> +void
> +ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
> +                                    isra_func_summary *old_sum,
> +                                    isra_func_summary *new_sum)
> +{
> +  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
> +     useless.  */
> +  new_sum->m_candidate  = old_sum->m_candidate;
> +  new_sum->m_returns_value = old_sum->m_returns_value;
> +  new_sum->m_return_ignored = old_sum->m_return_ignored;
> +  gcc_assert (!old_sum->m_queued);
> +  new_sum->m_queued = false;
> +
> +  unsigned param_count = vec_safe_length (old_sum->m_parameters);
> +  if (!param_count)
> +    return;
> +  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
> +  new_sum->m_parameters->quick_grow_cleared (param_count);
> +  for (unsigned i = 0; i < param_count; i++)
> +    {
> +      isra_param_desc *s = &(*old_sum->m_parameters)[i];
> +      isra_param_desc *d = &(*new_sum->m_parameters)[i];
> +
> +      d->param_size_limit = s->param_size_limit;
> +      d->size_reached = s->size_reached;
> +      d->locally_unused = s->locally_unused;
> +      d->split_candidate = s->split_candidate;
> +      d->by_ref = s->by_ref;
> +
> +      unsigned acc_count = vec_safe_length (s->accesses);
> +      vec_safe_reserve_exact (d->accesses, acc_count);
> +      for (unsigned j = 0; j < acc_count; j++)
> +     {
> +       param_access *from = (*s->accesses)[j];
> +       param_access *to = ggc_cleared_alloc<param_access> ();
> +       to->type = from->type;
> +       to->alias_ptr_type = from->alias_ptr_type;
> +       to->unit_offset = from->unit_offset;
> +       to->unit_size = from->unit_size;
> +       to->certain = from->certain;
> +       d->accesses->quick_push (to);
> +     }
> +    }
> +}
> +
> +/* Pointer to the pass function summary holder.  */
> +
> +static GTY(()) ipa_sra_function_summaries *func_sums;
> +
> +/* Class to manage call summaries.  */
> +
> +class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
> +{
> +public:
> +  ipa_sra_call_summaries (symbol_table *table):
> +    call_summary<isra_call_summary *> (table) { }
> +
> +  /* Duplicate info when an edge is cloned.  */
> +  virtual void duplicate (cgraph_edge *, cgraph_edge *,
> +                       isra_call_summary *old_sum,
> +                       isra_call_summary *new_sum);
> +};
> +
> +static ipa_sra_call_summaries *call_sums;
> +
> +
> +/* Initialize m_arg_flow of a particular instance of isra_call_summary.
> +   ARG_COUNT is the number of actual arguments passed.  */
> +
> +void
> +isra_call_summary::init_inputs (unsigned arg_count)
> +{
> +  if (arg_count == 0)
> +    {
> +      gcc_checking_assert (m_arg_flow.length () == 0);
> +      return;
> +    }
> +  if (m_arg_flow.length () == 0)
> +    {
> +      m_arg_flow.reserve_exact (arg_count);
> +      m_arg_flow.quick_grow_cleared (arg_count);
> +    }
> +  else
> +    gcc_checking_assert (arg_count == m_arg_flow.length ());
> +}
> +
> +/* Dump all information in call summary to F.  */
> +
> +void
> +isra_call_summary::dump (FILE *f)
> +{
> +  if (m_return_ignored)
> +    fprintf (f, "    return value ignored\n");
> +  if (m_return_returned)
> +    fprintf (f, "    return value used only to compute caller return 
> value\n");
> +  for (unsigned i = 0; i < m_arg_flow.length (); i++)
> +    {
> +      fprintf (f, "    Parameter %u:\n", i);
> +      isra_param_flow *ipf = &m_arg_flow[i];
> +
> +      if (ipf->length)
> +     {
> +       bool first = true;
> +       fprintf (f, "      Scalar param sources: ");
> +       for (int j = 0; j < ipf->length; j++)
> +         {
> +           if (!first)
> +             fprintf (f, ", ");
> +           else
> +             first = false;
> +           fprintf (f, "%i", (int) ipf->inputs[j]);
> +         }
> +       fprintf (f, "\n");
> +     }
> +      if (ipf->aggregate_pass_through)
> +     fprintf (f, "      Aggregate pass through from the param given above, "
> +              "unit offset: %u , unit size: %u\n",
> +              ipf->unit_offset, ipf->unit_size);
> +      if (ipf->pointer_pass_through)
> +     fprintf (f, "      Pointer pass through from the param given above, "
> +              "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
> +    }
> +}
> +
> +/* Duplicate edge summare when an edge is cloned.  */
> +
> +void
> +ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
> +                                isra_call_summary *old_sum,
> +                                isra_call_summary *new_sum)
> +{
> +  unsigned arg_count = old_sum->m_arg_flow.length ();
> +  new_sum->init_inputs (arg_count);
> +  for (unsigned i = 0; i < arg_count; i++)
> +    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
> +
> +  new_sum->m_return_ignored = old_sum->m_return_ignored;
> +  new_sum->m_return_returned = old_sum->m_return_returned;
> +  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
> +}
> +
> +
> +/* With all GTY stuff done, we can move to anonymous namespace.  */
> +namespace {
> +
> +/* Return false the function is apparently unsuitable for IPA-SRA based on 
> it's
> +   attributes, return true otherwise.  NODE is the cgraph node of the current
> +   function.  */
> +
> +static bool
> +ipa_sra_preliminary_function_checks (cgraph_node *node)
> +{
> +  if (!node->local.can_change_signature)
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Function cannot change signature.\n");
> +      return false;
> +    }
> +
> +  if (!tree_versionable_function_p (node->decl))
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Function is not versionable.\n");
> +      return false;
> +    }
> +
> +  if (!opt_for_fn (node->decl, optimize)
> +      || !opt_for_fn (node->decl, flag_ipa_sra))
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
> +              "function.\n");
> +      return false;
> +    }
> +
> +  if (DECL_VIRTUAL_P (node->decl))
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Function is a virtual method.\n");
> +      return false;
> +    }
> +
> +  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
> +  if (fun->stdarg)
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Function uses stdarg. \n");
> +      return false;
> +    }
> +
> +  if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Function type has attributes. \n");
> +      return false;
> +    }
> +
> +  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Always inline function will be inlined "
> +              "anyway. \n");
> +      return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Quick mapping from a decl to its param descriptor.  */
> +
> +static hash_map<tree, gensum_param_desc *> *decl2desc;
> +
> +/* Countdown of allowe Alias analysis steps during summary building.  */
> +
> +static int aa_walking_limit;
> +
> +/* This is a table in which for each basic block and parameter there is a
> +   distance (offset + size) in that parameter which is dereferenced and
> +   accessed in that BB.  */
> +static HOST_WIDE_INT *bb_dereferences = NULL;
> +/* How many by-reference parameters there are in the current function.  */
> +static int by_ref_count;
> +
> +/* Bitmap of BBs that can cause the function to "stop" progressing by
> +   returning, throwing externally, looping infinitely or calling a function
> +   which might abort etc.. */
> +static bitmap final_bbs;
> +
> +/* Obstack to allocate various small structures required only when generating
> +   summary for a function.  */
> +static struct obstack gensum_obstack;

Perhaps place the static vars at the beginig of the namespace.
I think "static" should not be used when declaring vars in a namespace.
> +/* Scan body function described by NODE and FUN and create access trees for
> +   parameters.  */
> +
> +static void
> +scan_function (cgraph_node *node, struct function *fun)
> +{
> +  basic_block bb;
> +
> +  FOR_EACH_BB_FN (bb, fun)
> +    {
> +      gimple_stmt_iterator gsi;
> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +     {
> +       gimple *stmt = gsi_stmt (gsi);
> +
> +       if (stmt_can_throw_external (fun, stmt))
> +         bitmap_set_bit (final_bbs, bb->index);
> +       switch (gimple_code (stmt))
> +         {
> +         case GIMPLE_RETURN:
> +           {
> +             tree t = gimple_return_retval (as_a <greturn *> (stmt));
> +             if (t != NULL_TREE)
> +               scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
> +             bitmap_set_bit (final_bbs, bb->index);
> +           }
> +           break;
> +
> +         case GIMPLE_ASSIGN:
> +           if (gimple_assign_single_p (stmt)
> +               && !gimple_clobber_p (stmt))
> +             {
> +               tree rhs = gimple_assign_rhs1 (stmt);
> +               scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
> +               tree lhs = gimple_assign_lhs (stmt);
> +               scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
> +             }
> +           break;
> +
> +         case GIMPLE_CALL:
> +           {
> +             unsigned argument_count = gimple_call_num_args (stmt);
> +             scan_call_info call_info;
> +             call_info.cs = node->get_edge (stmt);
> +             call_info.argument_count = argument_count;
> +
> +             for (unsigned i = 0; i < argument_count; i++)
> +               {
> +                 call_info.arg_idx = i;
> +                 scan_expr_access (gimple_call_arg (stmt, i), stmt,
> +                                   ISRA_CTX_ARG, bb, &call_info);
> +               }
> +
> +             tree lhs = gimple_call_lhs (stmt);
> +             if (lhs)
> +               scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
> +             int flags = gimple_call_flags (stmt);
> +             if ((flags & (ECF_CONST | ECF_PURE)) == 0)
> +               bitmap_set_bit (final_bbs, bb->index);
> +           }
> +           break;
> +
> +         case GIMPLE_ASM:
> +           {
> +             gasm *asm_stmt = as_a <gasm *> (stmt);
> +             walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
> +                                            asm_visit_addr);
> +             bitmap_set_bit (final_bbs, bb->index);
> +
> +             for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
> +               {
> +                 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
> +                 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
> +               }
> +             for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
> +               {
> +                 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
> +                 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
> +               }
> +           }

Can't this be done by walk_stmt_load_store_addr_ops?
> +/* Write intraproceural analysis information about NODE and all of its 
> outgoing
> +   edges into a stream for LTO WPA.  */
> +
> +static void
> +isra_write_node_summary (output_block *ob, cgraph_node *node)
> +{
> +  isra_func_summary *ifs = func_sums->get (node);
> +  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
> +  int node_ref = lto_symtab_encoder_encode (encoder, node);
> +  streamer_write_uhwi (ob, node_ref);
> +
> +  unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
> +  streamer_write_uhwi (ob, param_desc_count);
> +  for (unsigned i = 0; i < param_desc_count; i++)
> +    {
> +      isra_param_desc *desc = &(*ifs->m_parameters)[i];
> +      unsigned access_count = vec_safe_length (desc->accesses);
> +      streamer_write_uhwi (ob, access_count);
> +      for (unsigned j = 0; j < access_count; j++)
> +     {
> +       param_access *acc = (*desc->accesses)[j];
> +       stream_write_tree (ob, acc->type, true);
> +       stream_write_tree (ob, acc->alias_ptr_type, true);

I wonder how things will work when type contains VLA and thus is treamed
in the local stream?

I believe Richi already commented on the gimple analysis bits.

Honza

Reply via email to