Hi,
this patch extends ipa-fnsummary to anticipate statements that will be removed
by SRA.  This is done by looking for calls passing addresses of automatic
variables.  In function body we look for dereferences from pointers of such
variables and mark them with new not_sra_candidate condition.

This is just first step which is overly optimistic.  We do not try to prove that
given automatic variable will not be SRAed even after inlining.  We now also
optimistically assume that the transformation will always happen.  I will 
restrict
this in a followup patch, but I think it is useful to gether some data on how
much code is affected by this.

This is motivated by PR109849 where we fail to fully inline push_back.
The patch alone does not solve the problem even for -O3, but improves
analysis in this case.

Bootstrapped/regtested x86_64-linux, commited.

gcc/ChangeLog:

        PR tree-optimization/109849
        * ipa-fnsummary.cc (evaluate_conditions_for_known_args): Add new 
parameter
        ES; handle ipa_predicate::not_sra_candidate.
        (evaluate_properties_for_edge): Pass es to
        evaluate_conditions_for_known_args.
        (ipa_fn_summary_t::duplicate): Handle sra candidates.
        (dump_ipa_call_summary): Dump points_to_possible_sra_candidate.
        (load_or_store_of_ptr_parameter): New function.
        (points_to_possible_sra_candidate_p): New function.
        (analyze_function_body): Initialize points_to_possible_sra_candidate;
        determine sra predicates.
        (estimate_ipcp_clone_size_and_time): Update call of
        evaluate_conditions_for_known_args.
        (remap_edge_params): Update points_to_possible_sra_candidate.
        (read_ipa_call_summary): Stream points_to_possible_sra_candidate
        (write_ipa_call_summary): Likewise.
        * ipa-predicate.cc (ipa_predicate::add_clause): Handle 
not_sra_candidate.
        (dump_condition): Dump it.
        * ipa-predicate.h (struct inline_param_summary): Add
        points_to_possible_sra_candidate.

gcc/testsuite/ChangeLog:

        PR tree-optimization/109849
        * g++.dg/ipa/devirt-45.C: Update template.

diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc
index a301396fb3f..a5f5a50c8a5 100644
--- a/gcc/ipa-fnsummary.cc
+++ b/gcc/ipa-fnsummary.cc
@@ -371,7 +371,8 @@ evaluate_conditions_for_known_args (struct cgraph_node 
*node,
                                    bool inline_p,
                                    ipa_auto_call_arg_values *avals,
                                    clause_t *ret_clause,
-                                   clause_t *ret_nonspec_clause)
+                                   clause_t *ret_nonspec_clause,
+                                   ipa_call_summary *es)
 {
   clause_t clause = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
   clause_t nonspec_clause = 1 << ipa_predicate::not_inlined_condition;
@@ -386,6 +387,17 @@ evaluate_conditions_for_known_args (struct cgraph_node 
*node,
       int j;
       struct expr_eval_op *op;
 
+      if (c->code == ipa_predicate::not_sra_candidate)
+       {
+         if (!inline_p
+             || !es
+             || (int)es->param.length () <= c->operand_num
+             || !es->param[c->operand_num].points_to_possible_sra_candidate)
+           clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
+         nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
+         continue;
+       }
+
       if (c->agg_contents)
        {
          if (c->code == ipa_predicate::changed
@@ -592,6 +604,7 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool 
inline_p,
   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
   class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
   class ipa_edge_args *args;
+  class ipa_call_summary *es = NULL;
 
   if (clause_ptr)
     *clause_ptr = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
@@ -603,8 +616,8 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool 
inline_p,
     {
       struct cgraph_node *caller;
       class ipa_node_params *caller_parms_info, *callee_pi = NULL;
-      class ipa_call_summary *es = ipa_call_summaries->get (e);
       int i, count = ipa_get_cs_argument_count (args);
+      es = ipa_call_summaries->get (e);
 
       if (count)
        {
@@ -720,7 +733,7 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool 
inline_p,
     }
 
   evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
-                                     nonspec_clause_ptr);
+                                     nonspec_clause_ptr, es);
 }
 
 
@@ -847,6 +860,7 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
                                          &possible_truths,
                                          /* We are going to specialize,
                                             so ignore nonspec truths.  */
+                                         NULL,
                                          NULL);
 
       info->account_size_time (0, 0, true_pred, true_pred);
@@ -1035,6 +1049,9 @@ dump_ipa_call_summary (FILE *f, int indent, struct 
cgraph_node *node,
            if (es->param[i].points_to_local_or_readonly_memory)
              fprintf (f, "%*s op%i points to local or readonly memory\n",
                       indent + 2, "", i);
+           if (es->param[i].points_to_possible_sra_candidate)
+             fprintf (f, "%*s op%i points to possible sra candidate\n",
+                      indent + 2, "", i);
          }
       if (!edge->inline_failed)
        {
@@ -1291,6 +1308,35 @@ unmodified_parm_or_parm_agg_item (struct 
ipa_func_body_info *fbi,
                                 size_p, &aggpos->by_ref);
 }
 
+/* If stmt is simple load or store of value pointed to by a function parmaeter,
+   return its index.  */
+
+static int
+load_or_store_of_ptr_parameter (ipa_func_body_info *fbi, gimple *stmt)
+{
+  if (!optimize)
+    return -1;
+  gassign *assign = dyn_cast <gassign *> (stmt);
+  if (!assign)
+    return -1;
+  tree param;
+  if (gimple_assign_load_p (stmt))
+    param = gimple_assign_rhs1 (stmt);
+  else if (gimple_store_p (stmt))
+    param = gimple_assign_lhs (stmt);
+  else
+    return -1;
+  tree base = get_base_address (param);
+  if (TREE_CODE (base) != MEM_REF
+      || TREE_CODE (TREE_OPERAND (base, 0)) != SSA_NAME
+      || !SSA_NAME_IS_DEFAULT_DEF (TREE_OPERAND (base, 0)))
+    return -1;
+  tree p = SSA_NAME_VAR (TREE_OPERAND (base, 0));
+  if (TREE_CODE (p) != PARM_DECL)
+    return -1;
+  return ipa_get_param_decl_index (fbi->info, p);
+}
+
 /* See if statement might disappear after inlining.
    0 - means not eliminated
    1 - half of statements goes away
@@ -2585,6 +2631,23 @@ points_to_local_or_readonly_memory_p (tree t)
   return false;
 }
 
+/* Return true if T is a pointer pointing to memory location that is possible
+   sra candidate if all functions it is passed to are inlined.  */
+
+static bool
+points_to_possible_sra_candidate_p (tree t)
+{
+  if (TREE_CODE (t) != ADDR_EXPR)
+    return false;
+
+  t = get_base_address (TREE_OPERAND (t, 0));
+
+  /* Automatic variables are fine.  */
+  if (DECL_P (t)
+      && auto_var_in_fn_p (t, current_function_decl))
+    return true;
+  return false;
+}
 
 /* Analyze function body for NODE.
    EARLY indicates run from early optimization pipeline.  */
@@ -2797,6 +2860,9 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
                      es->param[i].points_to_local_or_readonly_memory
                         = points_to_local_or_readonly_memory_p
                             (gimple_call_arg (stmt, i));
+                     es->param[i].points_to_possible_sra_candidate
+                        = points_to_possible_sra_candidate_p
+                            (gimple_call_arg (stmt, i));
                    }
                }
              /* We cannot setup VLA parameters during inlining.  */
@@ -2856,6 +2921,12 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
                fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
 
              ipa_predicate p = bb_predicate & will_be_nonconstant;
+             int parm = load_or_store_of_ptr_parameter (&fbi, stmt);
+             ipa_predicate sra_predicate = true;
+             if (parm != -1)
+               sra_predicate &= add_condition (info, params_summary, parm,
+                                               ptr_type_node, NULL,
+                                               
ipa_predicate::not_sra_candidate, NULL, 0);
 
              /* We can ignore statement when we proved it is never going
                 to happen, but we cannot do that for call statements
@@ -2875,7 +2946,7 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
                  if (prob)
                    {
                      ipa_predicate ip
-                       = bb_predicate & ipa_predicate::not_inlined ();
+                       = bb_predicate & ipa_predicate::not_inlined () & 
sra_predicate;
                      info->account_size_time (this_size * prob,
                                               (final_time * prob) / 2, ip,
                                               p);
@@ -2883,7 +2954,7 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
                  if (prob != 2)
                    info->account_size_time (this_size * (2 - prob),
                                             (final_time * (2 - prob) / 2),
-                                            bb_predicate,
+                                            bb_predicate & sra_predicate,
                                             p);
                }
 
@@ -3930,7 +4001,7 @@ estimate_ipcp_clone_size_and_time (struct cgraph_node 
*node,
   clause_t clause, nonspec_clause;
 
   evaluate_conditions_for_known_args (node, false, avals, &clause,
-                                     &nonspec_clause);
+                                     &nonspec_clause, NULL);
   ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
   ctx.estimate_size_and_time (estimates);
 }
@@ -4024,6 +4095,9 @@ remap_edge_params (struct cgraph_edge *inlined_edge,
                  if (inlined_es
                        ->param[id].points_to_local_or_readonly_memory)
                    es->param[i].points_to_local_or_readonly_memory = true;
+                 if (inlined_es
+                       ->param[id].points_to_possible_sra_candidate)
+                   es->param[i].points_to_possible_sra_candidate = true;
                }
              if (!es->param[i].points_to_local_or_readonly_memory
                  && jfunc->type == IPA_JF_CONST
@@ -4420,8 +4494,11 @@ read_ipa_call_summary (class lto_input_block *ib, struct 
cgraph_edge *e,
       for (i = 0; i < length; i++)
        {
          es->param[i].change_prob = streamer_read_uhwi (ib);
+         bitpack_d bp = streamer_read_bitpack (ib);
          es->param[i].points_to_local_or_readonly_memory
-           = streamer_read_uhwi (ib);
+           = bp_unpack_value (&bp, 1); 
+         es->param[i].points_to_possible_sra_candidate
+           = bp_unpack_value (&bp, 1); 
        }
     }
   else
@@ -4692,7 +4769,10 @@ write_ipa_call_summary (struct output_block *ob, struct 
cgraph_edge *e)
   for (i = 0; i < (int) es->param.length (); i++)
     {
       streamer_write_uhwi (ob, es->param[i].change_prob);
-      streamer_write_uhwi (ob, 
es->param[i].points_to_local_or_readonly_memory);
+      bp = bitpack_create (ob->main_stream);
+      bp_pack_value (&bp, es->param[i].points_to_local_or_readonly_memory, 1);
+      bp_pack_value (&bp, es->param[i].points_to_possible_sra_candidate, 1);
+      streamer_write_bitpack (&bp);
     }
 }
 
diff --git a/gcc/ipa-predicate.cc b/gcc/ipa-predicate.cc
index 29478692b17..09a253abc93 100644
--- a/gcc/ipa-predicate.cc
+++ b/gcc/ipa-predicate.cc
@@ -132,7 +132,7 @@ ipa_predicate::add_clause (conditions conditions, clause_t 
new_clause)
        cc1 = &(*conditions)[c1 - ipa_predicate::first_dynamic_condition];
        /* We have no way to represent !changed and !is_not_constant
           and thus there is no point for looking for them.  */
-       if (cc1->code == changed || cc1->code == is_not_constant)
+       if (cc1->code == changed || cc1->code == is_not_constant || cc1->code 
== not_sra_candidate)
          continue;
        for (c2 = c1 + 1; c2 < num_conditions; c2++)
          if (new_clause & (1 << c2))
@@ -142,6 +142,7 @@ ipa_predicate::add_clause (conditions conditions, clause_t 
new_clause)
              if (cc1->operand_num == cc2->operand_num
                  && vrp_operand_equal_p (cc1->val, cc2->val)
                  && cc2->code != is_not_constant
+                 && cc2->code != not_sra_candidate
                  && cc2->code != changed
                  && expr_eval_ops_equal_p (cc1->param_ops, cc2->param_ops)
                  && cc2->agg_contents == cc1->agg_contents
@@ -416,6 +417,11 @@ dump_condition (FILE *f, conditions conditions, int cond)
          fprintf (f, " changed");
          return;
        }
+      if (c->code == ipa_predicate::not_sra_candidate)
+       {
+         fprintf (f, " not sra candidate");
+         return;
+       }
       fprintf (f, " %s ", op_symbol_code (c->code));
       print_generic_expr (f, c->val);
     }
diff --git a/gcc/ipa-predicate.h b/gcc/ipa-predicate.h
index 2882bf8e6f0..5abf253d096 100644
--- a/gcc/ipa-predicate.h
+++ b/gcc/ipa-predicate.h
@@ -78,16 +78,20 @@ struct inline_param_summary
      Value 0 is reserved for compile time invariants. */
   short change_prob;
   unsigned points_to_local_or_readonly_memory : 1;
+  unsigned points_to_possible_sra_candidate : 1;
   bool equal_to (const inline_param_summary &other) const
   {
     return change_prob == other.change_prob
           && points_to_local_or_readonly_memory
-             == other.points_to_local_or_readonly_memory;
+             == other.points_to_local_or_readonly_memory
+          && points_to_possible_sra_candidate
+             == other.points_to_possible_sra_candidate;
   }
   bool useless_p (void) const
   {
     return change_prob == REG_BR_PROB_BASE
-          && !points_to_local_or_readonly_memory;
+          && !points_to_local_or_readonly_memory
+          && !points_to_possible_sra_candidate;
   }
 };
 
@@ -135,6 +139,9 @@ public:
      percentage of executions even when they are not compile time constants.  
*/
   static const tree_code changed = IDENTIFIER_NODE;
 
+  /* Special condition code we use to check that the parameter is likely SRA
+     candidate.  */
+  static const tree_code not_sra_candidate = TREE_LIST;
 
 
   /* Initialize predicate either to true of false depending on P.  */
diff --git a/gcc/testsuite/g++.dg/ipa/devirt-45.C 
b/gcc/testsuite/g++.dg/ipa/devirt-45.C
index ce415e7c003..c26be21964c 100644
--- a/gcc/testsuite/g++.dg/ipa/devirt-45.C
+++ b/gcc/testsuite/g++.dg/ipa/devirt-45.C
@@ -37,5 +37,5 @@ int main()
 }
 
 /* One invocation is A::foo () other is B::foo () even though the type is 
destroyed and rebuilt in test() */
-/* { dg-final { scan-ipa-dump-times "Discovered a virtual call to a known 
target\[^\\n\]*A::foo" 1 "inline"  } } */
+/* { dg-final { scan-ipa-dump-times "Discovered a virtual call to a known 
target\[^\\n\]*A::foo" 2 "inline"  } } */
 /* { dg-final { scan-ipa-dump-times "Discovered a virtual call to a known 
target\[^\\n\]*B::foo" 1 "inline"  } } */

Reply via email to