Loop iterations inline hint

Jan Hubicka Mon, 20 Aug 2012 23:55:30 -0700

Hi,
this patch adds a hint that if inlining makes bounds on loop iterations known,
it is probably good idea.  This is primarely targetting Fortran's array
descriptors, but should be generally useful.


Fortran will still need a bit more work. Often we disregard inlining because we
think the call is cold (because it comes from Main) so inlining heuristic will
need more updating and apparently we will also need to update for PHI
conditionals as done in Martin's patch 3/3.

At the moment the hint is interpreted same way as the indirect_call hint from
previous patch.

Martin: I think ipa-cp should also make use of this hint. Resolving number of
loop iterations is important enough reason to specialize in many cases.
I think it already has logic for devirtualization but perhaps it should be made
more aggressive? I was sort of surprised that for Mozila the inlining hint
makes us to catch 20 times more cases than before. Most of the cases sounds like
good ipa-cp candidates.

Also can you please try to finaly make param notes to be used by the virtual
clones machinery and thus make it possible for ipa-cp to specialize for known
aggregate parameters? This should make a lot of difference for Fortran, I think.

Boostrapped/regtested x86_64-linux, will commit it after a bit more testing.
Honza

        * gcc.dg/ipa/inlinehint-1.c: New.

        PR fortran/48636
        * ipa-inline.c (want_inline_small_function_p): Take loop_iterations 
hint.
        (edge_badness): Likewise.
        * ipa-inline.h (inline_hints_vals): Add INLINE_HINT_loop_iterations.
        (inline_summary): Add loop_iterations.
        * ipa-inline-analysis.c: Include tree-scalar-evolution.h.
        (dump_inline_hints): Dump loop_iterations.
        (reset_inline_summary): Free loop_iterations.
        (inline_node_duplication_hook): Update loop_iterations.
        (dump_inline_summary): Dump loop_iterations.
        (will_be_nonconstant_expr_predicate): New function.
        (estimate_function_body_sizes): Analyze loops.
        (estimate_node_size_and_time): Set hint loop_iterations.
        (inline_merge_summary): Merge loop iterations.
        (inline_read_section): Stream in loop_iterations.
        (inline_write_summary): Stram out loop_iterations.
Index: testsuite/gcc.dg/ipa/inlinehint-1.c
===================================================================
*** testsuite/gcc.dg/ipa/inlinehint-1.c (revision 0)
--- testsuite/gcc.dg/ipa/inlinehint-1.c (revision 0)
***************
*** 0 ****
--- 1,16 ----
+ /* { dg-options "-O3 -c -fdump-ipa-inline-details -fno-early-inlining 
-fno-ipa-cp"  } */
+ test (int a)
+ {
+    int i;
+    for (i=0; i<a; i++)
+ {
+      test2(a);
+      test2(a);
+ }
+ }
+ m()
+ {
+   test (10);
+ }
+ /* { dg-final { scan-ipa-dump "loop_iterations"  "inline"  } } */
+ /* { dg-final { cleanup-ipa-dump "inline" } } */
Index: ipa-inline.c
===================================================================
*** ipa-inline.c        (revision 190510)
--- ipa-inline.c        (working copy)
*************** want_inline_small_function_p (struct cgr
*** 480,486 ****
         hints suggests that inlining given function is very profitable.  */
        else if (DECL_DECLARED_INLINE_P (callee->symbol.decl)
               && growth >= MAX_INLINE_INSNS_SINGLE
!              && !(hints & INLINE_HINT_indirect_call))
        {
            e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
          want_inline = false;
--- 480,487 ----
         hints suggests that inlining given function is very profitable.  */
        else if (DECL_DECLARED_INLINE_P (callee->symbol.decl)
               && growth >= MAX_INLINE_INSNS_SINGLE
!              && !(hints & (INLINE_HINT_indirect_call
!                            | INLINE_HINT_loop_iterations)))
        {
            e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
          want_inline = false;
*************** edge_badness (struct cgraph_edge *edge, 
*** 863,869 ****
          if (dump)
            fprintf (dump_file, "Badness overflow\n");
        }
!       if (hints & INLINE_HINT_indirect_call)
        badness /= 8;
        if (dump)
        {
--- 864,871 ----
          if (dump)
            fprintf (dump_file, "Badness overflow\n");
        }
!       if (hints & (INLINE_HINT_indirect_call
!                  | INLINE_HINT_loop_iterations))
        badness /= 8;
        if (dump)
        {
Index: ipa-inline.h
===================================================================
*** ipa-inline.h        (revision 190510)
--- ipa-inline.h        (working copy)
*************** typedef struct GTY(()) condition
*** 45,51 ****
  /* Inline hints are reasons why inline heuristics should preffer inlining 
given function.
     They are represtented as bitmap of the following values.  */
  enum inline_hints_vals {
!   INLINE_HINT_indirect_call = 1
  };
  typedef int inline_hints;
  
--- 45,52 ----
  /* Inline hints are reasons why inline heuristics should preffer inlining 
given function.
     They are represtented as bitmap of the following values.  */
  enum inline_hints_vals {
!   INLINE_HINT_indirect_call = 1,
!   INLINE_HINT_loop_iterations = 2
  };
  typedef int inline_hints;
  
*************** struct GTY(()) inline_summary
*** 118,123 ****
--- 119,128 ----
       merged during inlining.  */
    conditions conds;
    VEC(size_time_entry,gc) *entry;
+ 
+   /* Predicate on when some loop in the function sbecomes to have known
+      bounds.   */
+   struct predicate * GTY((skip)) loop_iterations;
  };
  
  
Index: ipa-inline-analysis.c
===================================================================
*** ipa-inline-analysis.c       (revision 190510)
--- ipa-inline-analysis.c       (working copy)
*************** along with GCC; see the file COPYING3.  
*** 88,93 ****
--- 88,94 ----
  #include "ipa-inline.h"
  #include "alloc-pool.h"
  #include "cfgloop.h"
+ #include "tree-scalar-evolution.h"
  
  /* Estimate runtime of function can easilly run into huge numbers with many
     nested loops.  Be sure we can compute time * INLINE_SIZE_SCALE * 2 in an
*************** dump_inline_hints (FILE *f, inline_hints
*** 627,632 ****
--- 629,639 ----
        hints &= ~INLINE_HINT_indirect_call;
        fprintf (f, " indirect_call");
      }
+   if (hints & INLINE_HINT_loop_iterations)
+     {
+       hints &= ~INLINE_HINT_loop_iterations;
+       fprintf (f, " loop_iterations");
+     }
    gcc_assert (!hints);
  }
  
*************** reset_inline_summary (struct cgraph_node
*** 941,946 ****
--- 948,958 ----
    info->stack_frame_offset = 0;
    info->size = 0;
    info->time = 0;
+   if (info->loop_iterations)
+     {
+       pool_free (edge_predicate_pool, info->loop_iterations);
+       info->loop_iterations = NULL;
+     }
    VEC_free (condition, gc, info->conds);
    VEC_free (size_time_entry,gc, info->entry);
    for (e = node->callees; e; e = e->next_callee)
*************** inline_node_duplication_hook (struct cgr
*** 1078,1084 ****
                                     * edge->frequency);
              edge->frequency = 0;
            }
!         *es->predicate = new_predicate;
        }
  
        /* Remap indirect edge predicates with the same simplificaiton as 
above. 
--- 1090,1096 ----
                                     * edge->frequency);
              edge->frequency = 0;
            }
!         edge_set_predicate (edge, &new_predicate);
        }
  
        /* Remap indirect edge predicates with the same simplificaiton as 
above. 
*************** inline_node_duplication_hook (struct cgr
*** 1110,1116 ****
                                     * edge->frequency);
              edge->frequency = 0;
            }
!         *es->predicate = new_predicate;
        }
  
        /* If inliner or someone after inliner will ever start producing
--- 1122,1150 ----
                                     * edge->frequency);
              edge->frequency = 0;
            }
!         edge_set_predicate (edge, &new_predicate);
!       }
!       if (info->loop_iterations)
!       {
!         struct predicate new_predicate = true_predicate ();
! 
!         for (j = 0; info->loop_iterations->clause[j]; j++)
!           if (!(possible_truths & info->loop_iterations->clause[j]))
!             {
!               new_predicate = false_predicate ();
!               break;
!             }
!           else
!             add_clause (info->conds, &new_predicate,
!                         possible_truths & info->loop_iterations->clause[j]);
!         if (false_predicate_p (&new_predicate)
!             || true_predicate_p (&new_predicate))
!           info->loop_iterations = NULL;
!         else
!           {
!             info->loop_iterations = (struct predicate *)pool_alloc 
(edge_predicate_pool);
!             *info->loop_iterations = new_predicate;
!           }
        }
  
        /* If inliner or someone after inliner will ever start producing
*************** inline_node_duplication_hook (struct cgr
*** 1136,1142 ****
        info->self_time = 0;
      }
    else
!     info->entry = VEC_copy (size_time_entry, gc, info->entry);
  }
  
  
--- 1170,1184 ----
        info->self_time = 0;
      }
    else
!     {
!       info->entry = VEC_copy (size_time_entry, gc, info->entry);
!       if (info->loop_iterations)
!       {
!         predicate p = *info->loop_iterations;
!         info->loop_iterations = (struct predicate *)pool_alloc 
(edge_predicate_pool);
!         *info->loop_iterations = p;
!       }
!     }
  }
  
  
*************** dump_inline_summary (FILE * f, struct cg
*** 1308,1313 ****
--- 1350,1360 ----
                   (double) e->time / INLINE_TIME_SCALE);
          dump_predicate (f, s->conds, &e->predicate);
        }
+       if (s->loop_iterations)
+       {
+         fprintf (f, "  loop iterations:");
+         dump_predicate (f, s->conds, s->loop_iterations);
+       }
        fprintf (f, "  calls:\n");
        dump_inline_edge_summary (f, 4, node, s);
        fprintf (f, "\n");
*************** compute_bb_predicates (struct cgraph_nod
*** 1780,1785 ****
--- 1827,1872 ----
  typedef struct predicate predicate_t;
  DEF_VEC_O (predicate_t);
  DEF_VEC_ALLOC_O (predicate_t, heap);
+ /* Return predicate specifying when the STMT might have result that is not
+    a compile time constant.  */
+ 
+ static struct predicate
+ will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
+                                   struct inline_summary *summary,
+                                   tree expr,
+                                   VEC (predicate_t, heap) *nonconstant_names)
+ {
+   tree parm;
+   int index;
+ 
+   while (UNARY_CLASS_P (expr))
+     expr = TREE_OPERAND (expr, 0);
+ 
+   parm = unmodified_parm (NULL, expr);
+   if (parm
+       && (index = ipa_get_param_decl_index (info, parm)) >= 0)
+     return add_condition (summary, index, NULL, CHANGED, NULL_TREE);
+   if (is_gimple_min_invariant (expr))
+     return false_predicate ();
+   if (TREE_CODE (expr) == SSA_NAME)
+     return VEC_index (predicate_t, nonconstant_names,
+                       SSA_NAME_VERSION (expr));
+   if (BINARY_CLASS_P (expr))
+     {
+       struct predicate p1 =  will_be_nonconstant_expr_predicate (info, 
summary, TREE_OPERAND (expr, 0), nonconstant_names);
+       struct predicate p2;
+       if (true_predicate_p (&p1))
+       return p1;
+       p2 = will_be_nonconstant_expr_predicate (info, summary, TREE_OPERAND 
(expr, 0), nonconstant_names);
+       return or_predicates (summary->conds, &p1, &p2);
+     }
+   else
+     {
+       debug_tree (expr);
+       gcc_unreachable ();
+     }
+   return false_predicate ();
+ }
  
  
  /* Return predicate specifying when the STMT might have result that is not
*************** estimate_function_body_sizes (struct cgr
*** 2176,2181 ****
--- 2263,2313 ----
    time = (time + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE;
    if (time > MAX_TIME)
      time = MAX_TIME;
+ 
+   if (!early && nonconstant_names)
+     {
+       struct loop *loop;
+       loop_iterator li;
+       predicate loop_iterations = true_predicate ();
+ 
+       calculate_dominance_info (CDI_DOMINATORS);
+       loop_optimizer_init (LOOPS_NORMAL
+                          | LOOPS_HAVE_RECORDED_EXITS);
+       if (dump_file && (dump_flags & TDF_DETAILS))
+       flow_loops_dump (dump_file, NULL, 0);
+       scev_initialize ();
+       FOR_EACH_LOOP (li, loop, 0)
+       {
+           VEC (edge, heap) *exits;
+           edge ex;
+         unsigned int j;
+         struct tree_niter_desc niter_desc;
+ 
+         exits = get_loop_exit_edges (loop);
+           FOR_EACH_VEC_ELT (edge, exits, j, ex)
+           if (number_of_iterations_exit (loop, ex, &niter_desc, false)
+               && !is_gimple_min_invariant (niter_desc.niter))
+             {
+               predicate will_be_nonconstant
+                = will_be_nonconstant_expr_predicate (parms_info, info,
+                                                      niter_desc.niter, 
nonconstant_names);
+               if (!true_predicate_p (&will_be_nonconstant)
+                   && !false_predicate_p (&will_be_nonconstant))
+                 /* This is slightly inprecise.  We may want to represent each 
loop with
+                    independent predicate.  */
+                 loop_iterations = and_predicates (info->conds, 
&loop_iterations, &will_be_nonconstant);
+             }
+           VEC_free (edge, heap, exits);
+       }
+       if (!true_predicate_p (&loop_iterations))
+       {
+           inline_summary (node)->loop_iterations = (struct predicate 
*)pool_alloc (edge_predicate_pool);
+           *inline_summary (node)->loop_iterations = loop_iterations;
+       }
+       scev_finalize ();
+       loop_optimizer_finalize ();
+       free_dominance_info (CDI_DOMINATORS);
+     }
    inline_summary (node)->self_time = time;
    inline_summary (node)->self_size = size;
    VEC_free (predicate_t, heap, nonconstant_names);
*************** estimate_node_size_and_time (struct cgra
*** 2459,2464 ****
--- 2591,2600 ----
                                                 
        }
  
+   if (info->loop_iterations
+       && !evaluate_predicate (info->loop_iterations, possible_truths))
+     hints |=INLINE_HINT_loop_iterations;
+ 
    if (time > MAX_TIME * INLINE_TIME_SCALE)
      time = MAX_TIME * INLINE_TIME_SCALE;
  
*************** inline_merge_summary (struct cgraph_edge
*** 2842,2847 ****
--- 2978,3005 ----
      }
    remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
                        offset_map, clause, &toplev_predicate);
+   if (callee_info->loop_iterations)
+     {
+       predicate p = remap_predicate (info, callee_info,
+                                    callee_info->loop_iterations,
+                                    operand_map, offset_map,
+                                    clause,
+                                    &toplev_predicate);
+       if (!false_predicate_p (&p)
+         && !true_predicate_p (&p))
+       {
+         if (!info->loop_iterations)
+           {
+             info->loop_iterations
+                = (struct predicate *)pool_alloc (edge_predicate_pool);
+             *info->loop_iterations = p;
+           }
+         else
+           *info->loop_iterations = and_predicates (info->conds, 
+                                                    info->loop_iterations,
+                                                    &p);
+       }
+     }
  
    inline_update_callee_summaries (edge->callee,
                                  inline_edge_summary (edge)->loop_depth);
*************** inline_read_section (struct lto_file_dec
*** 3269,3274 ****
--- 3427,3433 ----
        lto_symtab_encoder_t encoder;
        struct bitpack_d bp;
        struct cgraph_edge *e;
+       predicate p;
  
        index = streamer_read_uhwi (&ib);
        encoder = file_data->symtab_node_encoder;
*************** inline_read_section (struct lto_file_dec
*** 3310,3315 ****
--- 3469,3481 ----
  
          VEC_safe_push (size_time_entry, gc, info->entry, &e);
        }
+      
+       p = read_predicate (&ib);
+       if (!true_predicate_p (&p))
+       {
+         info->loop_iterations = (struct predicate *)pool_alloc 
(edge_predicate_pool);
+         *info->loop_iterations = p;
+       }
        for (e = node->callees; e; e = e->next_callee)
        read_inline_edge_summary (&ib, e);
        for (e = node->indirect_calls; e; e = e->next_callee)
*************** inline_write_summary (void)
*** 3456,3461 ****
--- 3622,3628 ----
              streamer_write_uhwi (ob, e->time);
              write_predicate (ob, &e->predicate);
            }
+         write_predicate (ob, info->loop_iterations);
          for (edge = node->callees; edge; edge = edge->next_callee)
            write_inline_edge_summary (ob, edge);
          for (edge = node->indirect_calls; edge; edge = edge->next_callee)

Loop iterations inline hint

Reply via email to