Re: [google] Enable loop unroll/peel notes under -fopt-info

2011-11-02 Thread Xinliang David Li
On Tue, Nov 1, 2011 at 2:53 AM, Richard Guenther
richard.guent...@gmail.com wrote:
 On Tue, Nov 1, 2011 at 1:46 AM, Teresa Johnson tejohn...@google.com wrote:
 This patch is for google-main only.

 Tested with bootstrap and regression tests.

 Print unroll and peel factors along with loop source position under 
 -fopt-info.

 Teresa

 2011-10-31   Teresa Johnson  tejohn...@google.com

        * common.opt (fopt-info): Disable -fopt-info by default.
        * loop-unroll.c (report_unroll_peel): New function.
        (unroll_and_peel_loops): Call record_loop_exits for later use.
        (peel_loops_completely): Print the loop source position in dump
        info and emit note under -fopt-info.
        (decide_unroll_and_peeling): Ditto.
        (decide_peel_once_rolling): Record peel factor for use in note
        emission.
        (decide_peel_completely): Ditto.
        * cfgloop.c (get_loop_location): New function.
        * cfgloop.h (get_loop_location): Ditto.
        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note
        under -fopt-info.

 Index: tree-ssa-loop-ivcanon.c
 ===
 --- tree-ssa-loop-ivcanon.c     (revision 180437)
 +++ tree-ssa-loop-ivcanon.c     (working copy)
 @@ -52,6 +52,7 @@
  #include flags.h
  #include tree-inline.h
  #include target.h
 +#include diagnostic.h

  /* Specifies types of loops that may be unrolled.  */

 @@ -443,6 +444,17 @@
     fprintf (dump_file, Unrolled loop %d completely by factor %d.\n,
              loop-num, (int) n_unroll);

 +  if (flag_opt_info = OPT_INFO_MIN)
 +    {
 +      location_t locus;
 +      locus = gimple_location (cond);
 +
 +      inform (locus, Completely Unroll loop by %d (execution count
 %d, const iterations %d),
 +              (int) n_unroll,
 +              (int) loop-header-count,
 +              (int) TREE_INT_CST_LOW(niter));
 +    }
 +

 And this is exactly what I mean with code-duplication.

Yes, we agree with you and we will improve it after this.

Two lines above
 we already have Unroled loop %d completely by factor %d, not only
 do you duplicate some diagnostic printing about this fact, you
 put in useless info (complete unroll by N of a loop executing M (?! that's
 surely N as well) times,

M!=N. M is used to indicate how hot the loop is.

David

const iterations O (?! that's surely N as well ...).

 Richard.



Re: [google] Enable loop unroll/peel notes under -fopt-info

2011-11-02 Thread Xinliang David Li
Ok for google/main.

1) may be better to omit the const iteration count for complete unroll message
2) Instead of dumping loop header count, is it better to dump
pre-header count -- it gives an idea of how often the loop is entered.
The loop header count can be derived from loop average iteration and
preheader count.

thanks,

David

On Mon, Oct 31, 2011 at 5:46 PM, Teresa Johnson tejohn...@google.com wrote:
 This patch is for google-main only.

 Tested with bootstrap and regression tests.

 Print unroll and peel factors along with loop source position under 
 -fopt-info.

 Teresa

 2011-10-31   Teresa Johnson  tejohn...@google.com

        * common.opt (fopt-info): Disable -fopt-info by default.
        * loop-unroll.c (report_unroll_peel): New function.
        (unroll_and_peel_loops): Call record_loop_exits for later use.
        (peel_loops_completely): Print the loop source position in dump
        info and emit note under -fopt-info.
        (decide_unroll_and_peeling): Ditto.
        (decide_peel_once_rolling): Record peel factor for use in note
        emission.
        (decide_peel_completely): Ditto.
        * cfgloop.c (get_loop_location): New function.
        * cfgloop.h (get_loop_location): Ditto.
        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note
        under -fopt-info.

 Index: tree-ssa-loop-ivcanon.c
 ===
 --- tree-ssa-loop-ivcanon.c     (revision 180437)
 +++ tree-ssa-loop-ivcanon.c     (working copy)
 @@ -52,6 +52,7 @@
  #include flags.h
  #include tree-inline.h
  #include target.h
 +#include diagnostic.h

  /* Specifies types of loops that may be unrolled.  */

 @@ -443,6 +444,17 @@
     fprintf (dump_file, Unrolled loop %d completely by factor %d.\n,
              loop-num, (int) n_unroll);

 +  if (flag_opt_info = OPT_INFO_MIN)
 +    {
 +      location_t locus;
 +      locus = gimple_location (cond);
 +
 +      inform (locus, Completely Unroll loop by %d (execution count
 %d, const iterations %d),
 +              (int) n_unroll,
 +              (int) loop-header-count,
 +              (int) TREE_INT_CST_LOW(niter));
 +    }
 +
   return true;
  }

 Index: loop-unroll.c
 ===
 --- loop-unroll.c       (revision 180437)
 +++ loop-unroll.c       (working copy)
 @@ -34,6 +34,7 @@
  #include hashtab.h
  #include recog.h
  #include target.h
 +#include diagnostic.h

  /* This pass performs loop unrolling and peeling.  We only perform these
    optimizations on innermost loops (with single exception) because
 @@ -152,6 +153,30 @@
                                             basic_block);
  static rtx get_expansion (struct var_to_expand *);

 +static void
 +report_unroll_peel(struct loop *loop, location_t locus)
 +{
 +  struct niter_desc *desc;
 +  int niters = 0;
 +
 +  desc = get_simple_loop_desc (loop);
 +
 +  if (desc-const_iter)
 +    niters = desc-niter;
 +  else if (loop-header-count)
 +    niters = expected_loop_iterations (loop);
 +
 +  inform (locus, %s%s loop by %d (execution count %d, %s iterations %d),
 +          loop-lpt_decision.decision == LPT_PEEL_COMPLETELY ?
 +            Completely  : ,
 +          loop-lpt_decision.decision == LPT_PEEL_SIMPLE ?
 +            Peel : Unroll,
 +          loop-lpt_decision.times,
 +          (int)loop-header-count,
 +          desc-const_iter?const:average,
 +          niters);
 +}
 +
  /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
  void
  unroll_and_peel_loops (int flags)
 @@ -160,6 +185,8 @@
   bool check;
   loop_iterator li;

 +  record_loop_exits();
 +
   /* First perform complete loop peeling (it is almost surely a win,
      and affects parameters for further decision a lot).  */
   peel_loops_completely (flags);
 @@ -234,16 +261,18 @@
  {
   struct loop *loop;
   loop_iterator li;
 +  location_t locus;

   /* Scan the loops, the inner ones first.  */
   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
     {
       loop-lpt_decision.decision = LPT_NONE;
 +      locus = get_loop_location(loop);

       if (dump_file)
 -       fprintf (dump_file,
 -                \n;; *** Considering loop %d for complete peeling ***\n,
 -                loop-num);
 +       fprintf (dump_file, \n;; *** Considering loop %d for complete
 peeling at BB %d from %s:%d ***\n,
 +                 loop-num, loop-header-index, LOCATION_FILE(locus),
 +                 LOCATION_LINE(locus));

       loop-ninsns = num_loop_insns (loop);

 @@ -253,6 +282,11 @@

       if (loop-lpt_decision.decision == LPT_PEEL_COMPLETELY)
        {
 +          if (flag_opt_info = OPT_INFO_MIN)
 +            {
 +              report_unroll_peel(loop, locus);
 +            }
 +
          peel_loop_completely (loop);
  #ifdef ENABLE_CHECKING
          verify_dominators (CDI_DOMINATORS);
 @@ -268,14 +302,18 @@
  {
   struct loop *loop;
   loop_iterator li;
 +  location_t locus;

   /* Scan the loops, inner ones first.  */
   

Re: [google] Enable loop unroll/peel notes under -fopt-info

2011-11-01 Thread Richard Guenther
On Tue, Nov 1, 2011 at 1:46 AM, Teresa Johnson tejohn...@google.com wrote:
 This patch is for google-main only.

 Tested with bootstrap and regression tests.

 Print unroll and peel factors along with loop source position under 
 -fopt-info.

 Teresa

 2011-10-31   Teresa Johnson  tejohn...@google.com

        * common.opt (fopt-info): Disable -fopt-info by default.
        * loop-unroll.c (report_unroll_peel): New function.
        (unroll_and_peel_loops): Call record_loop_exits for later use.
        (peel_loops_completely): Print the loop source position in dump
        info and emit note under -fopt-info.
        (decide_unroll_and_peeling): Ditto.
        (decide_peel_once_rolling): Record peel factor for use in note
        emission.
        (decide_peel_completely): Ditto.
        * cfgloop.c (get_loop_location): New function.
        * cfgloop.h (get_loop_location): Ditto.
        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note
        under -fopt-info.

 Index: tree-ssa-loop-ivcanon.c
 ===
 --- tree-ssa-loop-ivcanon.c     (revision 180437)
 +++ tree-ssa-loop-ivcanon.c     (working copy)
 @@ -52,6 +52,7 @@
  #include flags.h
  #include tree-inline.h
  #include target.h
 +#include diagnostic.h

  /* Specifies types of loops that may be unrolled.  */

 @@ -443,6 +444,17 @@
     fprintf (dump_file, Unrolled loop %d completely by factor %d.\n,
              loop-num, (int) n_unroll);

 +  if (flag_opt_info = OPT_INFO_MIN)
 +    {
 +      location_t locus;
 +      locus = gimple_location (cond);
 +
 +      inform (locus, Completely Unroll loop by %d (execution count
 %d, const iterations %d),
 +              (int) n_unroll,
 +              (int) loop-header-count,
 +              (int) TREE_INT_CST_LOW(niter));
 +    }
 +

And this is exactly what I mean with code-duplication.  Two lines above
we already have Unroled loop %d completely by factor %d, not only
do you duplicate some diagnostic printing about this fact, you
put in useless info (complete unroll by N of a loop executing M (?! that's
surely N as well) times, const iterations O (?! that's surely N as well ...).

Richard.


Re: [google] Enable loop unroll/peel notes under -fopt-info

2011-11-01 Thread Teresa Johnson
Hi Richard,

Once we have a uniform way to emit notes to either stderr or dump, as
you and David had discussed in the earlier thread, we can merge these
two messages. The advantage with the new messages, besides going to
stderr, is that the source position information is being emitted since
it is a note. I agree that for complete unrolls the constant number of
iterations can be omitted (but it is useful for the other types of
unrolls/peels). But the execution count is something different - it
includes the number of times the loop header executes based on profile
information (i.e. iterations*# times loop is entered).

Thanks,
Teresa


On Tue, Nov 1, 2011 at 2:53 AM, Richard Guenther
richard.guent...@gmail.com wrote:
 On Tue, Nov 1, 2011 at 1:46 AM, Teresa Johnson tejohn...@google.com wrote:
 This patch is for google-main only.

 Tested with bootstrap and regression tests.

 Print unroll and peel factors along with loop source position under 
 -fopt-info.

 Teresa

 2011-10-31   Teresa Johnson  tejohn...@google.com

        * common.opt (fopt-info): Disable -fopt-info by default.
        * loop-unroll.c (report_unroll_peel): New function.
        (unroll_and_peel_loops): Call record_loop_exits for later use.
        (peel_loops_completely): Print the loop source position in dump
        info and emit note under -fopt-info.
        (decide_unroll_and_peeling): Ditto.
        (decide_peel_once_rolling): Record peel factor for use in note
        emission.
        (decide_peel_completely): Ditto.
        * cfgloop.c (get_loop_location): New function.
        * cfgloop.h (get_loop_location): Ditto.
        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note
        under -fopt-info.

 Index: tree-ssa-loop-ivcanon.c
 ===
 --- tree-ssa-loop-ivcanon.c     (revision 180437)
 +++ tree-ssa-loop-ivcanon.c     (working copy)
 @@ -52,6 +52,7 @@
  #include flags.h
  #include tree-inline.h
  #include target.h
 +#include diagnostic.h

  /* Specifies types of loops that may be unrolled.  */

 @@ -443,6 +444,17 @@
     fprintf (dump_file, Unrolled loop %d completely by factor %d.\n,
              loop-num, (int) n_unroll);

 +  if (flag_opt_info = OPT_INFO_MIN)
 +    {
 +      location_t locus;
 +      locus = gimple_location (cond);
 +
 +      inform (locus, Completely Unroll loop by %d (execution count
 %d, const iterations %d),
 +              (int) n_unroll,
 +              (int) loop-header-count,
 +              (int) TREE_INT_CST_LOW(niter));
 +    }
 +

 And this is exactly what I mean with code-duplication.  Two lines above
 we already have Unroled loop %d completely by factor %d, not only
 do you duplicate some diagnostic printing about this fact, you
 put in useless info (complete unroll by N of a loop executing M (?! that's
 surely N as well) times, const iterations O (?! that's surely N as well ...).

 Richard.




-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


[google] Enable loop unroll/peel notes under -fopt-info

2011-10-31 Thread Teresa Johnson
This patch is for google-main only.

Tested with bootstrap and regression tests.

Print unroll and peel factors along with loop source position under -fopt-info.

Teresa

2011-10-31   Teresa Johnson  tejohn...@google.com

* common.opt (fopt-info): Disable -fopt-info by default.
* loop-unroll.c (report_unroll_peel): New function.
(unroll_and_peel_loops): Call record_loop_exits for later use.
(peel_loops_completely): Print the loop source position in dump
info and emit note under -fopt-info.
(decide_unroll_and_peeling): Ditto.
(decide_peel_once_rolling): Record peel factor for use in note
emission.
(decide_peel_completely): Ditto.
* cfgloop.c (get_loop_location): New function.
* cfgloop.h (get_loop_location): Ditto.
* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note
under -fopt-info.

Index: tree-ssa-loop-ivcanon.c
===
--- tree-ssa-loop-ivcanon.c (revision 180437)
+++ tree-ssa-loop-ivcanon.c (working copy)
@@ -52,6 +52,7 @@
 #include flags.h
 #include tree-inline.h
 #include target.h
+#include diagnostic.h

 /* Specifies types of loops that may be unrolled.  */

@@ -443,6 +444,17 @@
 fprintf (dump_file, Unrolled loop %d completely by factor %d.\n,
  loop-num, (int) n_unroll);

+  if (flag_opt_info = OPT_INFO_MIN)
+{
+  location_t locus;
+  locus = gimple_location (cond);
+
+  inform (locus, Completely Unroll loop by %d (execution count
%d, const iterations %d),
+  (int) n_unroll,
+  (int) loop-header-count,
+  (int) TREE_INT_CST_LOW(niter));
+}
+
   return true;
 }

Index: loop-unroll.c
===
--- loop-unroll.c   (revision 180437)
+++ loop-unroll.c   (working copy)
@@ -34,6 +34,7 @@
 #include hashtab.h
 #include recog.h
 #include target.h
+#include diagnostic.h

 /* This pass performs loop unrolling and peeling.  We only perform these
optimizations on innermost loops (with single exception) because
@@ -152,6 +153,30 @@
 basic_block);
 static rtx get_expansion (struct var_to_expand *);

+static void
+report_unroll_peel(struct loop *loop, location_t locus)
+{
+  struct niter_desc *desc;
+  int niters = 0;
+
+  desc = get_simple_loop_desc (loop);
+
+  if (desc-const_iter)
+niters = desc-niter;
+  else if (loop-header-count)
+niters = expected_loop_iterations (loop);
+
+  inform (locus, %s%s loop by %d (execution count %d, %s iterations %d),
+  loop-lpt_decision.decision == LPT_PEEL_COMPLETELY ?
+Completely  : ,
+  loop-lpt_decision.decision == LPT_PEEL_SIMPLE ?
+Peel : Unroll,
+  loop-lpt_decision.times,
+  (int)loop-header-count,
+  desc-const_iter?const:average,
+  niters);
+}
+
 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 void
 unroll_and_peel_loops (int flags)
@@ -160,6 +185,8 @@
   bool check;
   loop_iterator li;

+  record_loop_exits();
+
   /* First perform complete loop peeling (it is almost surely a win,
  and affects parameters for further decision a lot).  */
   peel_loops_completely (flags);
@@ -234,16 +261,18 @@
 {
   struct loop *loop;
   loop_iterator li;
+  location_t locus;

   /* Scan the loops, the inner ones first.  */
   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
 {
   loop-lpt_decision.decision = LPT_NONE;
+  locus = get_loop_location(loop);

   if (dump_file)
-   fprintf (dump_file,
-\n;; *** Considering loop %d for complete peeling ***\n,
-loop-num);
+   fprintf (dump_file, \n;; *** Considering loop %d for complete
peeling at BB %d from %s:%d ***\n,
+ loop-num, loop-header-index, LOCATION_FILE(locus),
+ LOCATION_LINE(locus));

   loop-ninsns = num_loop_insns (loop);

@@ -253,6 +282,11 @@

   if (loop-lpt_decision.decision == LPT_PEEL_COMPLETELY)
{
+  if (flag_opt_info = OPT_INFO_MIN)
+{
+  report_unroll_peel(loop, locus);
+}
+
  peel_loop_completely (loop);
 #ifdef ENABLE_CHECKING
  verify_dominators (CDI_DOMINATORS);
@@ -268,14 +302,18 @@
 {
   struct loop *loop;
   loop_iterator li;
+  location_t locus;

   /* Scan the loops, inner ones first.  */
   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
 {
   loop-lpt_decision.decision = LPT_NONE;
+  locus = get_loop_location(loop);

   if (dump_file)
-   fprintf (dump_file, \n;; *** Considering loop %d ***\n, loop-num);
+   fprintf (dump_file, \n;; *** Considering loop %d at BB %d from %s:%d 
***\n,
+ loop-num, loop-header-index, LOCATION_FILE(locus),
+ LOCATION_LINE(locus));

   /* Do not peel cold areas.  */
   if (optimize_loop_for_size_p (loop))
@@