Re: [google] Enable loop unroll/peel notes under -fopt-info
On Tue, Nov 1, 2011 at 2:53 AM, Richard Guenther richard.guent...@gmail.com wrote: On Tue, Nov 1, 2011 at 1:46 AM, Teresa Johnson tejohn...@google.com wrote: This patch is for google-main only. Tested with bootstrap and regression tests. Print unroll and peel factors along with loop source position under -fopt-info. Teresa 2011-10-31 Teresa Johnson tejohn...@google.com * common.opt (fopt-info): Disable -fopt-info by default. * loop-unroll.c (report_unroll_peel): New function. (unroll_and_peel_loops): Call record_loop_exits for later use. (peel_loops_completely): Print the loop source position in dump info and emit note under -fopt-info. (decide_unroll_and_peeling): Ditto. (decide_peel_once_rolling): Record peel factor for use in note emission. (decide_peel_completely): Ditto. * cfgloop.c (get_loop_location): New function. * cfgloop.h (get_loop_location): Ditto. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note under -fopt-info. Index: tree-ssa-loop-ivcanon.c === --- tree-ssa-loop-ivcanon.c (revision 180437) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -52,6 +52,7 @@ #include flags.h #include tree-inline.h #include target.h +#include diagnostic.h /* Specifies types of loops that may be unrolled. */ @@ -443,6 +444,17 @@ fprintf (dump_file, Unrolled loop %d completely by factor %d.\n, loop-num, (int) n_unroll); + if (flag_opt_info = OPT_INFO_MIN) + { + location_t locus; + locus = gimple_location (cond); + + inform (locus, Completely Unroll loop by %d (execution count %d, const iterations %d), + (int) n_unroll, + (int) loop-header-count, + (int) TREE_INT_CST_LOW(niter)); + } + And this is exactly what I mean with code-duplication. Yes, we agree with you and we will improve it after this. Two lines above we already have Unroled loop %d completely by factor %d, not only do you duplicate some diagnostic printing about this fact, you put in useless info (complete unroll by N of a loop executing M (?! that's surely N as well) times, M!=N. M is used to indicate how hot the loop is. David const iterations O (?! that's surely N as well ...). Richard.
Re: [google] Enable loop unroll/peel notes under -fopt-info
Ok for google/main. 1) may be better to omit the const iteration count for complete unroll message 2) Instead of dumping loop header count, is it better to dump pre-header count -- it gives an idea of how often the loop is entered. The loop header count can be derived from loop average iteration and preheader count. thanks, David On Mon, Oct 31, 2011 at 5:46 PM, Teresa Johnson tejohn...@google.com wrote: This patch is for google-main only. Tested with bootstrap and regression tests. Print unroll and peel factors along with loop source position under -fopt-info. Teresa 2011-10-31 Teresa Johnson tejohn...@google.com * common.opt (fopt-info): Disable -fopt-info by default. * loop-unroll.c (report_unroll_peel): New function. (unroll_and_peel_loops): Call record_loop_exits for later use. (peel_loops_completely): Print the loop source position in dump info and emit note under -fopt-info. (decide_unroll_and_peeling): Ditto. (decide_peel_once_rolling): Record peel factor for use in note emission. (decide_peel_completely): Ditto. * cfgloop.c (get_loop_location): New function. * cfgloop.h (get_loop_location): Ditto. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note under -fopt-info. Index: tree-ssa-loop-ivcanon.c === --- tree-ssa-loop-ivcanon.c (revision 180437) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -52,6 +52,7 @@ #include flags.h #include tree-inline.h #include target.h +#include diagnostic.h /* Specifies types of loops that may be unrolled. */ @@ -443,6 +444,17 @@ fprintf (dump_file, Unrolled loop %d completely by factor %d.\n, loop-num, (int) n_unroll); + if (flag_opt_info = OPT_INFO_MIN) + { + location_t locus; + locus = gimple_location (cond); + + inform (locus, Completely Unroll loop by %d (execution count %d, const iterations %d), + (int) n_unroll, + (int) loop-header-count, + (int) TREE_INT_CST_LOW(niter)); + } + return true; } Index: loop-unroll.c === --- loop-unroll.c (revision 180437) +++ loop-unroll.c (working copy) @@ -34,6 +34,7 @@ #include hashtab.h #include recog.h #include target.h +#include diagnostic.h /* This pass performs loop unrolling and peeling. We only perform these optimizations on innermost loops (with single exception) because @@ -152,6 +153,30 @@ basic_block); static rtx get_expansion (struct var_to_expand *); +static void +report_unroll_peel(struct loop *loop, location_t locus) +{ + struct niter_desc *desc; + int niters = 0; + + desc = get_simple_loop_desc (loop); + + if (desc-const_iter) + niters = desc-niter; + else if (loop-header-count) + niters = expected_loop_iterations (loop); + + inform (locus, %s%s loop by %d (execution count %d, %s iterations %d), + loop-lpt_decision.decision == LPT_PEEL_COMPLETELY ? + Completely : , + loop-lpt_decision.decision == LPT_PEEL_SIMPLE ? + Peel : Unroll, + loop-lpt_decision.times, + (int)loop-header-count, + desc-const_iter?const:average, + niters); +} + /* Unroll and/or peel (depending on FLAGS) LOOPS. */ void unroll_and_peel_loops (int flags) @@ -160,6 +185,8 @@ bool check; loop_iterator li; + record_loop_exits(); + /* First perform complete loop peeling (it is almost surely a win, and affects parameters for further decision a lot). */ peel_loops_completely (flags); @@ -234,16 +261,18 @@ { struct loop *loop; loop_iterator li; + location_t locus; /* Scan the loops, the inner ones first. */ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) { loop-lpt_decision.decision = LPT_NONE; + locus = get_loop_location(loop); if (dump_file) - fprintf (dump_file, - \n;; *** Considering loop %d for complete peeling ***\n, - loop-num); + fprintf (dump_file, \n;; *** Considering loop %d for complete peeling at BB %d from %s:%d ***\n, + loop-num, loop-header-index, LOCATION_FILE(locus), + LOCATION_LINE(locus)); loop-ninsns = num_loop_insns (loop); @@ -253,6 +282,11 @@ if (loop-lpt_decision.decision == LPT_PEEL_COMPLETELY) { + if (flag_opt_info = OPT_INFO_MIN) + { + report_unroll_peel(loop, locus); + } + peel_loop_completely (loop); #ifdef ENABLE_CHECKING verify_dominators (CDI_DOMINATORS); @@ -268,14 +302,18 @@ { struct loop *loop; loop_iterator li; + location_t locus; /* Scan the loops, inner ones first. */
Re: [google] Enable loop unroll/peel notes under -fopt-info
On Tue, Nov 1, 2011 at 1:46 AM, Teresa Johnson tejohn...@google.com wrote: This patch is for google-main only. Tested with bootstrap and regression tests. Print unroll and peel factors along with loop source position under -fopt-info. Teresa 2011-10-31 Teresa Johnson tejohn...@google.com * common.opt (fopt-info): Disable -fopt-info by default. * loop-unroll.c (report_unroll_peel): New function. (unroll_and_peel_loops): Call record_loop_exits for later use. (peel_loops_completely): Print the loop source position in dump info and emit note under -fopt-info. (decide_unroll_and_peeling): Ditto. (decide_peel_once_rolling): Record peel factor for use in note emission. (decide_peel_completely): Ditto. * cfgloop.c (get_loop_location): New function. * cfgloop.h (get_loop_location): Ditto. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note under -fopt-info. Index: tree-ssa-loop-ivcanon.c === --- tree-ssa-loop-ivcanon.c (revision 180437) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -52,6 +52,7 @@ #include flags.h #include tree-inline.h #include target.h +#include diagnostic.h /* Specifies types of loops that may be unrolled. */ @@ -443,6 +444,17 @@ fprintf (dump_file, Unrolled loop %d completely by factor %d.\n, loop-num, (int) n_unroll); + if (flag_opt_info = OPT_INFO_MIN) + { + location_t locus; + locus = gimple_location (cond); + + inform (locus, Completely Unroll loop by %d (execution count %d, const iterations %d), + (int) n_unroll, + (int) loop-header-count, + (int) TREE_INT_CST_LOW(niter)); + } + And this is exactly what I mean with code-duplication. Two lines above we already have Unroled loop %d completely by factor %d, not only do you duplicate some diagnostic printing about this fact, you put in useless info (complete unroll by N of a loop executing M (?! that's surely N as well) times, const iterations O (?! that's surely N as well ...). Richard.
Re: [google] Enable loop unroll/peel notes under -fopt-info
Hi Richard, Once we have a uniform way to emit notes to either stderr or dump, as you and David had discussed in the earlier thread, we can merge these two messages. The advantage with the new messages, besides going to stderr, is that the source position information is being emitted since it is a note. I agree that for complete unrolls the constant number of iterations can be omitted (but it is useful for the other types of unrolls/peels). But the execution count is something different - it includes the number of times the loop header executes based on profile information (i.e. iterations*# times loop is entered). Thanks, Teresa On Tue, Nov 1, 2011 at 2:53 AM, Richard Guenther richard.guent...@gmail.com wrote: On Tue, Nov 1, 2011 at 1:46 AM, Teresa Johnson tejohn...@google.com wrote: This patch is for google-main only. Tested with bootstrap and regression tests. Print unroll and peel factors along with loop source position under -fopt-info. Teresa 2011-10-31 Teresa Johnson tejohn...@google.com * common.opt (fopt-info): Disable -fopt-info by default. * loop-unroll.c (report_unroll_peel): New function. (unroll_and_peel_loops): Call record_loop_exits for later use. (peel_loops_completely): Print the loop source position in dump info and emit note under -fopt-info. (decide_unroll_and_peeling): Ditto. (decide_peel_once_rolling): Record peel factor for use in note emission. (decide_peel_completely): Ditto. * cfgloop.c (get_loop_location): New function. * cfgloop.h (get_loop_location): Ditto. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note under -fopt-info. Index: tree-ssa-loop-ivcanon.c === --- tree-ssa-loop-ivcanon.c (revision 180437) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -52,6 +52,7 @@ #include flags.h #include tree-inline.h #include target.h +#include diagnostic.h /* Specifies types of loops that may be unrolled. */ @@ -443,6 +444,17 @@ fprintf (dump_file, Unrolled loop %d completely by factor %d.\n, loop-num, (int) n_unroll); + if (flag_opt_info = OPT_INFO_MIN) + { + location_t locus; + locus = gimple_location (cond); + + inform (locus, Completely Unroll loop by %d (execution count %d, const iterations %d), + (int) n_unroll, + (int) loop-header-count, + (int) TREE_INT_CST_LOW(niter)); + } + And this is exactly what I mean with code-duplication. Two lines above we already have Unroled loop %d completely by factor %d, not only do you duplicate some diagnostic printing about this fact, you put in useless info (complete unroll by N of a loop executing M (?! that's surely N as well) times, const iterations O (?! that's surely N as well ...). Richard. -- Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413
[google] Enable loop unroll/peel notes under -fopt-info
This patch is for google-main only. Tested with bootstrap and regression tests. Print unroll and peel factors along with loop source position under -fopt-info. Teresa 2011-10-31 Teresa Johnson tejohn...@google.com * common.opt (fopt-info): Disable -fopt-info by default. * loop-unroll.c (report_unroll_peel): New function. (unroll_and_peel_loops): Call record_loop_exits for later use. (peel_loops_completely): Print the loop source position in dump info and emit note under -fopt-info. (decide_unroll_and_peeling): Ditto. (decide_peel_once_rolling): Record peel factor for use in note emission. (decide_peel_completely): Ditto. * cfgloop.c (get_loop_location): New function. * cfgloop.h (get_loop_location): Ditto. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Emit note under -fopt-info. Index: tree-ssa-loop-ivcanon.c === --- tree-ssa-loop-ivcanon.c (revision 180437) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -52,6 +52,7 @@ #include flags.h #include tree-inline.h #include target.h +#include diagnostic.h /* Specifies types of loops that may be unrolled. */ @@ -443,6 +444,17 @@ fprintf (dump_file, Unrolled loop %d completely by factor %d.\n, loop-num, (int) n_unroll); + if (flag_opt_info = OPT_INFO_MIN) +{ + location_t locus; + locus = gimple_location (cond); + + inform (locus, Completely Unroll loop by %d (execution count %d, const iterations %d), + (int) n_unroll, + (int) loop-header-count, + (int) TREE_INT_CST_LOW(niter)); +} + return true; } Index: loop-unroll.c === --- loop-unroll.c (revision 180437) +++ loop-unroll.c (working copy) @@ -34,6 +34,7 @@ #include hashtab.h #include recog.h #include target.h +#include diagnostic.h /* This pass performs loop unrolling and peeling. We only perform these optimizations on innermost loops (with single exception) because @@ -152,6 +153,30 @@ basic_block); static rtx get_expansion (struct var_to_expand *); +static void +report_unroll_peel(struct loop *loop, location_t locus) +{ + struct niter_desc *desc; + int niters = 0; + + desc = get_simple_loop_desc (loop); + + if (desc-const_iter) +niters = desc-niter; + else if (loop-header-count) +niters = expected_loop_iterations (loop); + + inform (locus, %s%s loop by %d (execution count %d, %s iterations %d), + loop-lpt_decision.decision == LPT_PEEL_COMPLETELY ? +Completely : , + loop-lpt_decision.decision == LPT_PEEL_SIMPLE ? +Peel : Unroll, + loop-lpt_decision.times, + (int)loop-header-count, + desc-const_iter?const:average, + niters); +} + /* Unroll and/or peel (depending on FLAGS) LOOPS. */ void unroll_and_peel_loops (int flags) @@ -160,6 +185,8 @@ bool check; loop_iterator li; + record_loop_exits(); + /* First perform complete loop peeling (it is almost surely a win, and affects parameters for further decision a lot). */ peel_loops_completely (flags); @@ -234,16 +261,18 @@ { struct loop *loop; loop_iterator li; + location_t locus; /* Scan the loops, the inner ones first. */ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) { loop-lpt_decision.decision = LPT_NONE; + locus = get_loop_location(loop); if (dump_file) - fprintf (dump_file, -\n;; *** Considering loop %d for complete peeling ***\n, -loop-num); + fprintf (dump_file, \n;; *** Considering loop %d for complete peeling at BB %d from %s:%d ***\n, + loop-num, loop-header-index, LOCATION_FILE(locus), + LOCATION_LINE(locus)); loop-ninsns = num_loop_insns (loop); @@ -253,6 +282,11 @@ if (loop-lpt_decision.decision == LPT_PEEL_COMPLETELY) { + if (flag_opt_info = OPT_INFO_MIN) +{ + report_unroll_peel(loop, locus); +} + peel_loop_completely (loop); #ifdef ENABLE_CHECKING verify_dominators (CDI_DOMINATORS); @@ -268,14 +302,18 @@ { struct loop *loop; loop_iterator li; + location_t locus; /* Scan the loops, inner ones first. */ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) { loop-lpt_decision.decision = LPT_NONE; + locus = get_loop_location(loop); if (dump_file) - fprintf (dump_file, \n;; *** Considering loop %d ***\n, loop-num); + fprintf (dump_file, \n;; *** Considering loop %d at BB %d from %s:%d ***\n, + loop-num, loop-header-index, LOCATION_FILE(locus), + LOCATION_LINE(locus)); /* Do not peel cold areas. */ if (optimize_loop_for_size_p (loop)) @@