The following patch aims to reduce the number of pointless passes we run on functions containing no loops. Those are at least two copyprop passes and one dce pass (two dce passes when vectorization is enabled; three dce passes and an additional copyprop pass when any graphite optimization is enabled).
Simply gating pass_tree_loop on number_of_loops () > 1 would disable basic-block vectorization on loopless functions. Moving basic-block vectorization out of pass_tree_loop works only to the extent that you'd then need to move IVOPTs as well, because data-ref analysis cannot cope with TARGET_MEM_REFs. So the following introduces a pass_tree_no_loop pass group which is enabled whenever the pass_tree_loop group is disabled. As a follow-up, this would allow us to skip the cleanup work we do after the loop pipeline merely to clean up after it. Any comments? Does such a follow-up sound realistic, or would it be better to take the opportunity to move IVOPTs a bit closer to RTL expansion and avoid that "pass_tree_no_loop hack"? Bootstrap and regtest running on x86_64-unknown-linux-gnu. Thanks, Richard. 2014-06-18 Richard Biener <rguent...@suse.de> * tree-ssa-loop.c (gate_loop): New function. (pass_tree_loop::gate): Call it. (pass_data_tree_no_loop, pass_tree_no_loop, make_pass_tree_no_loop): New. * tree-vectorizer.c: Include tree-scalar-evolution.h. (pass_slp_vectorize::execute): Initialize loops and SCEV if required. (pass_slp_vectorize::clone): New method. * timevar.def (TV_TREE_NOLOOP): New. * tree-pass.h (make_pass_tree_no_loop): Declare. * passes.def (pass_tree_no_loop): New pass group with SLP vectorizer. Index: gcc/tree-ssa-loop.c =================================================================== *** gcc/tree-ssa-loop.c.orig 2014-06-18 12:06:19.226205380 +0200 --- gcc/tree-ssa-loop.c 2014-06-18 12:06:39.103204012 +0200 *************** along with GCC; see the file COPYING3. *** 42,47 **** --- 42,63 ---- #include "diagnostic-core.h" #include "tree-vectorizer.h" + + /* Gate for loop pass group. The group is controlled by -ftree-loop-optimize + but we also avoid running it when the IL doesn't contain any loop. */ + + static bool + gate_loop (function *fn) + { + if (!flag_tree_loop_optimize) + return false; + + /* Make sure to drop / re-discover loops when necessary.
*/ + if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + fix_loop_structure (NULL); + return number_of_loops (fn) > 1; + } + /* The loop superpass. */ namespace { *************** public: *** 68,74 **** {} /* opt_pass methods: */ ! virtual bool gate (function *) { return flag_tree_loop_optimize != 0; } }; // class pass_tree_loop --- 84,90 ---- {} /* opt_pass methods: */ ! virtual bool gate (function *fn) { return gate_loop (fn); } }; // class pass_tree_loop *************** make_pass_tree_loop (gcc::context *ctxt) *** 80,85 **** --- 96,140 ---- return new pass_tree_loop (ctxt); } + /* The no-loop superpass. */ + + namespace { + + const pass_data pass_data_tree_no_loop = + { + GIMPLE_PASS, /* type */ + "no_loop", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_execute */ + TV_TREE_NOLOOP, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ + }; + + class pass_tree_no_loop : public gimple_opt_pass + { + public: + pass_tree_no_loop (gcc::context *ctxt) + : gimple_opt_pass (pass_data_tree_no_loop, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *fn) { return !gate_loop (fn); } + + }; // class pass_tree_no_loop + + } // anon namespace + + gimple_opt_pass * + make_pass_tree_no_loop (gcc::context *ctxt) + { + return new pass_tree_no_loop (ctxt); + } + + /* Loop optimizer initialization. */ namespace { Index: gcc/tree-vectorizer.c =================================================================== *** gcc/tree-vectorizer.c.orig 2014-06-18 12:06:19.226205380 +0200 --- gcc/tree-vectorizer.c 2014-06-18 12:10:55.958186328 +0200 *************** along with GCC; see the file COPYING3. *** 82,87 **** --- 82,89 ---- #include "tree-ssa-propagate.h" #include "dbgcnt.h" #include "gimple-fold.h" + #include "tree-scalar-evolution.h" + /* Loop or bb location. 
*/ source_location vect_location; *************** public: *** 610,615 **** --- 612,618 ---- {} /* opt_pass methods: */ + opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); } virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; } virtual unsigned int execute (function *); *************** pass_slp_vectorize::execute (function *f *** 620,625 **** --- 623,635 ---- { basic_block bb; + bool in_loop_pipeline = scev_initialized_p (); + if (!in_loop_pipeline) + { + loop_optimizer_init (LOOPS_NORMAL); + scev_initialize (); + } + init_stmt_vec_info_vec (); FOR_EACH_BB_FN (bb, fun) *************** pass_slp_vectorize::execute (function *f *** 639,644 **** --- 649,661 ---- } free_stmt_vec_info_vec (); + + if (!in_loop_pipeline) + { + scev_finalize (); + loop_optimizer_finalize (); + } + return 0; } Index: gcc/timevar.def =================================================================== *** gcc/timevar.def.orig 2014-06-18 12:06:18.615205422 +0200 --- gcc/timevar.def 2014-06-18 12:06:39.103204012 +0200 *************** DEFTIMEVAR (TV_TREE_CALL_CDCE , "tr *** 162,167 **** --- 162,168 ---- DEFTIMEVAR (TV_TREE_DSE , "tree DSE") DEFTIMEVAR (TV_TREE_MERGE_PHI , "PHI merge") DEFTIMEVAR (TV_TREE_LOOP , "tree loop optimization") + DEFTIMEVAR (TV_TREE_NOLOOP , "loopless fn") DEFTIMEVAR (TV_TREE_LOOP_BOUNDS , "tree loop bounds") DEFTIMEVAR (TV_LIM , "tree loop invariant motion") DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv") Index: gcc/tree-pass.h =================================================================== *** gcc/tree-pass.h.orig 2014-06-18 12:06:18.615205422 +0200 --- gcc/tree-pass.h 2014-06-18 12:06:39.104204012 +0200 *************** extern gimple_opt_pass *make_pass_early_ *** 354,359 **** --- 354,360 ---- extern gimple_opt_pass *make_pass_tail_recursion (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tail_calls (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop (gcc::context *ctxt); + extern gimple_opt_pass 
*make_pass_tree_no_loop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt); extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt); Index: gcc/passes.def =================================================================== *** gcc/passes.def.orig 2014-06-18 12:06:18.615205422 +0200 --- gcc/passes.def 2014-06-18 12:06:39.104204012 +0200 *************** along with GCC; see the file COPYING3. *** 201,206 **** --- 201,208 ---- NEXT_PASS (pass_sink_code); NEXT_PASS (pass_asan); NEXT_PASS (pass_tsan); + /* Pass group that runs when 1) enabled, 2) there are loops + in the function. */ NEXT_PASS (pass_tree_loop); PUSH_INSERT_PASSES_WITHIN (pass_tree_loop) NEXT_PASS (pass_tree_loop_init); *************** along with GCC; see the file COPYING3. *** 233,242 **** --- 235,252 ---- NEXT_PASS (pass_complete_unroll); NEXT_PASS (pass_slp_vectorize); NEXT_PASS (pass_loop_prefetch); + /* Run IVOPTs after the last pass that uses data-reference analysis + as that doesn't handle TARGET_MEM_REFs. */ NEXT_PASS (pass_iv_optimize); NEXT_PASS (pass_lim); NEXT_PASS (pass_tree_loop_done); POP_INSERT_PASSES () + /* Pass group that runs when pass_tree_loop is disabled or there + are no loops in the function. */ + NEXT_PASS (pass_tree_no_loop); + PUSH_INSERT_PASSES_WITHIN (pass_tree_no_loop) + NEXT_PASS (pass_slp_vectorize); + POP_INSERT_PASSES () NEXT_PASS (pass_lower_vector_ssa); NEXT_PASS (pass_cse_reciprocals); NEXT_PASS (pass_reassoc);