[PATCH][3/n] Reduction vectorization improvements
This refactors code to arrange that for loop SLP we pass in the SLP node and instance to the vectorizable_* functions also at analysis phase (not only at transform phase). Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2015-05-26 Richard Biener <rguent...@suse.de> * tree-vect-loop.c (vect_update_vf_for_slp): Split out from ... (vect_analyze_loop_operations): ... here. Remove slp parameter, detect whether we apply SLP. Remove call to vect_update_slp_costs_according_to_vf. (vect_analyze_loop_2): Call vect_update_vf_for_slp and vect_update_slp_costs_according_to_vf from here. Dispatch to vect_slp_analyze_operations to analyze SLP stmts. * tree-vect-slp.c (vect_slp_analyze_node_operations): Drop unused bb_vec_info parameter, adjust assert. (vect_slp_analyze_operations): Pass in the slp instance tree instead of bb_vec_info. (vect_slp_analyze_bb_1): Adjust call to vect_slp_analyze_operations. * tree-vectorizer.h (vect_slp_analyze_operations): Declare. Index: gcc/tree-vect-loop.c === --- gcc/tree-vect-loop.c (revision 223574) +++ gcc/tree-vect-loop.c (working copy) @@ -1355,25 +1355,85 @@ vect_analyze_loop_form (struct loop *loo return loop_vinfo; } +/* Scan the loop stmts and dependent on whether there are any (non-)SLP + statements update the vectorization factor. */ + +static void +vect_update_vf_for_slp (loop_vec_info loop_vinfo) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); + int nbbs = loop->num_nodes; + unsigned int vectorization_factor; + int i; + + if (dump_enabled_p ()) +dump_printf_loc (MSG_NOTE, vect_location, +"=== vect_update_vf_for_slp ===\n"); + + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + gcc_assert (vectorization_factor != 0); + + /* If all the stmts in the loop can be SLPed, we perform only SLP, and + vectorization factor of the loop is the unrolling factor required by + the SLP instances. 
If that unrolling factor is 1, we say, that we + perform pure SLP on loop - cross iteration parallelism is not + exploited. */ + bool only_slp_in_loop = true; + for (i = 0; i < nbbs; i++) +{ + basic_block bb = bbs[i]; + for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); + gsi_next (&si)) + { + gimple stmt = gsi_stmt (si); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && STMT_VINFO_RELATED_STMT (stmt_info)) + { + stmt = STMT_VINFO_RELATED_STMT (stmt_info); + stmt_info = vinfo_for_stmt (stmt); + } + if ((STMT_VINFO_RELEVANT_P (stmt_info) + || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) + && !PURE_SLP_STMT (stmt_info)) + /* STMT needs both SLP and loop-based vectorization. */ + only_slp_in_loop = false; + } +} + + if (only_slp_in_loop) +vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); + else +vectorization_factor + = least_common_multiple (vectorization_factor, + LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); + + LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; + if (dump_enabled_p ()) +dump_printf_loc (MSG_NOTE, vect_location, +"Updating vectorization factor to %d\n", +vectorization_factor); +} /* Function vect_analyze_loop_operations. Scan the loop stmts and make sure they are all vectorizable. 
*/ static bool -vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) +vect_analyze_loop_operations (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; - unsigned int vectorization_factor = 0; + unsigned int vectorization_factor; int i; stmt_vec_info stmt_info; bool need_to_vectorize = false; int min_profitable_iters; int min_scalar_loop_bound; unsigned int th; - bool only_slp_in_loop = true, ok; + bool ok; HOST_WIDE_INT max_niter; HOST_WIDE_INT estimated_niter; int min_profitable_estimate; @@ -1382,50 +1442,6 @@ vect_analyze_loop_operations (loop_vec_i dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_loop_operations ===\n"); - gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); - vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - if (slp) -{ - /* If all the stmts in the loop can be SLPed, we perform only SLP, and -vectorization factor of the loop is the unrolling factor required by -the SLP instances. If that unrolling factor is 1, we say, that we -perform
[PATCH][3/n] Reduction vectorization improvements
This does some more cleanup and refactoring with two fixes: the pure slp compute in vect_analyze_loop_operations was failing to look at pattern stmts and the vect_is_slp_reduction hunk makes reduction detection fail because the pattern state changes in between reduction detection and vectorization (which re-calls the early code). Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2015-05-22 Richard Biener <rguent...@suse.de> * tree-vectorizer.h (struct _slp_oprnd_info): Add second_pattern member. * tree-vect-loop.c (vect_analyze_loop_operations): Look at patterns when determining whether SLP is pure. (vect_is_slp_reduction): Remove check for pattern stmts. (vect_is_simple_reduction_1): Remove dead code. * tree-vect-slp.c (vect_create_oprnd_info): Initialize second_pattern. (vect_get_and_check_slp_defs): Pass in the stmt number. Allow the first def in a reduction to be not a pattern stmt when the rest of the stmts def are patterns. (vect_build_slp_tree_1): Allow tcc_expression codes like SAD_EXPR and DOT_PROD_EXPR. (vect_build_slp_tree): Adjust. (vect_analyze_slp): Refactor and move BB vect error message ... (vect_slp_analyze_bb_1): ... here. 
Index: gcc/tree-vect-loop.c === --- gcc/tree-vect-loop.c (revision 223529) +++ gcc/tree-vect-loop.c (working copy) @@ -1399,7 +1399,12 @@ vect_analyze_loop_operations (loop_vec_i { gimple stmt = gsi_stmt (si); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - gcc_assert (stmt_info); + if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && STMT_VINFO_RELATED_STMT (stmt_info)) + { + stmt = STMT_VINFO_RELATED_STMT (stmt_info); + stmt_info = vinfo_for_stmt (stmt); + } if ((STMT_VINFO_RELEVANT_P (stmt_info) || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) && !PURE_SLP_STMT (stmt_info)) @@ -2031,12 +2036,8 @@ vect_is_slp_reduction (loop_vec_info loo if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) { - if (vinfo_for_stmt (use_stmt) - && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) -{ - loop_use_stmt = use_stmt; - nloop_uses++; -} + loop_use_stmt = use_stmt; + nloop_uses++; } else n_out_of_loop_uses++; @@ -2265,9 +2266,7 @@ vect_is_simple_reduction_1 (loop_vec_inf return NULL; } - if (vinfo_for_stmt (use_stmt) - && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt))) -nloop_uses++; + nloop_uses++; if (nloop_uses > 1) { if (dump_enabled_p ()) @@ -2325,9 +2324,7 @@ vect_is_simple_reduction_1 (loop_vec_inf gimple use_stmt = USE_STMT (use_p); if (is_gimple_debug (use_stmt)) continue; - if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)) - && vinfo_for_stmt (use_stmt) - && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt))) + if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) nloop_uses++; if (nloop_uses > 1) { Index: gcc/tree-vect-slp.c === --- gcc/tree-vect-slp.c (revision 223529) +++ gcc/tree-vect-slp.c (working copy) @@ -183,6 +183,7 @@ vect_create_oprnd_info (int nops, int gr oprnd_info->first_dt = vect_uninitialized_def; oprnd_info->first_op_type = NULL_TREE; oprnd_info->first_pattern = false; + oprnd_info->second_pattern = false; oprnds_info.quick_push (oprnd_info); } @@ -242,7 +243,7 @@ vect_get_place_in_interleaving_chain (gi static int vect_get_and_check_slp_defs (loop_vec_info 
loop_vinfo, bb_vec_info bb_vinfo, - gimple stmt, bool first, + gimple stmt, unsigned stmt_num, vec<slp_oprnd_info> *oprnds_info) { tree oprnd; @@ -256,6 +257,8 @@ vect_get_and_check_slp_defs (loop_vec_in int first_op_idx = 1; bool commutative = false; bool first_op_cond = false; + bool first = stmt_num == 0; + bool second = stmt_num == 1; if (loop_vinfo) loop = LOOP_VINFO_LOOP (loop_vinfo); @@ -326,7 +329,11 @@ again: && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) { pattern = true; - if (!first && !oprnd_info->first_pattern) + if (!first && !oprnd_info->first_pattern + /* Allow different pattern state for the defs of the +first stmt in reduction chains. */ + && (oprnd_info->first_dt != vect_reduction_def + || (!second && !oprnd_info->second_pattern)))