> > > > + /* Save destination as we go, BB are visited in order and the > > > > last one > > > > + is where statements should be moved to. */ > > > > + if (!dest_bb) > > > > + dest_bb = gimple_bb (c); > > > > + else > > > > + { > > > > + basic_block curr_bb = gimple_bb (c); > > > > + if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb)) > > > > + dest_bb = curr_bb; > > > > + } > > > > + } > > > > + > > > > + dest_bb = FALLTHRU_EDGE (dest_bb)->dest; > > > > > > no edge is the fallthru edge out of a condition, so this always selects > > > EDGE_SUCC (dest_bb, 1) which cannot be correct (well, guess you're > > > lucky). I > > > think you instead want > > > > > > dest_bb = EDGE_SUCC (dest_bb, 0)->dest->loop_father == dest_bb- > > > >loop_father ? EDGE_SUCC (dest_bb, 0)->dest : EDGE_SUCC (dest_bb, 1)- > > > >dest; > > > > > > more nicely written, of course. > > > > > > > + gcc_assert (dest_bb); > > > > + LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb; > > > > > > Sorting the vector of early breaks as we gather them might be nicer than > > > this - > > > you'd then simply use the first or last. > > >
I opted not to do the sorting since I don't really need a full order between the exits here and only need to find the last one. A sort would be more expensive than the linear check here. But I also couldn't think of a good sort key since all you have is dominate yes/no. Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * tree-vect-data-refs.cc (vect_analyze_early_break_dependences): New. (vect_analyze_data_ref_dependences): Use it. * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize early_breaks. (move_early_exit_stmts): New. (vect_transform_loop): Use it. * tree-vect-stmts.cc (vect_is_simple_use): Use vect_early_exit_def. * tree-vectorizer.h (enum vect_def_type): Add vect_early_exit_def. (class _loop_vec_info): Add early_breaks, early_break_conflict, early_break_vuses. (LOOP_VINFO_EARLY_BREAKS): New. (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS): New. (LOOP_VINFO_EARLY_BRK_DEST_BB): New. (LOOP_VINFO_EARLY_BRK_VUSES): New. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-early-break_57.c: New test. * gcc.dg/vect/vect-early-break_79.c: New test. * gcc.dg/vect/vect-early-break_80.c: New test. * gcc.dg/vect/vect-early-break_81.c: New test. * gcc.dg/vect/vect-early-break_83.c: New test. 
--- inline copy of patch --- diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c index be4a0c7426093059ce37a9f824defb7ae270094d..9a4e795f92b7a8577ac71827f5cb0bd15d88ebe1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c @@ -5,6 +5,7 @@ /* { dg-additional-options "-Ofast" } */ /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "epilog loop required" "vect" } } */ void abort (); diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_79.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_79.c new file mode 100644 index 0000000000000000000000000000000000000000..a26011ef1ba5aa000692babc90d46621efc2f8b5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_79.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ + +#undef N +#define N 32 + +unsigned vect_a[N]; +unsigned vect_b[N]; + +unsigned test4(unsigned x) +{ + unsigned ret = 0; + for (int i = 0; i < 1024; i++) + { + vect_b[i] = x + i; + if (vect_a[i] > x) + break; + vect_a[i] = x; + + } + return ret; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_80.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_80.c new file mode 100644 index 0000000000000000000000000000000000000000..ddf504e0c8787ae33a0e98045c1c91f2b9f533a9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_80.c @@ -0,0 +1,43 @@ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ + +extern void abort (); + +int x; +__attribute__ ((noinline, noipa)) +void foo (int *a, int *b) +{ + int local_x = x; + for (int i 
= 0; i < 1024; ++i) + { + if (i + local_x == 13) + break; + a[i] = 2 * b[i]; + } +} + +int main () +{ + int a[1024] = {0}; + int b[1024] = {0}; + + for (int i = 0; i < 1024; i++) + b[i] = i; + + x = -512; + foo (a, b); + + if (a[524] != 1048) + abort (); + + if (a[525] != 0) + abort (); + + if (a[1023] != 0) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c new file mode 100644 index 0000000000000000000000000000000000000000..c38e394ad87863f0702d422cb58018b979c9fba6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "epilog loop required" "vect" } } */ +void abort (); + +unsigned short sa[32]; +unsigned short sc[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; +unsigned short sb[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; +unsigned int ia[32]; +unsigned int ic[32] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45, + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; +unsigned int ib[32] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45, + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + +int main2 (int n) +{ + int i; + for (i = 0; i < n - 3; i++) + { + if (sa[i+3] != sb[i] + sc[i] || ia[i+3] != ib[i] + ic[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_83.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_83.c new file mode 100644 index 0000000000000000000000000000000000000000..227dcf1b7ab2ace149e692a6aab41cdd5d47d098 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_83.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break 
} */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ + +#include <complex.h> + +#define N 1024 +complex double vect_a[N]; +complex double vect_b[N]; + +complex double test4(complex double x) +{ + complex double ret = 0; + for (int i = 0; i < N; i++) + { + volatile complex double z = vect_b[i]; + vect_b[i] = x + i + z; + if (vect_a[i] == x) + return i; + vect_a[i] += x * vect_b[i]; + + } + return ret; +} diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index d5c9c4a11c2e5d8fd287f412bfa86d081c2f8325..8e9e780e01fd349b30da1f0a762c0306ec257ff7 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -613,6 +613,377 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, return opt_result::success (); } +/* Function vect_analyze_early_break_dependences. + + Examine all the data references in the loop and make sure that if we have + multiple exits that we are able to safely move stores such that they become + safe for vectorization. The function also calculates the place where to move + the instructions to and computes what the new vUSE chain should be. + + This works in tandem with the CFG that will be produced by + slpeel_tree_duplicate_loop_to_edge_cfg later on. + + This function tries to validate whether an early break vectorization + is possible for the current instruction sequence. Returns True if + possible, otherwise False. + + Requirements: + - Any memory access must be to a fixed size buffer. + - There must not be any loads and stores to the same object. + - Multiple loads are allowed as long as they don't alias. + + NOTE: + This implementation is very conservative. Any overlapping loads/stores + that take place before the early break statement get rejected aside from + WAR dependencies. + + i.e.: + + a[i] = 8 + c = a[i] + if (b[i]) + ... 
+ + is not allowed, but + + c = a[i] + a[i] = 8 + if (b[i]) + ... + + is which is the common case. */ + +static opt_result +vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) +{ + DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences"); + + /* - CHAIN: Currently detected sequence of instructions that need to be moved + if we are to vectorize this early break. + - FIXED: Sequences of SSA_NAMEs that must not be moved, they are reachable + from one or more cond conditions. If this set overlaps with CHAIN + then FIXED takes precedence. This deals with non-single use + cases. + - BASES: List of all load data references found during traversal. */ + hash_set<tree> chain, fixed; + auto_vec<data_reference *> bases; + basic_block dest_bb = NULL; + + hash_set <gimple *> visited; + use_operand_p use_p; + ssa_op_iter iter; + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + class loop *loop_nest = loop_outer (loop); + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "loop contains multiple exits, analyzing" + " statement dependencies.\n"); + + for (gimple *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo)) + { + stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c); + if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type) + continue; + + gimple_stmt_iterator gsi = gsi_for_stmt (c); + + /* First determine the list of statements that we can't move because they + are required for the early break vectorization itself. 
*/ + auto_vec <gimple *> workset; + workset.safe_push (c); + do { + gimple *op = workset.pop (); + if (visited.add (op) + || is_a <gphi *> (op) + || is_gimple_debug (op)) + continue; + + if (gimple_has_lhs (op)) + fixed.add (gimple_get_lhs (op)); + + stmt_vec_info def_info = loop_vinfo->lookup_stmt (op); + if (!def_info) + continue; + + gimple *def_stmt = STMT_VINFO_STMT (def_info); + FOR_EACH_SSA_USE_OPERAND (use_p, def_stmt, iter, SSA_OP_USE) + { + tree use = USE_FROM_PTR (use_p); + if (TREE_CODE (use) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (use)) + continue; + + if (gimple *g = SSA_NAME_DEF_STMT (use)) + workset.safe_push (g); + } + } while (!workset.is_empty ()); + + /* Now analyze all the remaining statements and try to determine which + instructions are allowed/needed to be moved. */ + while (!gsi_end_p (gsi)) + { + gimple *stmt = gsi_stmt (gsi); + gsi_prev (&gsi); + if (!gimple_has_ops (stmt) + || is_gimple_debug (stmt)) + continue; + + tree dest = NULL_TREE; + /* Try to find the SSA_NAME being defined. For Statements with an LHS + use the LHS, if not, assume that the first argument of a call is + the value being defined. e.g. MASKED_LOAD etc. */ + if (gimple_has_lhs (stmt)) + dest = gimple_get_lhs (stmt); + else if (const gcall *call = dyn_cast <const gcall *> (stmt)) + dest = gimple_arg (call, 0); + + bool move = chain.contains (dest); + + stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt); + if (!stmt_vinfo) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "early breaks not supported. Unknown" + " statement: %G", stmt); + return opt_result::failure_at (c, + "can't safely apply code motion to " + "dependencies of %G to vectorize " + "the early exit.\n", c); + } + + auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo); + if (dr_ref) + { + /* We currently only support statically allocated objects due to + not having first-faulting loads support or peeling for + alignment support. 
Compute the size of the referenced object + (it could be dynamically allocated). */ + tree obj = DR_BASE_ADDRESS (dr_ref); + if (!obj || TREE_CODE (obj) != ADDR_EXPR) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "early breaks only supported on statically" + " allocated objects.\n"); + return opt_result::failure_at (c, + "can't safely apply code motion to " + "dependencies of %G to vectorize " + "the early exit.\n", c); + } + + tree refop = TREE_OPERAND (obj, 0); + tree refbase = get_base_address (refop); + if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase) + || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "early breaks only supported on" + " statically allocated objects.\n"); + return opt_result::failure_at (c, + "can't safely apply code motion to " + "dependencies of %G to vectorize " + "the early exit.\n", c); + } + + /* Check if vector accesses to the object will be within + bounds. */ + tree stype = TREE_TYPE (DECL_SIZE (refbase)); + tree access = fold_build2 (PLUS_EXPR, stype, DR_OFFSET (dr_ref), + DR_INIT (dr_ref)); + tree final_adj + = fold_build2 (MULT_EXPR, stype, LOOP_VINFO_NITERS (loop_vinfo), + DR_STEP (dr_ref)); + + /* must be a constant or assume loop will be versioned or niters + bounded by VF so accesses are within range. 
*/ + if (TREE_CODE (access) == INTEGER_CST + && TREE_CODE (final_adj) == INTEGER_CST) + { + access = fold_build2 (PLUS_EXPR, stype, access, final_adj); + wide_int size = wi::to_wide (DECL_SIZE (refbase)); + wide_int off = wi::to_wide (access); + if (wi::ge_p (off, size, UNSIGNED)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "early breaks not supported:" + " vectorization would read beyond size" + " of object %T.\n", obj); + return opt_result::failure_at (c, + "can't safely apply code motion to " + "dependencies of %G to vectorize " + "the early exit.\n", c); + } + } + + if (DR_IS_READ (dr_ref)) + bases.safe_push (dr_ref); + else if (DR_IS_WRITE (dr_ref)) + { + /* We are moving writes down in the CFG. To be sure that this + is valid after vectorization we have to check all the loads + we are hoisting the stores past to see if any of them may + alias or are the same object. + + Same objects will not be an issue because unless the store + is marked volatile the value can be forwarded. If the + store is marked volatile we don't vectorize the loop + anyway. + + That leaves the check for aliasing. We don't really need + to care about the stores aliasing with each other since the + stores are moved in order so the effects are still observed + correctly. This leaves the check for WAR dependencies + which we would be introducing here if the DR can alias. + The check is quadratic in loads/stores but I have not found + a better API to do this. I believe all loads and stores + must be checked. We also must check them when we + encountered the store, since we don't care about loads past + the store. 
*/ + + for (auto dr_read : bases) + if (dr_may_alias_p (dr_read, dr_ref, loop_nest)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "early breaks not supported: " + "overlapping loads and stores " + "found before the break " + "statement.\n"); + + return opt_result::failure_at (stmt, + "can't safely apply code motion to dependencies" + " to vectorize the early exit. %G may alias with" + " %G\n", stmt, dr_read->stmt); + } + + /* Any writes starts a new chain. */ + move = true; + } + } + + /* If a statement is live and escapes the loop through usage in the + loop epilogue then we can't move it since we need to maintain its + reachability through all exits. */ + bool skip = false; + if (STMT_VINFO_LIVE_P (stmt_vinfo) + && !(dr_ref && DR_IS_WRITE (dr_ref))) + { + imm_use_iterator imm_iter; + use_operand_p use_p; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, dest) + { + basic_block bb = gimple_bb (USE_STMT (use_p)); + skip = bb == LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; + if (skip) + break; + } + } + + /* If we found the defining statement of a something that's part of + the chain then expand the chain with the new SSA_VARs being + used. */ + if (!skip && move) + { + use_operand_p use_p; + ssa_op_iter iter; + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) + { + tree op = USE_FROM_PTR (use_p); + gcc_assert (TREE_CODE (op) == SSA_NAME); + if (fixed.contains (dest)) + { + move = false; + fixed.add (op); + } + else + chain.add (op); + } + + if (dump_enabled_p ()) + { + if (move) + dump_printf_loc (MSG_NOTE, vect_location, + "found chain %G", stmt); + else + dump_printf_loc (MSG_NOTE, vect_location, + "ignored chain %G, not single use", stmt); + } + } + + if (move) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "==> recording stmt %G", stmt); + + /* If we've moved a VDEF, extract the defining MEM and update + usages of it. */ + tree vdef; + /* This statement is to be moved. 
*/ + if ((vdef = gimple_vdef (stmt))) + LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).safe_push ( + stmt); + } + + if (gimple_vuse (stmt) && !gimple_vdef (stmt)) + { + LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_insert (0, stmt); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "marked statement for vUSE update: %G", stmt); + } + } + + /* Save destination as we go, BB are visited in order and the last one + is where statements should be moved to. */ + if (!dest_bb) + dest_bb = gimple_bb (c); + else + { + basic_block curr_bb = gimple_bb (c); + if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb)) + dest_bb = curr_bb; + } + + /* Mark the statement as a condition. */ + STMT_VINFO_DEF_TYPE (loop_cond_info) = vect_condition_def; + } + + basic_block dest_bb0 = EDGE_SUCC (dest_bb, 0)->dest; + basic_block dest_bb1 = EDGE_SUCC (dest_bb, 1)->dest; + dest_bb = flow_bb_inside_loop_p (loop, dest_bb0) ? dest_bb0 : dest_bb1; + /* We don't allow outer -> inner loop transitions which should have been + trapped already during loop form analysis. */ + gcc_assert (dest_bb->loop_father == loop); + + gcc_assert (dest_bb); + LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb; + + if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ()) + { + /* All uses shall be updated to that of the first load. Entries are + stored in reverse order. */ + tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).last ()); + for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "will update use: %T, mem_ref: %G", vuse, g); + } + } + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "recorded statements to be moved to BB %d\n", + LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index); + + return opt_result::success (); +} + /* Function vect_analyze_data_ref_dependences. 
Examine all the data references in the loop, and make sure there do not @@ -657,6 +1028,11 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, return res; } + /* If we have early break statements in the loop, check to see if they + are of a form we can vectorize. */ + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + return vect_analyze_early_break_dependences (loop_vinfo); + return opt_result::success (); } diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index fb8d999ee6bfaff551ac06ac2f3aea5354914659..0a90d2860b8d037b72fd41d4240804aa390467ea 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1040,6 +1040,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) partial_load_store_bias (0), peeling_for_gaps (false), peeling_for_niter (false), + early_breaks (false), no_data_dependencies (false), has_mask_store (false), scalar_loop_scaling (profile_probability::uninitialized ()), @@ -11548,6 +11549,56 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) epilogue_vinfo->shared->save_datarefs (); } +/* When vectorizing early break statements, instructions that happen before + the early break in the current BB need to be moved to after the early + break. This function deals with that and assumes that any validity + checks have already been performed. + + While moving the instructions if it encounters a VUSE or VDEF it then + corrects the VUSES as it moves the statements along. The basic block + recorded in LOOP_VINFO_EARLY_BRK_DEST_BB is the location in which to + insert the new statements. */ + +static void +move_early_exit_stmts (loop_vec_info loop_vinfo) +{ + DUMP_VECT_SCOPE ("move_early_exit_stmts"); + + if (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).is_empty ()) + return; + + /* Move all stmts that need moving. 
*/ + basic_block dest_bb = LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo); + gimple_stmt_iterator dest_gsi = gsi_start_bb (dest_bb); + + for (gimple *stmt : LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo)) + { + /* Check to see if statement is still required for vect or has been + elided. */ + auto stmt_info = loop_vinfo->lookup_stmt (stmt); + if (!stmt_info) + continue; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "moving stmt %G", stmt); + + gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt); + gsi_move_before (&stmt_gsi, &dest_gsi); + gsi_prev (&dest_gsi); + } + + /* Update all the stmts with their new reaching VUSES. */ + tree vuse + = gimple_vuse (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).last ()); + for (auto p : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "updating vuse to %T for load %G", vuse, p); + gimple_set_vuse (p, vuse); + update_stmt (p); + } +} + /* Function vect_transform_loop. The analysis phase has determined that the loop is vectorizable. @@ -11697,6 +11748,11 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo)); } + /* Handle any code motion that we need to for early-break vectorization after + we've done peeling but just before we start vectorizing. */ + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + move_early_exit_stmts (loop_vinfo); + /* FORNOW: the vectorizer supports only loops which body consist of one basic block (header + empty latch). 
When the vectorizer will support more involved loop forms, the order by which the BBs are diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 96e4a6cffadebb43946c5cb7e9849c915da589bc..b3a09c0a804a38e17ef32b6ce13b98b077459fc7 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -359,8 +359,8 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, *live_p = false; /* cond stmt other than loop exit cond. */ - if (is_ctrl_stmt (stmt_info->stmt) - && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type) + gimple *stmt = STMT_VINFO_STMT (stmt_info); + if (dyn_cast <gcond *> (stmt)) *relevant = vect_used_in_scope; /* changing memory. */ @@ -13530,6 +13530,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, case vect_first_order_recurrence: dump_printf (MSG_NOTE, "first order recurrence\n"); break; + case vect_condition_def: + dump_printf (MSG_NOTE, "control flow\n"); + break; case vect_unknown_def_type: dump_printf (MSG_NOTE, "unknown\n"); break; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index e4d7ab4567cef3c018b958f98eeff045d3477725..3c9478a3dc8750c71e0bf2a36a5b0815afc3fd94 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -66,6 +66,7 @@ enum vect_def_type { vect_double_reduction_def, vect_nested_cycle, vect_first_order_recurrence, + vect_condition_def, vect_unknown_def_type }; @@ -888,6 +889,10 @@ public: we need to peel off iterations at the end to form an epilogue loop. */ bool peeling_for_niter; + /* When the loop has early breaks that we can vectorize we need to peel + the loop for the break finding loop. */ + bool early_breaks; + /* List of loop additional IV conditionals found in the loop. */ auto_vec<gcond *> conds; @@ -942,6 +947,20 @@ public: /* The controlling loop IV for the scalar loop being vectorized. This IV controls the natural exits of the loop. 
*/ edge scalar_loop_iv_exit; + + /* Used to store the list of statements needing to be moved if doing early + break vectorization as they would violate the scalar loop semantics if + vectorized in their current location. These are stored in order that they need + to be moved. */ + auto_vec<gimple *> early_break_conflict; + + /* The final basic block where to move statements to. In the case of + multiple exits this could be pretty far away. */ + basic_block early_break_dest_bb; + + /* Statements whose VUSES need updating if early break vectorization is to + happen. */ + auto_vec<gimple*> early_break_vuses; } *loop_vec_info; /* Access Functions. */ @@ -996,6 +1015,10 @@ public: #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter +#define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks +#define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict +#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb +#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds #define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
rb17963.patch
Description: rb17963.patch