Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/115120
PR tree-optimization/119577
PR tree-optimization/119860
* tree-vect-loop-manip.cc (vect_can_peel_nonlinear_iv_p): Check for
nonlinear mult induction and early break.
(vect_update_ivs_after_vectorizer): Support early break exits.
(vect_do_peeling): Support scalar IVs.
* tree-vect-loop.cc (vect_peel_nonlinear_iv_init): Support early break.
(vect_update_nonlinear_iv): Use unsigned_type_for so that the function
works for both vector and scalar types.
(vectorizable_induction, vectorizable_live_operation): Remove vector
early break IV code.
(vect_update_ivs_after_vectorizer_for_early_breaks): New.
(vect_transform_loop): Support new scalar IV for early break.
* tree-vect-slp.cc (vect_analyze_slp): Remove SLP build for early break
IVs.
* tree-vect-stmts.cc (vect_stmt_relevant_p): No longer force early break
IVs live; leave them completely unused rather than used_only_live.  They
no longer contribute to the vector loop and so should not be analyzed.
(can_vectorize_live_stmts): Remove vector early break IV code.
* tree-vectorizer.h (LOOP_VINFO_EARLY_BRK_NITERS_VAR): New.
(class loop_vec_info): Add early_break_niters_var.
gcc/testsuite/ChangeLog:
PR tree-optimization/115120
PR tree-optimization/119577
PR tree-optimization/119860
* gcc.dg/vect/vect-early-break_39.c: Update.
* gcc.dg/vect/vect-early-break_139.c: New testcase.
* gcc.target/aarch64/sve/peel_ind_10.c: Update.
* gcc.target/aarch64/sve/peel_ind_11.c: Update.
* gcc.target/aarch64/sve/peel_ind_12.c: Update.
* gcc.target/aarch64/sve/peel_ind_5.c: Update.
* gcc.target/aarch64/sve/peel_ind_6.c: Update.
* gcc.target/aarch64/sve/peel_ind_7.c: Update.
* gcc.target/aarch64/sve/peel_ind_9.c: Update.
* gcc.target/aarch64/sve/pr119351.c: Update.
---
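For reference, a rough sketch of the scalar counter that
vect_update_ivs_after_vectorizer_for_early_breaks materializes (names are
illustrative only; simplified to the unmasked case without peeling for
alignment):

  /* Preheader: the counter starts at 0 (or at -niters_skip when peeling
     for alignment using masking).  */
  niters_done_0 = 0;

  /* Vector loop header.  */
  niters_done = PHI <niters_done_0 (preheader), niters_done_next (latch)>;
  ...
  /* Latch update: plus VF, or the .SELECT_VL result on loop-len targets.  */
  niters_done_next = niters_done + VF;

On the early-exit path the scalar IV start values are then recomputed from
this counter by vect_update_ivs_after_vectorizer rather than extracted from
the vector IVs in vectorizable_live_operation.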
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_139.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_139.c
new file mode 100644
index 0000000000000000000000000000000000000000..95994936c432f1205c82f9022ec4599e04522c37
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_139.c
@@ -0,0 +1,37 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__((noipa))
+unsigned loop9(unsigned char *a, unsigned n, unsigned c)
+{
+ for (unsigned j = 0;;)
+ {
+ if (c <= j)
+ __builtin_abort();
+
+ unsigned char *slot = (unsigned char *)a + j;
+
+ *slot = (char)j;
+
+ unsigned d = j + 1;
+ if (d < n)
+ j = d;
+ else
+ return d;
+ }
+}
+
+int main ()
+{
+ check_vect ();
+
+ unsigned char buff[16] = {0};
+ unsigned res = loop9 (buff, 16, 20);
+ if (res != 16)
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c
index b3f40b8c9ba49e41bd283e46a462238c3b5825ef..bc862ad20e68db8f3c0ba6facf47e13a56a7cd6d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c
@@ -23,5 +23,6 @@ unsigned test4(unsigned x, unsigned n)
return ret;
}
-/* cannot safely vectorize this due due to the group misalignment. */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* AArch64 will scalarize the load and is able to vectorize it. */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target aarch64*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" { target { ! aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c
index b7a7bc5cb0cfdfdb74adb120c54ba15019832cf1..43abd01c078da7d3f80045ecbd37b72ac918f678 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c
@@ -20,5 +20,4 @@ foo (int start)
}
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
/* { dg-final { scan-tree-dump "Alignment of access forced using peeling"
"vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c
index feb7ee7d61c92145e8defc095f2ad096b1e3f777..37806adea7b9788d3122fa32148a8709d5cf57be 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c
@@ -15,6 +15,5 @@ foo (int *a) {
}
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
/* { dg-final { scan-tree-dump "Alignment of access forced using peeling"
"vect" } } */
/* { dg-final { scan-assembler {\tnot\tp[0-7]\.b, p[0-7]/z, p.*\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c
index 260482a94df750b7886d72eed1964e70288c0886..e3ed63afb05cbef15d3c58a18acb0f3650161223 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c
@@ -15,7 +15,6 @@ foo (int *restrict a, int * restrict b) {
}
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
/* { dg-final { scan-tree-dump "Both peeling and versioning will be applied"
"vect" } } */
/* { dg-final { scan-assembler {\tnot\tp[0-7]\.b, p[0-7]/z, p.*\n} } } */
/* { dg-final { scan-assembler {\teor\t.*\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_5.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_5.c
index a03bb1dec21ef75aa0cbfb22c8bb02b99644239e..1977bf3af2db247825900c4200676f4dc2ca4f9a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_5.c
@@ -20,5 +20,4 @@ foo (void)
}
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
/* { dg-final { scan-tree-dump "Alignment of access forced using peeling"
"vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_6.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_6.c
index 9bfd1a65c4feb0c140d4abf98508fc8af08042ba..0b40d26ae2a3f3c882a7e571140f9efabcf9c41a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_6.c
@@ -20,5 +20,4 @@ foo (int start)
}
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
/* { dg-final { scan-tree-dump "Alignment of access forced using peeling"
"vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_7.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_7.c
index 0182e131a173b7b05e88c3393ba854b2da25c6b2..7a24d689e95a65aa65e1ec6558d117d19407a2c6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_7.c
@@ -20,5 +20,4 @@ foo (void)
}
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
/* { dg-final { scan-tree-dump "Alignment of access forced using peeling"
"vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_9.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_9.c
index cc904e88170f072e1d3c6be86643d99a7cd5cb12..136d18c2ea89f5a93a1edfc24fe8b7f97bae82d8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_9.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_9.c
@@ -20,6 +20,6 @@ foo (void)
}
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* Peels using a scalar loop. */
-/* { dg-final { scan-tree-dump-not "pfa_iv_offset" "vect" } } */
+/* Peels using a fully-masked loop. */
+/* { dg-final { scan-tree-dump "misalignment for fully-masked loop" "vect" } } */
/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
index 1ebc735a82f4a59d8eccff39346e46a449b4729a..1aca6c7de1d4196fb12bf3202258229a6ec3995d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -33,6 +33,5 @@ foo (void)
}
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
/* { dg-final { scan-tree-dump "Alignment of access forced using peeling"
"vect" } } */
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 9ddf9acf2f190958cf2594385f42fa6d58a61bc3..43847c4c3fbdbe7b8364d30e0b614b39cbabf367 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2161,6 +2161,16 @@ vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
return false;
}
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ && induction_type == vect_step_op_mul)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Peeling is not supported for nonlinear mult"
+ " induction using partial vectorization.\n");
+ return false;
+ }
+
/* Avoid compile time hog on vect_peel_nonlinear_iv_init. */
if (induction_type == vect_step_op_mul)
{
@@ -2315,6 +2325,9 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
The phi args associated with the edge UPDATE_E in the bb
UPDATE_E->dest are updated accordingly.
+ - EARLY_EXIT_P - Indicates whether the exit is an early exit rather than
+ the main latch exit.
+
Assumption 1: Like the rest of the vectorizer, this function assumes
a single loop exit that has a single predecessor.
@@ -2333,7 +2346,8 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
- tree niters, edge update_e)
+ tree niters, edge update_e,
+ bool early_exit_p)
{
gphi_iterator gsi, gsi1;
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -2400,15 +2414,16 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
else
ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
niters, step_expr,
- induction_type);
+ induction_type, early_exit_p);
var = create_tmp_var (type, "tmp");
gimple_seq new_stmts = NULL;
ni_name = force_gimple_operand (ni, &new_stmts, false, var);
- /* Exit_bb shouldn't be empty. */
- if (!gsi_end_p (last_gsi))
+ /* Exit_bb shouldn't be empty, but we also can't insert after a control
+    statement. */
+ if (!gsi_end_p (last_gsi) && !is_ctrl_stmt (gsi_stmt (last_gsi)))
{
gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
@@ -2419,8 +2434,15 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
}
- /* Fix phi expressions in the successor bb. */
- adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
+ /* Fix phi expressions in all out-of-loop bbs. */
+ imm_use_iterator imm_iter;
+ gimple *use_stmt;
+ use_operand_p use_p;
+ tree ic_var = PHI_ARG_DEF_FROM_EDGE (phi1, update_e);
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, ic_var)
+ if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+ SET_USE (use_p, ni_name);
}
}
@@ -3562,14 +3584,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
update_e = single_succ_edge (LOOP_VINFO_IV_EXIT (loop_vinfo)->dest);
- /* If we have a peeled vector iteration, all exits are the same, leave it
- and so the main exit needs to be treated the same as the alternative
- exits in that we leave their updates to vectorizable_live_operations.
- */
- if (!LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
- vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
- update_e);
-
/* If we have a peeled vector iteration we will never skip the epilog loop
   and we can simplify the cfg a lot by not doing the edge split. */
if (skip_epilog
@@ -3625,6 +3639,41 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
scale_loop_profile (epilog, prob_epilog, -1);
}
+ /* If we have a peeled vector iteration all exits are the same, and so the
+    main exit needs to be treated the same as the alternative exits in that
+    we leave their updates to vectorizable_live_operations. */
+ tree vector_iters_vf = niters_vector_mult_vf;
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ {
+ tree scal_iv_ty = signed_type_for (TREE_TYPE (vector_iters_vf));
+ tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
+ basic_block exit_bb = NULL;
+ edge update_e = NULL;
+
+ /* Identify the early exit merge block.  I wish we had stored this. */
+ for (auto e : get_loop_exit_edges (loop))
+ if (e != LOOP_VINFO_IV_EXIT (loop_vinfo))
+ {
+ exit_bb = e->dest;
+ update_e = single_succ_edge (exit_bb);
+ break;
+ }
+ vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
+ update_e, true);
+
+ if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
+ vector_iters_vf = tmp_niters_vf;
+
+ LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
+ }
+
+ bool recalculate_peel_niters_init
+ = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
+ vect_update_ivs_after_vectorizer (loop_vinfo, vector_iters_vf,
+ update_e,
+ recalculate_peel_niters_init);
+
/* Recalculate the dominators after adding the guard edge. */
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
iterate_fix_dominators (CDI_DOMINATORS, doms, false);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ab6c0f084703d38d8f531fbb1c12ac16a4f6cdbf..3ac264f0ce32de5d307c4c4b623d3490ee9c41d2 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -8951,14 +8951,25 @@ vect_create_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
tree
vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
tree skip_niters, tree step_expr,
- enum vect_induction_op_type induction_type)
+ enum vect_induction_op_type induction_type,
+ bool early_exit_p)
{
- gcc_assert (TREE_CODE (skip_niters) == INTEGER_CST);
+ gcc_assert (TREE_CODE (skip_niters) == INTEGER_CST || early_exit_p);
tree type = TREE_TYPE (init_expr);
unsigned prec = TYPE_PRECISION (type);
switch (induction_type)
{
+ /* neg inductions are typically not used for loop termination conditions but
+    are typically implemented as b = -b.  That is, every scalar iteration b is
+    negated.  That means that for the initial value of b we will have to
+    determine whether the number of skipped iterations is a multiple of 2,
+    because every 2 scalar iterations we are back at "b". */
case vect_step_op_neg:
+ /* For early exits the neg induction will always be the same value at the
+ start of the iteration. */
+ if (early_exit_p)
+ break;
+
if (TREE_INT_CST_LOW (skip_niters) % 2)
init_expr = gimple_build (stmts, NEGATE_EXPR, type, init_expr);
/* else no change. */
@@ -8966,13 +8977,15 @@ vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
case vect_step_op_shr:
case vect_step_op_shl:
- skip_niters = gimple_convert (stmts, type, skip_niters);
- step_expr = gimple_build (stmts, MULT_EXPR, type, step_expr,
skip_niters);
+ skip_niters = fold_build1 (NOP_EXPR, type, skip_niters);
+ step_expr = fold_build1 (NOP_EXPR, type, step_expr);
+ step_expr = fold_build2 (MULT_EXPR, type, step_expr, skip_niters);
/* When shift mount >= precision, need to avoid UD.
In the original loop, there's no UD, and according to semantic,
init_expr should be 0 for lshr, ashl, and >>= (prec - 1) for ashr. */
- if (!tree_fits_uhwi_p (step_expr)
+ if ((!tree_fits_uhwi_p (step_expr)
|| tree_to_uhwi (step_expr) >= prec)
+ && !early_exit_p)
{
if (induction_type == vect_step_op_shl
|| TYPE_UNSIGNED (type))
@@ -8983,13 +8996,19 @@ vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
wide_int_to_tree (type, prec - 1));
}
else
- init_expr = gimple_build (stmts, (induction_type == vect_step_op_shr
+ {
+ init_expr = fold_build2 ((induction_type == vect_step_op_shr
? RSHIFT_EXPR : LSHIFT_EXPR),
- type, init_expr, step_expr);
+ type, init_expr, step_expr);
+ init_expr = force_gimple_operand (init_expr, stmts, false, NULL);
+ }
break;
case vect_step_op_mul:
{
+ /* Due to UB we can't support vect_step_op_mul with early break for now,
+    so assert and block. */
+ gcc_assert (TREE_CODE (skip_niters) == INTEGER_CST);
tree utype = unsigned_type_for (type);
init_expr = gimple_convert (stmts, utype, init_expr);
wide_int skipn = wi::to_wide (skip_niters);
@@ -9073,9 +9092,7 @@ vect_update_nonlinear_iv (gimple_seq* stmts, tree vectype,
case vect_step_op_mul:
{
/* Use unsigned mult to avoid UD integer overflow. */
- tree uvectype
- = build_vector_type (unsigned_type_for (TREE_TYPE (vectype)),
- TYPE_VECTOR_SUBPARTS (vectype));
+ tree uvectype = unsigned_type_for (vectype);
vec_def = gimple_convert (stmts, uvectype, vec_def);
vec_step = gimple_convert (stmts, uvectype, vec_step);
vec_def = gimple_build (stmts, MULT_EXPR, uvectype,
@@ -9322,7 +9339,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
to adjust the start value here. */
if (niters_skip != NULL_TREE)
init_expr = vect_peel_nonlinear_iv_init (&stmts, init_expr, niters_skip,
- step_expr, induction_type);
+ step_expr, induction_type, false);
vec_init = vect_create_nonlinear_iv_init (&stmts, init_expr,
step_expr, nunits, vectype,
@@ -9703,53 +9720,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
peel_mul = gimple_build_vector_from_val (&init_stmts,
step_vectype, peel_mul);
-
- /* If early break then we have to create a new PHI which we can use as
- an offset to adjust the induction reduction in early exits.
-
- This is because when peeling for alignment using masking, the first
- few elements of the vector can be inactive. As such if we find the
- entry in the first iteration we have adjust the starting point of
- the scalar code.
-
- We do this by creating a new scalar PHI that keeps track of whether
- we are the first iteration of the loop (with the additional masking)
- or whether we have taken a loop iteration already.
-
- The generated sequence:
-
- pre-header:
- bb1:
- i_1 = <number of leading inactive elements>
-
- header:
- bb2:
- i_2 = PHI <i_1(bb1), 0(latch)>
- …
-
- early-exit:
- bb3:
- i_3 = iv_step * i_2 + PHI<vector-iv>
-
- The first part of the adjustment to create i_1 and i_2 are done here
- and the last part creating i_3 is done in
- vectorizable_live_operations when the induction extraction is
- materialized. */
- if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo))
- {
- auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
- tree ty_skip_niters = TREE_TYPE (skip_niters);
- tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters,
- vect_scalar_var,
- "pfa_iv_offset");
- gphi *nphi = create_phi_node (break_lhs_phi, bb);
- add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION);
- add_phi_arg (nphi, build_zero_cst (ty_skip_niters),
- loop_latch_edge (iv_loop), UNKNOWN_LOCATION);
-
- LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo) = PHI_RESULT (nphi);
- }
}
tree step_mul = NULL_TREE;
unsigned ivn;
@@ -10325,8 +10295,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
to the latch then we're restarting the iteration in the
scalar loop. So get the first live value. */
bool early_break_first_element_p
- = (all_exits_as_early_p || !main_exit_edge)
- && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def;
+ = all_exits_as_early_p || !main_exit_edge;
if (early_break_first_element_p)
{
tmp_vec_lhs = vec_lhs0;
@@ -10335,52 +10304,13 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator exit_gsi;
tree new_tree
- = vectorizable_live_operation_1 (loop_vinfo,
- e->dest, vectype,
- slp_node, bitsize,
- tmp_bitstart, tmp_vec_lhs,
- lhs_type, &exit_gsi);
+ = vectorizable_live_operation_1 (loop_vinfo,
+ e->dest, vectype,
+ slp_node, bitsize,
+ tmp_bitstart, tmp_vec_lhs,
+ lhs_type, &exit_gsi);
auto gsi = gsi_for_stmt (use_stmt);
- if (early_break_first_element_p
- && LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo))
- {
- tree step_expr
- = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
- tree break_lhs_phi
- = LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo);
- tree ty_skip_niters = TREE_TYPE (break_lhs_phi);
- gimple_seq iv_stmts = NULL;
-
- /* Now create the PHI for the outside loop usage to
- retrieve the value for the offset counter. */
- tree rphi_step
- = gimple_convert (&iv_stmts, ty_skip_niters, step_expr);
- tree tmp2
- = gimple_build (&iv_stmts, MULT_EXPR,
- ty_skip_niters, rphi_step,
- break_lhs_phi);
-
- if (POINTER_TYPE_P (TREE_TYPE (new_tree)))
- {
- tmp2 = gimple_convert (&iv_stmts, sizetype, tmp2);
- tmp2 = gimple_build (&iv_stmts, POINTER_PLUS_EXPR,
- TREE_TYPE (new_tree), new_tree,
- tmp2);
- }
- else
- {
- tmp2 = gimple_convert (&iv_stmts, TREE_TYPE (new_tree),
- tmp2);
- tmp2 = gimple_build (&iv_stmts, PLUS_EXPR,
- TREE_TYPE (new_tree), new_tree,
- tmp2);
- }
-
- new_tree = tmp2;
- gsi_insert_seq_before (&exit_gsi, iv_stmts, GSI_SAME_STMT);
- }
-
tree lhs_phi = gimple_phi_result (use_stmt);
remove_phi_node (&gsi, false);
gimple *copy = gimple_build_assign (lhs_phi, new_tree);
@@ -11021,6 +10951,101 @@ move_early_exit_stmts (loop_vec_info loop_vinfo)
SET_PHI_ARG_DEF_ON_EDGE (phi, e, last_seen_vuse);
}
+/* Generate adjustment code for early break scalar IVs, filling in the value
+   we created earlier for LOOP_VINFO_EARLY_BRK_NITERS_VAR. */
+
+static void
+vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
+{
+ DUMP_VECT_SCOPE ("vect_update_ivs_after_vectorizer_for_early_breaks");
+
+ if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ return;
+
+ gcc_assert (LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo));
+
+ tree phi_var = LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo);
+ tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ tree ty_var = TREE_TYPE (phi_var);
+ auto loop = LOOP_VINFO_LOOP (loop_vinfo);
+ tree induc_var = niters_skip ? copy_ssa_name (phi_var) : phi_var;
+
+ auto induction_phi = create_phi_node (induc_var, loop->header);
+ tree induc_def = PHI_RESULT (induction_phi);
+
+ /* Create the iv update inside the loop. */
+ gimple_seq init_stmts = NULL;
+ gimple_seq stmts = NULL;
+ gimple_seq iv_stmts = NULL;
+ tree tree_vf = build_int_cst (ty_var, vf);
+
+ /* For loop len targets we have to use .SELECT_VL (ivtmp_33, VF); instead of
+ just += VF as the VF can change in between two loop iterations. */
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ {
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
+ NULL_TREE, 0, 0);
+ }
+
+ tree iter_var;
+ if (POINTER_TYPE_P (ty_var))
+ {
+ tree offset = gimple_convert (&stmts, sizetype, tree_vf);
+ iter_var = gimple_build (&stmts, POINTER_PLUS_EXPR, ty_var, induc_def,
+ gimple_convert (&stmts, sizetype, offset));
+ }
+ else
+ {
+ tree offset = gimple_convert (&stmts, ty_var, tree_vf);
+ iter_var = gimple_build (&stmts, PLUS_EXPR, ty_var, induc_def, offset);
+ }
+
+ tree init_var = build_zero_cst (ty_var);
+ if (niters_skip)
+ init_var = gimple_build (&init_stmts, MINUS_EXPR, ty_var, init_var,
+ gimple_convert (&init_stmts, ty_var, niters_skip));
+
+ add_phi_arg (induction_phi, iter_var,
+ loop_latch_edge (loop), UNKNOWN_LOCATION);
+ add_phi_arg (induction_phi, init_var,
+ loop_preheader_edge (loop), UNKNOWN_LOCATION);
+
+ /* Find the first insertion point in the BB. */
+ auto pe = loop_preheader_edge (loop);
+
+ /* If we've done any peeling, calculate the peeling adjustment needed to the
+ final IV. */
+ if (niters_skip)
+ {
+ induc_def = gimple_build (&iv_stmts, MAX_EXPR, TREE_TYPE (induc_def),
+ induc_def,
+ build_zero_cst (TREE_TYPE (induc_def)));
+ auto stmt = gimple_build_assign (phi_var, induc_def);
+ gimple_seq_add_stmt_without_update (&iv_stmts, stmt);
+ basic_block exit_bb = NULL;
+ /* Identify the early exit merge block. I wish we had stored this. */
+ for (auto e : get_loop_exit_edges (loop))
+ if (e != LOOP_VINFO_IV_EXIT (loop_vinfo))
+ {
+ exit_bb = e->dest;
+ break;
+ }
+
+ gcc_assert (exit_bb);
+ auto exit_gsi = gsi_after_labels (exit_bb);
+ gsi_insert_seq_before (&exit_gsi, iv_stmts, GSI_SAME_STMT);
+ }
+ /* Write the init_stmts in the loop-preheader block. */
+ auto psi = gsi_last_nondebug_bb (pe->src);
+ gsi_insert_seq_after (&psi, init_stmts, GSI_LAST_NEW_STMT);
+ /* Write the adjustments in the header block. */
+ basic_block bb = loop->header;
+ auto si = gsi_after_labels (bb);
+ gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+}
+
/* Function vect_transform_loop.
The analysis phase has determined that the loop is vectorizable.
@@ -11165,7 +11190,10 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
/* Handle any code motion that we need to for early-break vectorization after
   we've done peeling but just before we start vectorizing. */
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
- move_early_exit_stmts (loop_vinfo);
+ {
+ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vinfo);
+ move_early_exit_stmts (loop_vinfo);
+ }
/* Remove existing clobber stmts and prefetches. */
for (i = 0; i < nbbs; i++)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 5b0de9291cb0fb85e3cc4575dd6731b26d03ecd7..658ad6dc25798da2464bd91ac6194d4b711e6612 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5885,48 +5885,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
"SLP build failed.\n");
}
}
-
- /* Find and create slp instances for inductions that have been forced
- live due to early break. */
- edge latch_e = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo));
- for (auto stmt_info : LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo))
- {
- vec<stmt_vec_info> stmts;
- vec<stmt_vec_info> roots = vNULL;
- vec<tree> remain = vNULL;
- gphi *phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info));
- tree def = gimple_phi_arg_def_from_edge (phi, latch_e);
- stmt_vec_info lc_info = loop_vinfo->lookup_def (def);
- if (lc_info)
- {
- stmts.create (1);
- stmts.quick_push (vect_stmt_to_vectorize (lc_info));
- if (! vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, force_single_lane))
- return opt_result::failure_at (vect_location,
- "SLP build failed.\n");
- }
- /* When the latch def is from a different cycle this can only
- be a induction. Build a simple instance for this.
- ??? We should be able to start discovery from the PHI
- for all inductions, but then there will be stray
- non-SLP stmts we choke on as needing non-SLP handling. */
- auto_vec<stmt_vec_info, 1> tem;
- tem.quick_push (stmt_info);
- if (!bst_map->get (tem))
- {
- stmts.create (1);
- stmts.quick_push (stmt_info);
- if (! vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, force_single_lane))
- return opt_result::failure_at (vect_location,
- "SLP build failed.\n");
- }
- }
}
hash_set<slp_tree> visited_patterns;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index de28316ddc660020be4bc462f26316f59402d911..1d7e50afcde1096d5598b43ab8d49454eb68385b 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -356,7 +356,6 @@ is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
- it has uses outside the loop.
- it has vdefs (it alters memory).
- control stmts in the loop (except for the exit condition).
- - it is an induction and we have multiple exits.
CHECKME: what other side effects would the vectorizer allow? */
@@ -418,26 +417,6 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
}
}
- /* Check if it's a not live PHI and multiple exits. In this case
- there will be a usage later on after peeling which is needed for the
- alternate exit.
- ??? Unless the PHI was marked live because of early
- break, which also needs the latch def live and vectorized. */
- if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && is_a <gphi *> (stmt)
- && gimple_bb (stmt) == LOOP_VINFO_LOOP (loop_vinfo)->header
- && ((! VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))
- && ! *live_p)
- || STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vec_stmt_relevant_p: PHI forced live for "
- "early break.\n");
- LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info);
- *live_p = true;
- }
-
if (*live_p && *relevant == vect_unused_in_scope
&& !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
{
@@ -12985,17 +12964,12 @@ can_vectorize_live_stmts (vec_info *vinfo,
bool vec_stmt_p,
stmt_vector_for_cost *cost_vec)
{
- loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
stmt_vec_info slp_stmt_info;
unsigned int i;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
{
if (slp_stmt_info
- && (STMT_VINFO_LIVE_P (slp_stmt_info)
- || (loop_vinfo
- && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && STMT_VINFO_DEF_TYPE (slp_stmt_info)
- == vect_induction_def))
+ && STMT_VINFO_LIVE_P (slp_stmt_info)
&& !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
slp_node_instance, i,
vec_stmt_p, cost_vec))
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5d125afa6bc5166c04dbc68e968ca7207685abb8..0356b129e36f825c6504fca99b0cf65b9c09e325 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1241,6 +1241,10 @@ public:
happen. */
auto_vec<gimple*> early_break_vuses;
+ /* The IV adjustment value for inductions that needs to be materialized
+    inside the relevant exit blocks in order to adjust for early break. */
+ tree early_break_niters_var;
+
/* Record statements that are needed to be live for early break vectorization
   but may not have an LC PHI node materialized yet in the exits. */
auto_vec<stmt_vec_info> early_break_live_ivs;
@@ -1308,6 +1312,7 @@ public:
(L)->early_break_live_ivs
#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
+#define LOOP_VINFO_EARLY_BRK_NITERS_VAR(L) (L)->early_break_niters_var
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
@@ -2716,7 +2721,8 @@ extern tree cse_and_gimplify_to_preheader (loop_vec_info,
tree);
/* Nonlinear induction. */
extern tree vect_peel_nonlinear_iv_init (gimple_seq*, tree, tree,
- tree, enum vect_induction_op_type);
+ tree, enum vect_induction_op_type,
+ bool);
/* In tree-vect-slp.cc. */
extern void vect_slp_init (void);