The PR uncovers unchecked constraints on the ability to code-generate with SLP but also latent issues with regard to stmt order checking since loop (early-break) and BB (for quite some time) vectorization are no longer constraint to single-BBs. In particular get_later_stmt simply compares UIDs of stmts, but that's only reliable when they are in the same BB.
For the PR in question the problematical case is demoting a SLP node to external which fails to check we can actually code generate this in the way we do (using get_later_stmt). The following thus adds checking that we demote to external only when all defs are from the same BB. We no longer vectorize gcc.dg/vect/bb-slp-49.c but the testcase was for a wrong-code issue and the vectorization done is a no-op. Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. PR tree-optimization/116352 PR tree-optimization/117876 * tree-vect-slp.cc (vect_slp_can_convert_to_external): New. (vect_slp_convert_to_external): Call it. (vect_build_slp_tree_2): Likewise. * gcc.dg/vect/pr116352.c: New testcase. * gcc.dg/vect/bb-slp-49.c: Remove vectorization check. --- gcc/testsuite/gcc.dg/vect/bb-slp-49.c | 3 +-- gcc/testsuite/gcc.dg/vect/pr116352.c | 34 +++++++++++++++++++++++++++ gcc/tree-vect-slp.cc | 29 ++++++++++++++++++----- 3 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr116352.c diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c index e7101fcff46..c0ad5d70a9a 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c @@ -23,6 +23,5 @@ main () return 0; } -/* See that we vectorize an SLP instance. */ +/* See that we try to vectorize an SLP instance. */ /* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */ -/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr116352.c b/gcc/testsuite/gcc.dg/vect/pr116352.c new file mode 100644 index 00000000000..3fe537c34ff --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr116352.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3" } */ + +static void addPrior(float center_x, float center_y, float width, float height, + bool normalized, float *dst) +{ + if (normalized) + { + dst[0] = (center_x - width * 0.5f); + dst[1] = (center_y - height * 0.5f); + dst[2] = (center_x + width * 0.5f); + dst[3] = (center_y + height * 0.5f); + } + else + { + dst[0] = center_x - width * 0.5f; + dst[1] = center_y - height * 0.5f; + dst[2] = center_x + width * 0.5f - 1.0f; + dst[3] = center_y + height * 0.5f - 1.0f; + } +} +void forward(float *outputPtr, int _offsetsXs, float *_offsetsX, + float *_offsetsY, float _stepX, float _stepY, + bool _bboxesNormalized, float _boxWidth, float _boxHeight) +{ + for (int j = 0; j < _offsetsXs; ++j) + { + float center_x = (_offsetsX[j]) * _stepX; + float center_y = (_offsetsY[j]) * _stepY; + addPrior(center_x, center_y, _boxWidth, _boxHeight, _bboxesNormalized, + outputPtr); + outputPtr += 4; + } +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index ec986cc3f68..1799d5a619b 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -67,6 +67,7 @@ static int vectorizable_slp_permutation_1 (vec_info *, gimple_stmt_iterator *, static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *, slp_tree, stmt_vector_for_cost *); static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree); +static bool vect_slp_can_convert_to_external (const vec<stmt_vec_info> &); static object_allocator<_slp_tree> *slp_tree_pool; static slp_tree slp_first_node; @@ -2887,7 +2888,8 @@ fail: for (j = 0; j < group_size; ++j) if (!matches[j]) break; - if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype))) + if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)) + && vect_slp_can_convert_to_external (oprnd_info->def_stmts)) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -7764,6 +7766,24 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, node, node_instance, cost_vec); } +/* Verify if we can externalize a set of internal defs. */ + +static bool +vect_slp_can_convert_to_external (const vec<stmt_vec_info> &stmts) +{ + basic_block bb = NULL; + for (stmt_vec_info stmt : stmts) + if (!stmt) + return false; + /* Constant generation uses get_later_stmt which can only handle + defs from the same BB. */ + else if (!bb) + bb = gimple_bb (stmt->stmt); + else if (gimple_bb (stmt->stmt) != bb) + return false; + return true; +} + /* Try to build NODE from scalars, returning true on success. NODE_INSTANCE is the SLP instance that contains NODE. */ @@ -7779,13 +7799,10 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node, || !SLP_TREE_SCALAR_STMTS (node).exists () || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)) /* Force the mask use to be built from scalars instead. */ - || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node))) + || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)) + || !vect_slp_can_convert_to_external (SLP_TREE_SCALAR_STMTS (node))) return false; - FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) - if (!stmt_info) - return false; - if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "Building vector operands of %p from scalars instead\n", -- 2.43.0