The testcase pr71752.c was failing because the SLP code was generating an SLP vector using arguments from the SLP scalar stmts, but was using the wrong argument number.
vect_get_slp_defs() is given a vector of operands. When it calls down to vect_get_constant_vectors it passes the loop index i as op_num, on the assumption that the first op in the vector refers to the first argument of the SLP scalar statement, the second op refers to the second argument, and so on.

However, vectorizable_reduction previously called vect_get_vec_defs, which destroyed this ordering by potentially passing only op1.

The solution is for vectorizable_reduction to create a vector of operands equal in size to the number of arguments in the SLP statements. The argument ordering is maintained, and where defs are not required for an argument, NULL is pushed into the vector instead. vect_get_slp_defs in turn needs to handle the case where an op is NULL.

Tested with a check run on X86 and AArch64.

Ok to commit?

Changelog:

gcc/
	* tree-vect-loop.c (vectorizable_reduction): Keep SLP operand ordering.
	* tree-vect-slp.c (vect_get_slp_defs): Handle null operands.

gcc/testsuite/
	* gcc.dg/vect/pr71752.c: New.

Thanks,
Alan.
diff --git a/gcc/testsuite/gcc.dg/vect/pr71752.c b/gcc/testsuite/gcc.dg/vect/pr71752.c new file mode 100644 index 0000000000000000000000000000000000000000..8d26754b4fedf8b104caae8742a445dff bf23f0a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr71752.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ + +unsigned int q4, yg; + +unsigned int +w6 (unsigned int z5, unsigned int jv) +{ + unsigned int *f2 = &jv; + + while (*f2 < 21) + { + q4 -= jv; + z5 -= jv; + f2 = &yg; + ++(*f2); + } + return z5; +} + diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 2a7e0c6661bc1ba82c9f03720e550749f2252a7c..826481af3d1d8b29bcdbd7d81c0fd5a85 9ecd9b0 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5364,7 +5364,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, auto_vec<tree> vect_defs; auto_vec<gimple *> phis; int vec_num; - tree def0, def1, tem, op0, op1 = NULL_TREE; + tree def0, def1, tem, op1 = NULL_TREE; bool first_p = true; tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; gimple *cond_expr_induction_def_stmt = NULL; @@ -5964,29 +5964,36 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, /* Handle uses. */ if (j == 0) { - op0 = ops[!reduc_index]; - if (op_type == ternary_op) - { - if (reduc_index == 0) - op1 = ops[2]; - else - op1 = ops[1]; - } + if (slp_node) + { + /* Get vec defs for all the operands except the reduction index, + ensuring the ordering of the ops in the vector is kept. */ + auto_vec<tree, 3> slp_ops; + auto_vec<vec<tree>, 3> vec_defs; - if (slp_node) - vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, - slp_node, -1); + slp_ops.quick_push ((reduc_index == 0) ? NULL : ops[0]); + slp_ops.quick_push ((reduc_index == 1) ? NULL : ops[1]); + if (op_type == ternary_op) + slp_ops.quick_push ((reduc_index == 2) ? NULL : ops[2]); + + vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1); + + vec_oprnds0.safe_splice (vec_defs[(reduc_index == 0) ? 
1 : 0]); + if (op_type == ternary_op) + vec_oprnds1.safe_splice (vec_defs[(reduc_index == 2) ? 1 : 2]); + } else - { + { loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index], stmt); vec_oprnds0.quick_push (loop_vec_def0); if (op_type == ternary_op) { + op1 = (reduc_index == 0) ? ops[2] : ops[1]; loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt); vec_oprnds1.quick_push (loop_vec_def1); } - } + } } else { diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index fb325d54f1084461d44cd54a98e5b7f99541a188..7c480d59c823b5258255c8be047f050c8 3cc91fd 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3200,10 +3200,19 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, vec<tree> vec_defs; tree oprnd; bool vectorized_defs; + bool first_iteration = true; first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; FOR_EACH_VEC_ELT (ops, i, oprnd) { + if (oprnd == NULL) + { + vec_defs = vNULL; + vec_defs.create (0); + vec_oprnds->quick_push (vec_defs); + continue; + } + /* For each operand we check if it has vectorized definitions in a child node or we need to create them (for invariants and constants). We check if the LHS of the first stmt of the next child matches OPRND. @@ -3240,7 +3249,7 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, if (!vectorized_defs) { - if (i == 0) + if (first_iteration) { number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); /* Number of vector stmts was calculated according to LHS in @@ -3276,6 +3285,8 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, /* For reductions, we only need initial values. */ if (reduc_index != -1) return; + + first_iteration = false; } }