The testcase pr71752.c was failing because the SLP code was generating an
SLP vector using arguments from the SLP scalar stmts, but was using the
wrong argument number.

vect_get_slp_defs() is given a vector of operands.  When calling down to
vect_get_constant_vectors() it uses each op's index i in that vector as
op_num, on the assumption that the first op in the vector refers to the
first argument in the SLP scalar statement, the second op refers to the
second argument, and so on.

However, previously in vectorizable_reduction, the call to
vect_get_vec_defs destroyed this ordering by potentially passing only op1.

The solution is for vectorizable_reduction to create a vector of operands
equal in size to the number of arguments in the SLP statements.  We
maintain the argument ordering, and if we don't require defs for an
argument we push NULL into the vector in its place.  In vect_get_slp_defs
we then need to handle the case where an op is NULL.

Tested with a check run on X86 and AArch64.
Ok to commit?


Changelog:

gcc/
        * tree-vect-loop.c (vectorizable_reduction): Keep SLP operand ordering.
        * tree-vect-slp.c (vect_get_slp_defs): Handle null operands.

gcc/testsuite/
        * gcc.dg/vect/pr71752.c: New.



Thanks,
Alan.




diff --git a/gcc/testsuite/gcc.dg/vect/pr71752.c b/gcc/testsuite/gcc.dg/vect/pr71752.c
new file mode 100644
index 0000000000000000000000000000000000000000..8d26754b4fedf8b104caae8742a445dffbf23f0a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr71752.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+
+unsigned int q4, yg;
+
+unsigned int
+w6 (unsigned int z5, unsigned int jv)
+{
+  unsigned int *f2 = &jv;
+
+  while (*f2 < 21)
+    {
+      q4 -= jv;
+      z5 -= jv;
+      f2 = &yg;
+      ++(*f2);
+    }
+  return z5;
+}
+
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 2a7e0c6661bc1ba82c9f03720e550749f2252a7c..826481af3d1d8b29bcdbd7d81c0fd5a859ecd9b0 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -5364,7 +5364,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   auto_vec<tree> vect_defs;
   auto_vec<gimple *> phis;
   int vec_num;
-  tree def0, def1, tem, op0, op1 = NULL_TREE;
+  tree def0, def1, tem, op1 = NULL_TREE;
   bool first_p = true;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
   gimple *cond_expr_induction_def_stmt = NULL;
@@ -5964,29 +5964,36 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
       /* Handle uses.  */
       if (j == 0)
         {
-          op0 = ops[!reduc_index];
-          if (op_type == ternary_op)
-            {
-              if (reduc_index == 0)
-                op1 = ops[2];
-              else
-                op1 = ops[1];
-            }
+         if (slp_node)
+           {
+             /* Get vec defs for all the operands except the reduction index,
+               ensuring the ordering of the ops in the vector is kept.  */
+             auto_vec<tree, 3> slp_ops;
+             auto_vec<vec<tree>, 3> vec_defs;

-          if (slp_node)
-            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
-                               slp_node, -1);
+             slp_ops.quick_push ((reduc_index == 0) ? NULL : ops[0]);
+             slp_ops.quick_push ((reduc_index == 1) ? NULL : ops[1]);
+             if (op_type == ternary_op)
+               slp_ops.quick_push ((reduc_index == 2) ? NULL : ops[2]);
+
+             vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1);
+
+             vec_oprnds0.safe_splice (vec_defs[(reduc_index == 0) ? 1 : 0]);
+             if (op_type == ternary_op)
+               vec_oprnds1.safe_splice (vec_defs[(reduc_index == 2) ? 1 : 2]);
+           }
           else
-            {
+           {
              loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
                                                             stmt);
               vec_oprnds0.quick_push (loop_vec_def0);
               if (op_type == ternary_op)
                {
+                op1 = (reduc_index == 0) ? ops[2] : ops[1];
                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
                  vec_oprnds1.quick_push (loop_vec_def1);
                }
-            }
+           }
         }
       else
         {
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index fb325d54f1084461d44cd54a98e5b7f99541a188..7c480d59c823b5258255c8be047f050c83cc91fd 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3200,10 +3200,19 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
   vec<tree> vec_defs;
   tree oprnd;
   bool vectorized_defs;
+  bool first_iteration = true;

   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
   FOR_EACH_VEC_ELT (ops, i, oprnd)
     {
+      if (oprnd == NULL)
+       {
+         vec_defs = vNULL;
+         vec_defs.create (0);
+         vec_oprnds->quick_push (vec_defs);
+         continue;
+       }
+
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
@@ -3240,7 +3249,7 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,

       if (!vectorized_defs)
         {
-          if (i == 0)
+         if (first_iteration)
             {
               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
               /* Number of vector stmts was calculated according to LHS in
@@ -3276,6 +3285,8 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
       /* For reductions, we only need initial values.  */
       if (reduc_index != -1)
         return;
+
+      first_iteration = false;
     }
 }


Reply via email to