This removes the non-SLP paths from vectorizable_call, propagates
ncopies == 1 and removes the degenerate single-iteration loops
resulting from that.

Bootstrapped and tested on x86_64-unknown-linux-gnu, will squash
and push.

        * tree-vect-stmts.cc (vectorizable_call): Remove non-SLP path.
---
 gcc/tree-vect-stmts.cc | 124 +++++++++++++++++++----------------------
 1 file changed, 56 insertions(+), 68 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 7a89bc0e5d4..0799e4fd664 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3381,7 +3381,6 @@ vectorizable_call (vec_info *vinfo,
   tree vectypes[ARRAY_SIZE (dt)] = {};
   slp_tree slp_op[ARRAY_SIZE (dt)] = {};
   int ndts = ARRAY_SIZE (dt);
-  int j;
   auto_vec<tree, 8> vargs;
   enum { NARROW, NONE, WIDEN } modifier;
   size_t i, nargs;
@@ -3510,13 +3509,14 @@ vectorizable_call (vec_info *vinfo,
       return false;
     }
 
-  if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
-  {
+  if (vect_emulated_vector_p (vectype_in)
+      || vect_emulated_vector_p (vectype_out))
+    {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use emulated vector type for call\n");
       return false;
-  }
+    }
 
   /* FORNOW */
   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
@@ -3693,32 +3693,28 @@ vectorizable_call (vec_info *vinfo,
       tree prev_res = NULL_TREE;
       vargs.safe_grow (vect_nargs, true);
       auto_vec<vec<tree> > vec_defs (nargs);
-      for (j = 0; j < 1; ++j)
-       {
-         /* Build argument list for the vectorized call.  */
-         if (cfn == CFN_GOMP_SIMD_LANE)
-           {
-             for (i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++i)
-               {
-                 /* ???  For multi-lane SLP we'd need to build
-                    { 0, 0, .., 1, 1, ... }.  */
-                 tree cst = build_index_vector (vectype_out,
-                                                i * nunits_out, 1);
-                 tree new_var
-                     = vect_get_new_ssa_name (vectype_out, vect_simple_var,
-                                              "cst_");
-                 gimple *init_stmt = gimple_build_assign (new_var, cst);
-                 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
-                 new_temp = make_ssa_name (vec_dest);
-                 gimple *new_stmt
-                     = gimple_build_assign (new_temp, new_var);
-                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
-                                              gsi);
-                 slp_node->push_vec_def (new_stmt);
-               }
-             continue;
-           }
 
+      /* Build argument list for the vectorized call.  */
+      if (cfn == CFN_GOMP_SIMD_LANE)
+       {
+         for (i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++i)
+           {
+             /* ???  For multi-lane SLP we'd need to build
+                { 0, 0, .., 1, 1, ... }.  */
+             tree cst = build_index_vector (vectype_out,
+                                            i * nunits_out, 1);
+             tree new_var
+               = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
+             gimple *init_stmt = gimple_build_assign (new_var, cst);
+             vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
+             new_temp = make_ssa_name (vec_dest);
+             gimple *new_stmt = gimple_build_assign (new_temp, new_var);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             slp_node->push_vec_def (new_stmt);
+           }
+       }
+      else
+       {
          vec<tree> vec_oprnds0;
          vect_get_slp_defs (vinfo, slp_node, &vec_defs);
          vec_oprnds0 = vec_defs[0];
@@ -3730,9 +3726,8 @@ vectorizable_call (vec_info *vinfo,
              if (masked_loop_p && reduc_idx >= 0)
                {
                  unsigned int vec_num = vec_oprnds0.length ();
-                 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
-                                                     gsi, masks, vec_num,
-                                                     vectype_out, i);
+                 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks,
+                                                     vec_num, vectype_out, i);
                }
              size_t k;
              for (k = 0; k < nargs; k++)
@@ -3782,12 +3777,11 @@ vectorizable_call (vec_info *vinfo,
                  else if (mask_opno >= 0 && masked_loop_p)
                    {
                      unsigned int vec_num = vec_oprnds0.length ();
-                     tree mask = vect_get_loop_mask (loop_vinfo,
-                                                     gsi, masks, vec_num,
-                                                     vectype_out, i);
-                     vargs[mask_opno] = prepare_vec_mask
-                                         (loop_vinfo, TREE_TYPE (mask), mask,
-                                          vargs[mask_opno], gsi);
+                     tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
+                                                     vec_num, vectype_out, i);
+                     vargs[mask_opno]
+                       = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
+                                           vargs[mask_opno], gsi);
                    }
 
                  gcall *call;
@@ -3804,6 +3798,7 @@ vectorizable_call (vec_info *vinfo,
              slp_node->push_vec_def (new_stmt);
            }
        }
+
       for (i = 0; i < nargs; i++)
        {
          vec<tree> vec_oprndsi = vec_defs[i];
@@ -3815,41 +3810,34 @@ vectorizable_call (vec_info *vinfo,
       auto_vec<vec<tree> > vec_defs (nargs);
       /* We don't define any narrowing conditional functions at present.  */
       gcc_assert (mask_opno < 0);
-      for (j = 0; j < 1; ++j)
-       {
-         /* Build argument list for the vectorized call.  */
-         if (j == 0)
-           vargs.create (nargs * 2);
-         else
-           vargs.truncate (0);
 
-         vec<tree> vec_oprnds0;
+      /* Build argument list for the vectorized call.  */
+      vargs.create (nargs * 2);
 
-         vect_get_slp_defs (vinfo, slp_node, &vec_defs);
-         vec_oprnds0 = vec_defs[0];
+      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
+      vec<tree> vec_oprnds0 = vec_defs[0];
 
-         /* Arguments are ready.  Create the new vector stmt.  */
-         for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
+      /* Arguments are ready.  Create the new vector stmt.  */
+      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
+       {
+         size_t k;
+         vargs.truncate (0);
+         for (k = 0; k < nargs; k++)
            {
-             size_t k;
-             vargs.truncate (0);
-             for (k = 0; k < nargs; k++)
-               {
-                 vec<tree> vec_oprndsk = vec_defs[k];
-                 vargs.quick_push (vec_oprndsk[i]);
-                 vargs.quick_push (vec_oprndsk[i + 1]);
-               }
-             gcall *call;
-             if (ifn != IFN_LAST)
-               call = gimple_build_call_internal_vec (ifn, vargs);
-             else
-               call = gimple_build_call_vec (fndecl, vargs);
-             new_temp = make_ssa_name (vec_dest, call);
-             gimple_call_set_lhs (call, new_temp);
-             gimple_call_set_nothrow (call, true);
-             vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
-             slp_node->push_vec_def (call);
+             vec<tree> vec_oprndsk = vec_defs[k];
+             vargs.quick_push (vec_oprndsk[i]);
+             vargs.quick_push (vec_oprndsk[i + 1]);
            }
+         gcall *call;
+         if (ifn != IFN_LAST)
+           call = gimple_build_call_internal_vec (ifn, vargs);
+         else
+           call = gimple_build_call_vec (fndecl, vargs);
+         new_temp = make_ssa_name (vec_dest, call);
+         gimple_call_set_lhs (call, new_temp);
+         gimple_call_set_nothrow (call, true);
+         vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+         slp_node->push_vec_def (call);
        }
 
       for (i = 0; i < nargs; i++)
-- 
2.43.0

Reply via email to