The following records the alternate SLP instance entries coming from
stmts with stores that have no SSA def, like OMP SIMD calls without a LHS.
There's a bit of fallout from having an SLP tree with a NULL vectype,
but nothing too gross.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/121395
        * tree-vectorizer.h (_loop_vec_info::alternate_defs): New member.
        (LOOP_VINFO_ALTERNATE_DEFS): New.
        * tree-vect-stmts.cc (vect_stmt_relevant_p): Populate it.
        (vectorizable_simd_clone_call): Do not register a SLP def
        when there is none.
        * tree-vect-slp.cc (vect_build_slp_tree_1): Allow a NULL
        vectype when there's no LHS.  Allow all calls w/o LHS.
        (vect_analyze_slp): Process LOOP_VINFO_ALTERNATE_DEFS as
        SLP graph entries.
        (vect_make_slp_decision): Handle a NULL SLP_TREE_VECTYPE.
        (vect_slp_analyze_node_operations_1): Likewise.
        (vect_schedule_slp_node): Likewise.
---
 gcc/testsuite/gcc.dg/vect/pr59984.c |  4 ++++
 gcc/tree-vect-slp.cc                | 37 ++++++++++++++++++++++-------
 gcc/tree-vect-stmts.cc              |  6 ++++-
 gcc/tree-vectorizer.h               |  5 ++++
 4 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr59984.c b/gcc/testsuite/gcc.dg/vect/pr59984.c
index c00c2267158..8ca446ea67c 100644
--- a/gcc/testsuite/gcc.dg/vect/pr59984.c
+++ b/gcc/testsuite/gcc.dg/vect/pr59984.c
@@ -64,3 +64,7 @@ main ()
   return 0;
 }
 
+/* { dg-final { scan-tree-dump "31:17: optimized: loop vectorized" "vect" } } */
+/* { dg-final { scan-tree-dump "37:7: optimized: loop vectorized" "vect" } } */
+/* { dg-final { scan-tree-dump "44:17: optimized: loop vectorized" "vect" } } */
+/* { dg-final { scan-tree-dump "50:7: optimized: loop vectorized" "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 530932037a1..d751f3db13c 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1139,7 +1139,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
       soft_fail_nunits_vectype = nunits_vectype;
     }
 
-  gcc_assert (vectype);
+  gcc_assert (vectype || !gimple_get_lhs (first_stmt_info->stmt));
   *node_vectype = vectype;
 
   /* For every stmt in NODE find its def stmt/s.  */
@@ -1186,10 +1186,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 
       gcall *call_stmt = dyn_cast <gcall *> (stmt);
       tree lhs = gimple_get_lhs (stmt);
-      if (lhs == NULL_TREE
-         && (!call_stmt
-             || !gimple_call_internal_p (stmt)
-             || !internal_store_fn_p (gimple_call_internal_fn (stmt))))
+      if (lhs == NULL_TREE && !call_stmt)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4916,6 +4913,22 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
              return opt_result::failure_at (vect_location,
                                             "SLP build failed.\n");
          }
+
+      stmt_vec_info stmt_info;
+      FOR_EACH_VEC_ELT (LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo), i, stmt_info)
+       {
+         vec<stmt_vec_info> stmts;
+         vec<stmt_vec_info> roots = vNULL;
+         vec<tree> remain = vNULL;
+         stmts.create (1);
+         stmts.quick_push (stmt_info);
+         if (! vect_build_slp_instance (vinfo, slp_inst_kind_store,
+                                        stmts, roots, remain, max_tree_size,
+                                        &limit, bst_map, NULL,
+                                        force_single_lane))
+           return opt_result::failure_at (vect_location,
+                                          "SLP build failed.\n");
+       }
     }
 
   if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
@@ -7633,7 +7646,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
       /* If all instances ended up with vector(1) T roots make sure to
         not vectorize.  RVV for example relies on loop vectorization
         when some instances are essentially kept scalar.  See PR121048.  */
-      if (known_gt (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (root)), 1U))
+      if (SLP_TREE_VECTYPE (root)
+         && known_gt (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (root)), 1U))
        decided_to_slp++;
     }
 
@@ -7960,7 +7974,10 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
      elements in a vector.  For single-defuse-cycle, lane-reducing op, and
      PHI statement that starts reduction comprised of only lane-reducing ops,
      the number is more than effective vector statements actually required.  */
-  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
+  if (SLP_TREE_VECTYPE (node))
+    SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
+  else
+    SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
 
   /* Handle purely internal nodes.  */
   if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
@@ -11317,8 +11334,10 @@ vect_schedule_slp_node (vec_info *vinfo,
 
   stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
 
-  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
-  SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
+  gcc_assert (!SLP_TREE_VECTYPE (node)
+             || SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
+  if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0)
+    SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
 
   if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
       && STMT_VINFO_DATA_REF (stmt_info))
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ee6db260bfc..dd536ab08ee 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -386,6 +386,9 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
          dump_printf_loc (MSG_NOTE, vect_location,
                            "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
+       if (! STMT_VINFO_DATA_REF (stmt_info)
+           && zero_ssa_operands (stmt_info->stmt, SSA_OP_DEF))
+         LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo).safe_push (stmt_info);
       }
 
   /* uses outside the loop.  */
@@ -4754,7 +4757,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
            }
        }
 
-      SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
+      if (gimple_get_lhs (new_stmt))
+       SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
     }
 
   for (i = 0; i < nargs; ++i)
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 684ce7bd217..d860ac42735 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1000,6 +1000,10 @@ public:
      stmt in the chain.  */
   auto_vec<stmt_vec_info> reduction_chains;
 
+  /* Defs that could not be analyzed such as OMP SIMD calls without
+     a LHS.  */
+  auto_vec<stmt_vec_info> alternate_defs;
+
   /* Cost vector for a single scalar iteration.  */
   auto_vec<stmt_info_for_cost> scalar_cost_vec;
 
@@ -1239,6 +1243,7 @@ public:
 #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
 #define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L)  (L)->inv_pattern_def_seq
 #define LOOP_VINFO_DRS_ADVANCED_BY(L)      (L)->drs_advanced_by
+#define LOOP_VINFO_ALTERNATE_DEFS(L)       (L)->alternate_defs
 
 #define LOOP_VINFO_FULLY_MASKED_P(L)           \
   (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L)      \
-- 
2.43.0

Reply via email to