When we drop the last def of a BB reduction that has an odd number
of lanes, we can end up recording a pattern def, which will later
wreck code generation.  The following moves the logic that makes the
lane count even to where it better belongs, avoiding this issue.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/114249
        * tree-vect-slp.cc (vect_build_slp_instance): Move making
        a BB reduction lane number even ...
        (vect_slp_check_for_roots): ... here to avoid leaking
        pattern defs.

        * gcc.dg/vect/bb-slp-pr114249.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c | 20 ++++++++++++++++++++
 gcc/tree-vect-slp.cc                        | 20 ++++++++++----------
 2 files changed, 30 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c
new file mode 100644
index 00000000000..64c93cd9a2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+
+enum { SEG_THIN_POOL } read_only;
+struct {
+  unsigned skip_block_zeroing;
+  unsigned ignore_discard;
+  unsigned no_discard_passdown;
+  unsigned error_if_no_space;
+} _thin_pool_emit_segment_line_seg;
+void dm_snprintf();
+void _emit_segment()
+{
+  int features =
+      (_thin_pool_emit_segment_line_seg.error_if_no_space ? 1 : 0) +
+      (read_only ? 1 : 0) +
+      (_thin_pool_emit_segment_line_seg.ignore_discard ? 1 : 0) +
+      (_thin_pool_emit_segment_line_seg.no_discard_passdown ? 1 : 0) +
+      (_thin_pool_emit_segment_line_seg.skip_block_zeroing ? 1 : 0);
+  dm_snprintf(features);
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 324400db19e..527b06c9f9c 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3288,15 +3288,6 @@ vect_build_slp_instance (vec_info *vinfo,
                         "  %G", scalar_stmts[i]->stmt);
     }
 
-  /* When a BB reduction doesn't have an even number of lanes
-     strip it down, treating the remaining lane as scalar.
-     ???  Selecting the optimal set of lanes to vectorize would be nice
-     but SLP build for all lanes will fail quickly because we think
-     we're going to need unrolling.  */
-  if (kind == slp_inst_kind_bb_reduc
-      && (scalar_stmts.length () & 1))
-    remain.safe_insert (0, gimple_get_lhs (scalar_stmts.pop ()->stmt));
-
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
   bool *matches = XALLOCAVEC (bool, group_size);
@@ -7549,6 +7540,7 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
              /* ???  For now do not allow mixing ops or externs/constants.  */
              bool invalid = false;
              unsigned remain_cnt = 0;
+             unsigned last_idx = 0;
              for (unsigned i = 0; i < chain.length (); ++i)
                {
                  if (chain[i].code != code)
@@ -7563,7 +7555,13 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
                                                      (chain[i].op)->stmt)
                          != chain[i].op))
                    remain_cnt++;
+                 else
+                   last_idx = i;
                }
+             /* Make sure to have an even number of lanes as we later do
+                all-or-nothing discovery, not trying to split further.  */
+             if ((chain.length () - remain_cnt) & 1)
+               remain_cnt++;
              if (!invalid && chain.length () - remain_cnt > 1)
                {
                  vec<stmt_vec_info> stmts;
@@ -7576,7 +7574,9 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
                      stmt_vec_info stmt_info;
                      if (chain[i].dt == vect_internal_def
                          && ((stmt_info = bb_vinfo->lookup_def (chain[i].op)),
-                             gimple_get_lhs (stmt_info->stmt) == chain[i].op))
+                             gimple_get_lhs (stmt_info->stmt) == chain[i].op)
+                         && (i != last_idx
+                             || (stmts.length () & 1)))
                        stmts.quick_push (stmt_info);
                      else
                        remain.quick_push (chain[i].op);
-- 
2.35.3

Reply via email to