We're running into a multiplication with one unvectorizable
operand, which we expect to build from scalars, but SLP discovery
fatally fails the build of both operands since one stmt is commutated:

  _60 = _58 * _59;
  _63 = _59 * _62;
  _66 = _59 * _65;
...

where _59 is the "bad" operand.  The following patch makes the
case work where the first stmt has a good operand: instead of
fatally failing the SLP build for the operand, it communicates
upwards how to commutate.

Bootstrapped / tested on x86_64-unknown-linux-gnu, pushed.

2020-10-09  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/97334
        * tree-vect-slp.c (vect_build_slp_tree_1): Do not fatally
        fail lanes other than zero when BB vectorizing.

        * gcc.dg/vect/bb-slp-pr65935.c: Amend.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c |  3 +++
 gcc/tree-vect-slp.c                        | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c
index 4e3448eccd7..ea37e4e614c 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c
@@ -60,3 +60,6 @@ int main()
 /* We should also be able to use 2-lane SLP to initialize the real and
    imaginary components in the first loop of main.  */
 /* { dg-final { scan-tree-dump-times "optimized: basic block" 10 "slp1" } } */
+/* We should see the s->phase[dir] operand and only that operand built
+   from scalars.  See PR97334.  */
+/* { dg-final { scan-tree-dump-times "Building vector operands from scalars" 1 "slp1" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 479c3eeaec7..495fb970e24 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -773,6 +773,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unvectorizable statement %G",
                             stmt);
+         /* ???  For BB vectorization we want to commutate operands in a way
+            to shuffle all unvectorizable defs into one operand and have
+            the other still vectorized.  The following doesn't reliably
+            work for this though but it's the easiest we can do here.  */
+         if (is_a <bb_vec_info> (vinfo) && i != 0)
+           continue;
          /* Fatal mismatch.  */
          matches[0] = false;
           return false;
@@ -785,6 +791,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: not GIMPLE_ASSIGN nor "
                             "GIMPLE_CALL %G", stmt);
+         if (is_a <bb_vec_info> (vinfo) && i != 0)
+           continue;
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
@@ -797,6 +805,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
              && !vect_record_max_nunits (vinfo, stmt_info, group_size,
                                          nunits_vectype, max_nunits)))
        {
+         if (is_a <bb_vec_info> (vinfo) && i != 0)
+           continue;
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
@@ -823,6 +833,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: unsupported call type %G",
                                 call_stmt);
+             if (is_a <bb_vec_info> (vinfo) && i != 0)
+               continue;
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
@@ -865,6 +877,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
+                     if (is_a <bb_vec_info> (vinfo) && i != 0)
+                       continue;
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
@@ -876,6 +890,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
+                     if (is_a <bb_vec_info> (vinfo) && i != 0)
+                       continue;
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
@@ -900,6 +916,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
              if (TREE_CODE (vec) != SSA_NAME
                  || !types_compatible_p (vectype, TREE_TYPE (vec)))
                {
+                 if (is_a <bb_vec_info> (vinfo) && i != 0)
+                   continue;
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: "
@@ -1048,6 +1066,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                                 "Build SLP failed: not grouped load %G", stmt);
 
              /* FORNOW: Not grouped loads are not supported.  */
+             if (is_a <bb_vec_info> (vinfo) && i != 0)
+               continue;
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
@@ -1066,6 +1086,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: operation unsupported %G",
                                 stmt);
+             if (is_a <bb_vec_info> (vinfo) && i != 0)
+               continue;
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
-- 
2.26.2

Reply via email to