The PR shows that code generation is pessimized by the new
canonicalization rules, which nail don't-care elements to specific
values and thereby make it hard to generate good code later.

The temporary solution is to avoid the canonicalization in the cases
where we obviously know it will create more GIMPLE stmts than
before.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2020-01-31  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/92819
        * tree-ssa-forwprop.c (simplify_vector_constructor): Avoid
        generating more stmts than before.

        * gcc.target/i386/pr92819.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr92819.c | 45 +++++++++++++++++++++++++++++++++
 gcc/tree-ssa-forwprop.c                 | 15 +++++++++--
 2 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92819.c

diff --git a/gcc/testsuite/gcc.target/i386/pr92819.c b/gcc/testsuite/gcc.target/i386/pr92819.c
new file mode 100644
index 00000000000..773e3490ab3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92819.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -fdump-tree-forwprop1" } */
+
+typedef double v4df __attribute__((vector_size (32)));
+typedef double v2df __attribute__((vector_size (16)));
+typedef short v16hi __attribute__((vector_size (32)));
+typedef short v8hi __attribute__((vector_size (16)));
+
+v2df
+foo (v4df x, double *p)
+{
+  return (v2df) { x[1], *p };
+}
+
+v2df
+bar (v4df x, double *p)
+{
+  return (v2df) { x[0], *p }; /* BIT_INSERT_EXPR */
+}
+
+v2df
+baz (v2df x, double *p)
+{
+  return (v2df) { x[1], *p }; /* VEC_PERM_EXPR */
+}
+
+v2df
+qux (v2df x, double *p)
+{
+  return (v2df) { x[0], *p }; /* BIT_INSERT_EXPR */
+}
+
+v2df
+corge (v4df x, double *p)
+{
+  return (v2df) { x[3], *p };
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop1" } } */
+/* We can't check for 1:1 assembler here so check for what we do not
+   want to see.  */
+/* { dg-final { scan-assembler-not { "perm" } } } */
+/* { dg-final { scan-assembler-not { "insert" } } } */
+/* { dg-final { scan-assembler-not { "broadcast" } } } */
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 5203891950a..f65216d23e9 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2230,7 +2230,6 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   unsigned HOST_WIDE_INT refnelts;
   enum tree_code conv_code;
   constructor_elt *elt;
-  bool maybe_ident;
 
   op = gimple_assign_rhs1 (stmt);
   type = TREE_TYPE (op);
@@ -2245,7 +2244,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   orig[0] = NULL;
   orig[1] = NULL;
   conv_code = ERROR_MARK;
-  maybe_ident = true;
+  bool maybe_ident = true;
+  bool maybe_blend[2] = { true, true };
   tree one_constant = NULL_TREE;
   tree one_nonconstant = NULL_TREE;
   auto_vec<tree> constants;
@@ -2290,6 +2290,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
              orig[j] = ref;
              if (elem != i || j != 0)
                maybe_ident = false;
+             if (elem != i)
+               maybe_blend[j] = false;
              elts.safe_push (std::make_pair (j, elem));
              continue;
            }
@@ -2439,6 +2441,15 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
     }
   else
     {
+      /* If we combine a vector with a non-vector avoid cases where
+        we'll obviously end up with more GIMPLE stmts which is when
+        we'll later not fold this to a single insert into the vector
+        and we had a single extract originally.  See PR92819.  */
+      if (nelts == 2
+         && refnelts > 2
+         && orig[1] == error_mark_node
+         && !maybe_blend[0])
+       return false;
       tree mask_type, perm_type, conv_src_type;
       perm_type = TREE_TYPE (orig[0]);
       conv_src_type = (nelts == refnelts
-- 
2.16.4

Reply via email to