Hi,

This fixes up r16-5561-g283eb27d5f674b where I allowed nop conversions
for the input operands.  There are several paths through the function
that still require an explicit nop conversion for them.  This patch adds
them.  I hope to have caught everything, but I find the function hard to
follow: the variable names (op, ref, orig) are not particularly
descriptive and the control flow depends implicitly on orig.  I
refactored it locally, but I guess that is rather stage 1 material
because the renaming makes the diff complicated.

Bootstrapped on x86 and power10.  Regtested on riscv64 and aarch64.

Regards
 Robin

        PR tree-optimization/122855
        PR tree-optimization/122850

gcc/ChangeLog:

        * tree-ssa-forwprop.cc (simplify_vector_constructor): Nop
        convert input if necessary.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/pr122850.c: New test.
        * gcc.dg/vect/pr122855.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr122850.c | 13 ++++++++
 gcc/testsuite/gcc.dg/vect/pr122855.c | 15 +++++++++
 gcc/tree-ssa-forwprop.cc             | 47 +++++++++++++++++++++++-----
 3 files changed, 67 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr122850.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr122855.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr122850.c b/gcc/testsuite/gcc.dg/vect/pr122850.c
new file mode 100644
index 00000000000..4f50aa9660d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr122850.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { x86_64-*-* i?86-*-* } } } */
+/* { dg-additional-options "-O3 -march=haswell -m32" } */
+
+typedef int v2ll __attribute__ ((__vector_size__ (2 * sizeof (int))));
+typedef unsigned int v2ull __attribute__ ((__vector_size__ (2 * sizeof (int))));
+typedef __attribute__ ((__vector_size__ (2 * sizeof (short)))) short v2s;
+
+v2ll
+f (v2ull e)
+{
+  v2s c = (v2s) e[0];
+  return (v2ll) {(int) c, 0};
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr122855.c b/gcc/testsuite/gcc.dg/vect/pr122855.c
new file mode 100644
index 00000000000..3084d2062a1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr122855.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { x86_64-*-* i?86-*-* } } } */
+/* { dg-additional-options "-O3 -march=haswell" } */
+
+int zoom_x3_weights_0, zoom_x3_j, zoom_x3_pixel2;
+
+void zoom_x3(char *__restrict s, char *__restrict zoom_x3_tmp) {
+  int pixel0 = 0, pixel1 = 0;
+  for (; zoom_x3_j; zoom_x3_j--) {
+    pixel0 += *s++ * zoom_x3_weights_0;
+    pixel1 += *s++ * zoom_x3_weights_0;
+    zoom_x3_pixel2 += *s++ * zoom_x3_weights_0;
+  }
+  *zoom_x3_tmp++ = pixel0 < 0 ? 0 : pixel0 > 255 ? 255 : pixel0;
+  *zoom_x3_tmp = pixel1 < 0 ? 0 : pixel1 > 255 ? 255 : pixel1;
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 00140ce950c..2200fc04918 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -4183,24 +4183,45 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
          /* ???  We can see if we can safely convert to the original
             element type.  */
          converted_orig1 = conv_code != ERROR_MARK;
+         tree target_type = converted_orig1 ? type : perm_type;
+         tree nonconstant_for_splat = one_nonconstant;
+         /* If there's a nop conversion between the target element type and
+            the nonconstant's type, convert it.  */
+         if (!useless_type_conversion_p (TREE_TYPE (target_type),
+                                         TREE_TYPE (one_nonconstant)))
+           nonconstant_for_splat
+             = gimple_build (&stmts, NOP_EXPR, TREE_TYPE (target_type),
+                             one_nonconstant);
          orig[1] = gimple_build_vector_from_val (&stmts, UNKNOWN_LOCATION,
-                                                 converted_orig1
-                                                 ? type : perm_type,
-                                                 one_nonconstant);
+                                                 target_type,
+                                                 nonconstant_for_splat);
        }
       else if (orig[1] == error_mark_node)
        {
          /* ???  See if we can convert the vector to the original type.  */
          converted_orig1 = conv_code != ERROR_MARK;
          unsigned n = converted_orig1 ? nelts : refnelts;
-         tree_vector_builder vec (converted_orig1
-                                  ? type : perm_type, n, 1);
+         tree target_type = converted_orig1 ? type : perm_type;
+         tree_vector_builder vec (target_type, n, 1);
          for (unsigned i = 0; i < n; ++i)
            if (i < nelts && constants[i])
-             vec.quick_push (constants[i]);
+             {
+               tree constant = constants[i];
+               /* If there's a nop conversion, convert the constant.  */
+               if (!useless_type_conversion_p (TREE_TYPE (target_type),
+                                               TREE_TYPE (constant)))
+                 constant = fold_convert (TREE_TYPE (target_type), constant);
+               vec.quick_push (constant);
+             }
            else
-             /* ??? Push a don't-care value.  */
-             vec.quick_push (one_constant);
+             {
+               /* ??? Push a don't-care value.  */
+               tree constant = one_constant;
+               if (!useless_type_conversion_p (TREE_TYPE (target_type),
+                                               TREE_TYPE (constant)))
+                 constant = fold_convert (TREE_TYPE (target_type), constant);
+               vec.quick_push (constant);
+             }
          orig[1] = vec.build ();
        }
       tree blend_op2 = NULL_TREE;
@@ -4224,6 +4245,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
            return false;
          blend_op2 = vec_perm_indices_to_tree (mask_type, indices);
        }
+
+      /* For a real orig[1] (no splat, constant etc.) we might need to
+        nop-convert it.  Do so here.  */
+      if (orig[1] && orig[1] != error_mark_node
+         && !useless_type_conversion_p (perm_type, TREE_TYPE (orig[1]))
+         && tree_nop_conversion_p (TREE_TYPE (perm_type),
+                                   TREE_TYPE (TREE_TYPE (orig[1]))))
+       orig[1] = gimple_build (&stmts, VIEW_CONVERT_EXPR, perm_type,
+                               orig[1]);
+
       tree orig1_for_perm
        = converted_orig1 ? build_zero_cst (perm_type) : orig[1];
       tree res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type,
-- 
2.51.1

Reply via email to