Hi,
This fixes up r16-5561-g283eb27d5f674b, where I allowed nop conversions
for the input operands. Several paths through the function still require
an explicit nop conversion for those operands; this patch adds them.
I hope to have caught everything, but I don't find the variable naming
(op, ref, orig) particularly descriptive, and the control flow depends
implicitly on orig. I refactored the function locally, but I guess that
is rather stage 1 material because the renaming makes the diff complicated.
Bootstrapped on x86 and power10. Regtested on riscv64 and aarch64.
Regards
Robin
PR tree-optimization/122855
PR tree-optimization/122850
gcc/ChangeLog:
* tree-ssa-forwprop.cc (simplify_vector_constructor): Nop
convert input if necessary.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/pr122850.c: New test.
* gcc.dg/vect/pr122855.c: New test.
---
gcc/testsuite/gcc.dg/vect/pr122850.c | 13 ++++++++
gcc/testsuite/gcc.dg/vect/pr122855.c | 15 +++++++++
gcc/tree-ssa-forwprop.cc | 47 +++++++++++++++++++++++-----
3 files changed, 67 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr122850.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr122855.c
diff --git a/gcc/testsuite/gcc.dg/vect/pr122850.c b/gcc/testsuite/gcc.dg/vect/pr122850.c
new file mode 100644
index 00000000000..4f50aa9660d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr122850.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { x86_64-*-* i?86-*-* } } } */
+/* { dg-additional-options "-O3 -march=haswell -m32" } */
+
+typedef int v2ll __attribute__ ((__vector_size__ (2 * sizeof (int))));
+typedef unsigned int v2ull __attribute__ ((__vector_size__ (2 * sizeof (int))));
+typedef __attribute__ ((__vector_size__ (2 * sizeof (short)))) short v2s;
+
+v2ll
+f (v2ull e)
+{
+ v2s c = (v2s) e[0];
+ return (v2ll) {(int) c, 0};
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr122855.c b/gcc/testsuite/gcc.dg/vect/pr122855.c
new file mode 100644
index 00000000000..3084d2062a1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr122855.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { x86_64-*-* i?86-*-* } } } */
+/* { dg-additional-options "-O3 -march=haswell" } */
+
+int zoom_x3_weights_0, zoom_x3_j, zoom_x3_pixel2;
+
+void zoom_x3(char *__restrict s, char *__restrict zoom_x3_tmp) {
+ int pixel0 = 0, pixel1 = 0;
+ for (; zoom_x3_j; zoom_x3_j--) {
+ pixel0 += *s++ * zoom_x3_weights_0;
+ pixel1 += *s++ * zoom_x3_weights_0;
+ zoom_x3_pixel2 += *s++ * zoom_x3_weights_0;
+ }
+ *zoom_x3_tmp++ = pixel0 < 0 ? 0 : pixel0 > 255 ? 255 : pixel0;
+ *zoom_x3_tmp = pixel1 < 0 ? 0 : pixel1 > 255 ? 255 : pixel1;
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 00140ce950c..2200fc04918 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -4183,24 +4183,45 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
/* ??? We can see if we can safely convert to the original
element type. */
converted_orig1 = conv_code != ERROR_MARK;
+ tree target_type = converted_orig1 ? type : perm_type;
+ tree nonconstant_for_splat = one_nonconstant;
+ /* If there's a nop conversion between the target element type and
+ the nonconstant's type, convert it. */
+ if (!useless_type_conversion_p (TREE_TYPE (target_type),
+ TREE_TYPE (one_nonconstant)))
+ nonconstant_for_splat
+ = gimple_build (&stmts, NOP_EXPR, TREE_TYPE (target_type),
+ one_nonconstant);
orig[1] = gimple_build_vector_from_val (&stmts, UNKNOWN_LOCATION,
- converted_orig1
- ? type : perm_type,
- one_nonconstant);
+ target_type,
+ nonconstant_for_splat);
}
else if (orig[1] == error_mark_node)
{
/* ??? See if we can convert the vector to the original type. */
converted_orig1 = conv_code != ERROR_MARK;
unsigned n = converted_orig1 ? nelts : refnelts;
- tree_vector_builder vec (converted_orig1
- ? type : perm_type, n, 1);
+ tree target_type = converted_orig1 ? type : perm_type;
+ tree_vector_builder vec (target_type, n, 1);
for (unsigned i = 0; i < n; ++i)
if (i < nelts && constants[i])
- vec.quick_push (constants[i]);
+ {
+ tree constant = constants[i];
+ /* If there's a nop conversion, convert the constant. */
+ if (!useless_type_conversion_p (TREE_TYPE (target_type),
+ TREE_TYPE (constant)))
+ constant = fold_convert (TREE_TYPE (target_type), constant);
+ vec.quick_push (constant);
+ }
else
- /* ??? Push a don't-care value. */
- vec.quick_push (one_constant);
+ {
+ /* ??? Push a don't-care value. */
+ tree constant = one_constant;
+ if (!useless_type_conversion_p (TREE_TYPE (target_type),
+ TREE_TYPE (constant)))
+ constant = fold_convert (TREE_TYPE (target_type), constant);
+ vec.quick_push (constant);
+ }
orig[1] = vec.build ();
}
tree blend_op2 = NULL_TREE;
@@ -4224,6 +4245,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
return false;
blend_op2 = vec_perm_indices_to_tree (mask_type, indices);
}
+
+ /* For a real orig[1] (no splat, constant etc.) we might need to
+ nop-convert it. Do so here. */
+ if (orig[1] && orig[1] != error_mark_node
+ && !useless_type_conversion_p (perm_type, TREE_TYPE (orig[1]))
+ && tree_nop_conversion_p (TREE_TYPE (perm_type),
+ TREE_TYPE (TREE_TYPE (orig[1]))))
+ orig[1] = gimple_build (&stmts, VIEW_CONVERT_EXPR, perm_type,
+ orig[1]);
+
tree orig1_for_perm
= converted_orig1 ? build_zero_cst (perm_type) : orig[1];
tree res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type,
--
2.51.1