On Dienstag, 8. Mai 2018 12:42:33 CEST Richard Biener wrote:
> On Tue, May 8, 2018 at 12:37 PM, Allan Sandfeld Jensen
> 
> <li...@carewolf.com> wrote:
> > I have tried to fix PR85692 that I opened.
> 
> Please add a testcase as well.  It also helps if you shortly tell what
> the patch does
> in your mail.
> 
Okay. I have updated the patch with a test case based on my motivating 
examples. The patch extends the simplification of a vector construction so 
that it is recognized not only as a single-source permute instruction, but 
also as a two-source permute instruction.
commit 15c0f6a933d60b085416a59221851b604b955958
Author: Allan Sandfeld Jensen <allan.jen...@qt.io>
Date:   Tue May 8 13:16:18 2018 +0200

    Try two source permute for vector construction
    
    simplify_vector_constructor() was detecting when vector construction could
    be implemented as a single source permute, but was not detecting when
    it could be implemented as a double source permute. This patch adds the
    second case.
    
    2018-05-08 Allan Sandfeld Jensen <allan.jen...@qt.io>
    
    gcc/
    
        PR tree-optimization/85692
        * tree-ssa-forwprop.c (simplify_vector_constructor): Try two
        source permute as well.
    
    gcc/testsuite
    
        * gcc.target/i386/pr85692.c: Test that two simple constructions are
        detected as permute instructions.

diff --git a/gcc/testsuite/gcc.target/i386/pr85692.c b/gcc/testsuite/gcc.target/i386/pr85692.c
new file mode 100644
index 00000000000..322c1050161
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85692.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-final { scan-assembler "unpcklps" } } */
+/* { dg-final { scan-assembler "blendps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+
+typedef float v4sf __attribute__ ((vector_size (16)));
+
+v4sf unpcklps(v4sf a, v4sf b)
+{
+    return v4sf{a[0],b[0],a[1],b[1]};
+}
+
+v4sf blendps(v4sf a, v4sf b)
+{
+    return v4sf{a[0],b[1],a[2],b[3]};
+}
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 58ec6b47a5b..fbee8064160 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 {
   gimple *stmt = gsi_stmt (*gsi);
   gimple *def_stmt;
-  tree op, op2, orig, type, elem_type;
+  tree op, op2, orig1, orig2, type, elem_type;
   unsigned elem_size, i;
   unsigned HOST_WIDE_INT nelts;
   enum tree_code code, conv_code;
@@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   elem_type = TREE_TYPE (type);
   elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
 
-  vec_perm_builder sel (nelts, nelts, 1);
-  orig = NULL;
+  vec_perm_builder sel (nelts, 2, nelts);
+  orig1 = NULL;
+  orig2 = NULL;
   conv_code = ERROR_MARK;
   maybe_ident = true;
   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
@@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 	return false;
       op1 = gimple_assign_rhs1 (def_stmt);
       ref = TREE_OPERAND (op1, 0);
-      if (orig)
+      if (orig1)
 	{
-	  if (ref != orig)
-	    return false;
+	  if (ref == orig1 || orig2)
+	    {
+	      if (ref != orig1 && ref != orig2)
+	        return false;
+	    }
+	  else
+	    {
+	      if (TREE_CODE (ref) != SSA_NAME)
+		return false;
+	      if (! VECTOR_TYPE_P (TREE_TYPE (ref))
+		  || ! useless_type_conversion_p (TREE_TYPE (op1),
+						  TREE_TYPE (TREE_TYPE (ref))))
+		return false;
+	      if (TREE_TYPE (orig1) != TREE_TYPE (ref))
+		return false;
+	      orig2 = ref;
+	      maybe_ident = false;
+	  }
 	}
       else
 	{
@@ -2076,12 +2093,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 	      || ! useless_type_conversion_p (TREE_TYPE (op1),
 					      TREE_TYPE (TREE_TYPE (ref))))
 	    return false;
-	  orig = ref;
+	  orig1 = ref;
 	}
       unsigned int elt;
       if (maybe_ne (bit_field_size (op1), elem_size)
 	  || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt))
 	return false;
+      if (orig2 && ref == orig2)
+	elt += nelts;
       if (elt != i)
 	maybe_ident = false;
       sel.quick_push (elt);
@@ -2089,14 +2108,17 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   if (i < nelts)
     return false;
 
-  if (! VECTOR_TYPE_P (TREE_TYPE (orig))
+  if (! VECTOR_TYPE_P (TREE_TYPE (orig1))
       || maybe_ne (TYPE_VECTOR_SUBPARTS (type),
-		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig))))
+		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig1))))
     return false;
 
+  if (!orig2)
+    orig2 = orig1;
+
   tree tem;
   if (conv_code != ERROR_MARK
-      && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig),
+      && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig1),
 					   &tem, &conv_code)
 	  || conv_code == CALL_EXPR))
     return false;
@@ -2104,16 +2126,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   if (maybe_ident)
     {
       if (conv_code == ERROR_MARK)
-	gimple_assign_set_rhs_from_tree (gsi, orig);
+	gimple_assign_set_rhs_from_tree (gsi, orig1);
       else
-	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig,
+	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig1,
 					NULL_TREE, NULL_TREE);
     }
   else
     {
       tree mask_type;
 
-      vec_perm_indices indices (sel, 1, nelts);
+      vec_perm_indices indices (sel, 2, nelts);
       if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
 	return false;
       mask_type
@@ -2125,15 +2147,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 	return false;
       op2 = vec_perm_indices_to_tree (mask_type, indices);
       if (conv_code == ERROR_MARK)
-	gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2);
+	gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig1, orig2, op2);
       else
 	{
 	  gimple *perm
-	    = gimple_build_assign (make_ssa_name (TREE_TYPE (orig)),
-				   VEC_PERM_EXPR, orig, orig, op2);
-	  orig = gimple_assign_lhs (perm);
+	    = gimple_build_assign (make_ssa_name (TREE_TYPE (orig1)),
+				   VEC_PERM_EXPR, orig1, orig2, op2);
 	  gsi_insert_before (gsi, perm, GSI_SAME_STMT);
-	  gimple_assign_set_rhs_with_ops (gsi, conv_code, orig,
+	  gimple_assign_set_rhs_with_ops (gsi, conv_code, gimple_assign_lhs (perm),
 					  NULL_TREE, NULL_TREE);
 	}
     }

Reply via email to