On Wed, May 09, 2018 at 04:53:19PM +0200, Allan Sandfeld Jensen wrote:
> > > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator
> > > *gsi)> 
> > >    elem_type = TREE_TYPE (type);
> > >    elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> > > 
> > > -  vec_perm_builder sel (nelts, nelts, 1);
> > > -  orig = NULL;
> > > +  vec_perm_builder sel (nelts, 2, nelts);
> > 
> > Why this change?  I admit the vec_parm_builder arguments are confusing, but
> > I think the second times third is the number of how many indices are being
> > pushed into the vector, so I think (nelts, nelts, 1) is right.
> > 
> I had the impression it was what was selected from. In any case, I changed it 
> because without I get crash when vec_perm_indices is created later with a 
> possible nparms of 2.

The documentation is apparently in vector-builder.h:
   This class is a wrapper around auto_vec<T> for building vectors of T.
   It aims to encode each vector as npatterns interleaved patterns,
   where each pattern represents a sequence:

     { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }

   The first three elements in each pattern provide enough information
   to derive the other elements.  If all patterns have a STEP of zero,
   we only need to encode the first two elements in each pattern.
   If BASE1 is also equal to BASE0 for all patterns, we only need to
   encode the first element in each pattern.  The number of encoded
   elements per pattern is given by nelts_per_pattern.

   The class can be used in two ways:

   1. It can be used to build a full image of the vector, which is then
      canonicalized by finalize ().  In this case npatterns is initially
      the number of elements in the vector and nelts_per_pattern is
      initially 1.

   2. It can be used to build a vector that already has a known encoding.
      This is preferred since it is more efficient and copes with
      variable-length vectors.  finalize () then canonicalizes the encoding
      to a simpler form if possible.

As the vector is constant width and we are building the full image of the
vector, the right arguments are (nelts, nelts, 1) as per 1. above, and the
finalization can perhaps change it to something more compact.

> > (and sorry for missing your patch first, the PR wasn't ASSIGNED and there
> > was no link to gcc-patches for it).
> > 
> It is okay. You are welcome to take it over. I am not a regular gcc 
> contributor and thus not well-versed in the details, only the basic logic of 
> how things work.

Ok, here is my version of the patch.  Bootstrapped/regtested on x86_64-linux
and i686-linux, ok for trunk?

2018-05-10  Allan Sandfeld Jensen  <allan.jen...@qt.io>
            Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/85692
        * tree-ssa-forwprop.c (simplify_vector_constructor): Try two
        source permute as well.

        * gcc.target/i386/pr85692.c: New test.

--- gcc/tree-ssa-forwprop.c.jj  2018-05-08 18:16:36.866614130 +0200
+++ gcc/tree-ssa-forwprop.c     2018-05-09 20:44:32.621900540 +0200
@@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt
 {
   gimple *stmt = gsi_stmt (*gsi);
   gimple *def_stmt;
-  tree op, op2, orig, type, elem_type;
+  tree op, op2, orig[2], type, elem_type;
   unsigned elem_size, i;
   unsigned HOST_WIDE_INT nelts;
   enum tree_code code, conv_code;
@@ -2023,7 +2023,8 @@ simplify_vector_constructor (gimple_stmt
   elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
 
   vec_perm_builder sel (nelts, nelts, 1);
-  orig = NULL;
+  orig[0] = NULL;
+  orig[1] = NULL;
   conv_code = ERROR_MARK;
   maybe_ident = true;
   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
@@ -2063,25 +2064,35 @@ simplify_vector_constructor (gimple_stmt
        return false;
       op1 = gimple_assign_rhs1 (def_stmt);
       ref = TREE_OPERAND (op1, 0);
-      if (orig)
+      unsigned int j;
+      for (j = 0; j < 2; ++j)
        {
-         if (ref != orig)
-           return false;
-       }
-      else
-       {
-         if (TREE_CODE (ref) != SSA_NAME)
-           return false;
-         if (! VECTOR_TYPE_P (TREE_TYPE (ref))
-             || ! useless_type_conversion_p (TREE_TYPE (op1),
-                                             TREE_TYPE (TREE_TYPE (ref))))
-           return false;
-         orig = ref;
+         if (!orig[j])
+           {
+             if (TREE_CODE (ref) != SSA_NAME)
+               return false;
+             if (! VECTOR_TYPE_P (TREE_TYPE (ref))
+                 || ! useless_type_conversion_p (TREE_TYPE (op1),
+                                                 TREE_TYPE (TREE_TYPE (ref))))
+               return false;
+             if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]),
+                                                  TREE_TYPE (ref)))
+               return false;
+             orig[j] = ref;
+             break;
+           }
+         else if (ref == orig[j])
+           break;
        }
+      if (j == 2)
+       return false;
+
       unsigned int elt;
       if (maybe_ne (bit_field_size (op1), elem_size)
          || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt))
        return false;
+      if (j)
+       elt += nelts;
       if (elt != i)
        maybe_ident = false;
       sel.quick_push (elt);
@@ -2089,14 +2100,15 @@ simplify_vector_constructor (gimple_stmt
   if (i < nelts)
     return false;
 
-  if (! VECTOR_TYPE_P (TREE_TYPE (orig))
+  if (! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
       || maybe_ne (TYPE_VECTOR_SUBPARTS (type),
-                  TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig))))
+                  TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0]))))
     return false;
 
   tree tem;
   if (conv_code != ERROR_MARK
-      && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig),
+      && (! supportable_convert_operation (conv_code, type,
+                                          TREE_TYPE (orig[0]),
                                           &tem, &conv_code)
          || conv_code == CALL_EXPR))
     return false;
@@ -2104,16 +2116,16 @@ simplify_vector_constructor (gimple_stmt
   if (maybe_ident)
     {
       if (conv_code == ERROR_MARK)
-       gimple_assign_set_rhs_from_tree (gsi, orig);
+       gimple_assign_set_rhs_from_tree (gsi, orig[0]);
       else
-       gimple_assign_set_rhs_with_ops (gsi, conv_code, orig,
+       gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
                                        NULL_TREE, NULL_TREE);
     }
   else
     {
       tree mask_type;
 
-      vec_perm_indices indices (sel, 1, nelts);
+      vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts);
       if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
        return false;
       mask_type
@@ -2124,16 +2136,19 @@ simplify_vector_constructor (gimple_stmt
                       GET_MODE_SIZE (TYPE_MODE (type))))
        return false;
       op2 = vec_perm_indices_to_tree (mask_type, indices);
+      if (!orig[1])
+       orig[1] = orig[0];
       if (conv_code == ERROR_MARK)
-       gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2);
+       gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0],
+                                       orig[1], op2);
       else
        {
          gimple *perm
-           = gimple_build_assign (make_ssa_name (TREE_TYPE (orig)),
-                                  VEC_PERM_EXPR, orig, orig, op2);
-         orig = gimple_assign_lhs (perm);
+           = gimple_build_assign (make_ssa_name (TREE_TYPE (orig[0])),
+                                  VEC_PERM_EXPR, orig[0], orig[1], op2);
+         orig[0] = gimple_assign_lhs (perm);
          gsi_insert_before (gsi, perm, GSI_SAME_STMT);
-         gimple_assign_set_rhs_with_ops (gsi, conv_code, orig,
+         gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
                                          NULL_TREE, NULL_TREE);
        }
     }
--- gcc/testsuite/gcc.target/i386/pr85692.c.jj  2018-05-09 20:44:37.153904405 
+0200
+++ gcc/testsuite/gcc.target/i386/pr85692.c     2018-05-09 20:44:37.153904405 
+0200
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-final { scan-assembler "unpcklps" } } */
+/* { dg-final { scan-assembler "blendps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+
+typedef float v4sf __attribute__ ((vector_size (16)));
+
+v4sf unpcklps(v4sf a, v4sf b)
+{
+    return (v4sf){a[0],b[0],a[1],b[1]};
+}
+
+v4sf blendps(v4sf a, v4sf b)
+{
+    return (v4sf){a[0],b[1],a[2],b[3]};
+}


        Jakub

Reply via email to