Re: [PATCH] Use two source permute for vector initialization (PR 85692, take 2)

2018-05-11 Thread Richard Biener
On Thu, 10 May 2018, Jakub Jelinek wrote:

> On Wed, May 09, 2018 at 04:53:19PM +0200, Allan Sandfeld Jensen wrote:
> > > > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator
> > > > *gsi)> 
> > > >elem_type = TREE_TYPE (type);
> > > >elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> > > > 
> > > > -  vec_perm_builder sel (nelts, nelts, 1);
> > > > -  orig = NULL;
> > > > +  vec_perm_builder sel (nelts, 2, nelts);
> > > 
> > > Why this change?  I admit the vec_parm_builder arguments are confusing, 
> > > but
> > > I think the second times third is the number of how many indices are being
> > > pushed into the vector, so I think (nelts, nelts, 1) is right.
> > > 
> > I had the impression it was what was selected from. In any case, I changed 
> > it 
> > because without I get crash when vec_perm_indices is created later with a 
> > possible nparms of 2.
> 
> The documentation is apparently in vector-builder.h:
>This class is a wrapper around auto_vec for building vectors of T.
>It aims to encode each vector as npatterns interleaved patterns,
>where each pattern represents a sequence:
> 
>  { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
> 
>The first three elements in each pattern provide enough information
>to derive the other elements.  If all patterns have a STEP of zero,
>we only need to encode the first two elements in each pattern.
>If BASE1 is also equal to BASE0 for all patterns, we only need to
>encode the first element in each pattern.  The number of encoded
>elements per pattern is given by nelts_per_pattern.
> 
>The class can be used in two ways:
> 
>1. It can be used to build a full image of the vector, which is then
>   canonicalized by finalize ().  In this case npatterns is initially
>   the number of elements in the vector and nelts_per_pattern is
>   initially 1.
> 
>2. It can be used to build a vector that already has a known encoding.
>   This is preferred since it is more efficient and copes with
>   variable-length vectors.  finalize () then canonicalizes the encoding
>   to a simpler form if possible.
> 
> As the vector is constant width and we are building the full image of the
> vector, the right arguments are (nelts, nelts, 1) as per 1. above, and the
> finalization can perhaps change it to something more compact.
> 
> > > (and sorry for missing your patch first, the PR wasn't ASSIGNED and there
> > > was no link to gcc-patches for it).
> > > 
> > It is okay. You are welcome to take it over. I am not a regular gcc 
> > contributor and thus not well-versed in the details, only the basic logic 
> > of 
> > how things work.
> 
> Ok, here is my version of the patch.  Bootstrapped/regtested on x86_64-linux
> and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2018-05-10  Allan Sandfeld Jensen  
>   Jakub Jelinek  
> 
>   PR tree-optimization/85692
>   * tree-ssa-forwprop.c (simplify_vector_constructor): Try two
>   source permute as well.
> 
>   * gcc.target/i386/pr85692.c: New test.
> 
> --- gcc/tree-ssa-forwprop.c.jj2018-05-08 18:16:36.866614130 +0200
> +++ gcc/tree-ssa-forwprop.c   2018-05-09 20:44:32.621900540 +0200
> @@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt
>  {
>gimple *stmt = gsi_stmt (*gsi);
>gimple *def_stmt;
> -  tree op, op2, orig, type, elem_type;
> +  tree op, op2, orig[2], type, elem_type;
>unsigned elem_size, i;
>unsigned HOST_WIDE_INT nelts;
>enum tree_code code, conv_code;
> @@ -2023,7 +2023,8 @@ simplify_vector_constructor (gimple_stmt
>elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
>  
>vec_perm_builder sel (nelts, nelts, 1);
> -  orig = NULL;
> +  orig[0] = NULL;
> +  orig[1] = NULL;
>conv_code = ERROR_MARK;
>maybe_ident = true;
>FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
> @@ -2063,25 +2064,35 @@ simplify_vector_constructor (gimple_stmt
>   return false;
>op1 = gimple_assign_rhs1 (def_stmt);
>ref = TREE_OPERAND (op1, 0);
> -  if (orig)
> +  unsigned int j;
> +  for (j = 0; j < 2; ++j)
>   {
> -   if (ref != orig)
> - return false;
> - }
> -  else
> - {
> -   if (TREE_CODE (ref) != SSA_NAME)
> - return false;
> -   if (! VECTOR_TYPE_P (TREE_TYPE (ref))
> -   || ! useless_type_conversion_p (TREE_TYPE (op1),
> -   TREE_TYPE (TREE_TYPE (ref
> - return false;
> -   orig = ref;
> +   if (!orig[j])
> + {
> +   if (TREE_CODE (ref) != SSA_NAME)
> + return false;
> +   if (! VECTOR_TYPE_P (TREE_TYPE (ref))
> +   || ! useless_type_conversion_p (TREE_TYPE (op1),
> +   TREE_TYPE (TREE_TYPE (ref
> + return false;
> +   if (j && 

Re: [PATCH] Use two source permute for vector initialization (PR 85692, take 2)

2018-05-10 Thread Allan Sandfeld Jensen
On Donnerstag, 10. Mai 2018 09:57:22 CEST Jakub Jelinek wrote:
> On Wed, May 09, 2018 at 04:53:19PM +0200, Allan Sandfeld Jensen wrote:
> > > > @@ -2022,8 +2022,9 @@ simplify_vector_constructor
> > > > (gimple_stmt_iterator
> > > > *gsi)>
> > > > 
> > > >elem_type = TREE_TYPE (type);
> > > >elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> > > > 
> > > > -  vec_perm_builder sel (nelts, nelts, 1);
> > > > -  orig = NULL;
> > > > +  vec_perm_builder sel (nelts, 2, nelts);
> > > 
> > > Why this change?  I admit the vec_parm_builder arguments are confusing,
> > > but
> > > I think the second times third is the number of how many indices are
> > > being
> > > pushed into the vector, so I think (nelts, nelts, 1) is right.
> > 
> > I had the impression it was what was selected from. In any case, I changed
> > it because without I get crash when vec_perm_indices is created later
> > with a possible nparms of 2.
> 
> The documentation is apparently in vector-builder.h:
>This class is a wrapper around auto_vec for building vectors of T.
>It aims to encode each vector as npatterns interleaved patterns,
>where each pattern represents a sequence:
> 
>  { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
> 
>The first three elements in each pattern provide enough information
>to derive the other elements.  If all patterns have a STEP of zero,
>we only need to encode the first two elements in each pattern.
>If BASE1 is also equal to BASE0 for all patterns, we only need to
>encode the first element in each pattern.  The number of encoded
>elements per pattern is given by nelts_per_pattern.
> 
>The class can be used in two ways:
> 
>1. It can be used to build a full image of the vector, which is then
>   canonicalized by finalize ().  In this case npatterns is initially
>   the number of elements in the vector and nelts_per_pattern is
>   initially 1.
> 
>2. It can be used to build a vector that already has a known encoding.
>   This is preferred since it is more efficient and copes with
>   variable-length vectors.  finalize () then canonicalizes the encoding
>   to a simpler form if possible.
> 
> As the vector is constant width and we are building the full image of the
> vector, the right arguments are (nelts, nelts, 1) as per 1. above, and the
> finalization can perhaps change it to something more compact.
> 
> > > (and sorry for missing your patch first, the PR wasn't ASSIGNED and
> > > there
> > > was no link to gcc-patches for it).
> > 
> > It is okay. You are welcome to take it over. I am not a regular gcc
> > contributor and thus not well-versed in the details, only the basic logic
> > of how things work.
> 
> Ok, here is my version of the patch.  Bootstrapped/regtested on x86_64-linux
> and i686-linux, ok for trunk?
> 
Looks good to me if that counts for anything.

'Allan




[PATCH] Use two source permute for vector initialization (PR 85692, take 2)

2018-05-10 Thread Jakub Jelinek
On Wed, May 09, 2018 at 04:53:19PM +0200, Allan Sandfeld Jensen wrote:
> > > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator
> > > *gsi)> 
> > >elem_type = TREE_TYPE (type);
> > >elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> > > 
> > > -  vec_perm_builder sel (nelts, nelts, 1);
> > > -  orig = NULL;
> > > +  vec_perm_builder sel (nelts, 2, nelts);
> > 
> > Why this change?  I admit the vec_parm_builder arguments are confusing, but
> > I think the second times third is the number of how many indices are being
> > pushed into the vector, so I think (nelts, nelts, 1) is right.
> > 
> I had the impression it was what was selected from. In any case, I changed it 
> because without I get crash when vec_perm_indices is created later with a 
> possible nparms of 2.

The documentation is apparently in vector-builder.h:
   This class is a wrapper around auto_vec for building vectors of T.
   It aims to encode each vector as npatterns interleaved patterns,
   where each pattern represents a sequence:

 { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }

   The first three elements in each pattern provide enough information
   to derive the other elements.  If all patterns have a STEP of zero,
   we only need to encode the first two elements in each pattern.
   If BASE1 is also equal to BASE0 for all patterns, we only need to
   encode the first element in each pattern.  The number of encoded
   elements per pattern is given by nelts_per_pattern.

   The class can be used in two ways:

   1. It can be used to build a full image of the vector, which is then
  canonicalized by finalize ().  In this case npatterns is initially
  the number of elements in the vector and nelts_per_pattern is
  initially 1.

   2. It can be used to build a vector that already has a known encoding.
  This is preferred since it is more efficient and copes with
  variable-length vectors.  finalize () then canonicalizes the encoding
  to a simpler form if possible.

As the vector is constant width and we are building the full image of the
vector, the right arguments are (nelts, nelts, 1) as per 1. above, and the
finalization can perhaps change it to something more compact.

> > (and sorry for missing your patch first, the PR wasn't ASSIGNED and there
> > was no link to gcc-patches for it).
> > 
> It is okay. You are welcome to take it over. I am not a regular gcc 
> contributor and thus not well-versed in the details, only the basic logic of 
> how things work.

Ok, here is my version of the patch.  Bootstrapped/regtested on x86_64-linux
and i686-linux, ok for trunk?

2018-05-10  Allan Sandfeld Jensen  
Jakub Jelinek  

PR tree-optimization/85692
* tree-ssa-forwprop.c (simplify_vector_constructor): Try two
source permute as well.

* gcc.target/i386/pr85692.c: New test.

--- gcc/tree-ssa-forwprop.c.jj  2018-05-08 18:16:36.866614130 +0200
+++ gcc/tree-ssa-forwprop.c 2018-05-09 20:44:32.621900540 +0200
@@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt
 {
   gimple *stmt = gsi_stmt (*gsi);
   gimple *def_stmt;
-  tree op, op2, orig, type, elem_type;
+  tree op, op2, orig[2], type, elem_type;
   unsigned elem_size, i;
   unsigned HOST_WIDE_INT nelts;
   enum tree_code code, conv_code;
@@ -2023,7 +2023,8 @@ simplify_vector_constructor (gimple_stmt
   elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
 
   vec_perm_builder sel (nelts, nelts, 1);
-  orig = NULL;
+  orig[0] = NULL;
+  orig[1] = NULL;
   conv_code = ERROR_MARK;
   maybe_ident = true;
   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
@@ -2063,25 +2064,35 @@ simplify_vector_constructor (gimple_stmt
return false;
   op1 = gimple_assign_rhs1 (def_stmt);
   ref = TREE_OPERAND (op1, 0);
-  if (orig)
+  unsigned int j;
+  for (j = 0; j < 2; ++j)
{
- if (ref != orig)
-   return false;
-   }
-  else
-   {
- if (TREE_CODE (ref) != SSA_NAME)
-   return false;
- if (! VECTOR_TYPE_P (TREE_TYPE (ref))
- || ! useless_type_conversion_p (TREE_TYPE (op1),
- TREE_TYPE (TREE_TYPE (ref
-   return false;
- orig = ref;
+ if (!orig[j])
+   {
+ if (TREE_CODE (ref) != SSA_NAME)
+   return false;
+ if (! VECTOR_TYPE_P (TREE_TYPE (ref))
+ || ! useless_type_conversion_p (TREE_TYPE (op1),
+ TREE_TYPE (TREE_TYPE (ref
+   return false;
+ if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]),
+  TREE_TYPE (ref)))
+   return false;
+ orig[j] = ref;
+ break;
+   }
+ else if (ref == orig[j])
+   break;
}
+  

Re: [Patch] Use two source permute for vector initialization (PR 85692)

2018-05-09 Thread Allan Sandfeld Jensen
On Mittwoch, 9. Mai 2018 11:08:02 CEST Jakub Jelinek wrote:
> On Tue, May 08, 2018 at 01:25:35PM +0200, Allan Sandfeld Jensen wrote:
> > 2018-05-08 Allan Sandfeld Jensen 
> 
> 2 spaces between date and name and two spaces between name and email
> address.
> 
> > gcc/
> > 
> > PR tree-optimization/85692
> > * tree-ssa-forwprop.c (simplify_vector_constructor): Try two
> > source permute as well.
> > 
> > gcc/testsuite
> > 
> > * gcc.target/i386/pr85692.c: Test two simply constructions are
> > detected as permute instructions.
> 
> Just
>   * gcc.target/i386/pr85692.c: New test.
> 
> > diff --git a/gcc/testsuite/gcc.target/i386/pr85692.c
> > b/gcc/testsuite/gcc.target/i386/pr85692.c new file mode 100644
> > index 000..322c1050161
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr85692.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -msse4.1" } */
> > +/* { dg-final { scan-assembler "unpcklps" } } */
> > +/* { dg-final { scan-assembler "blendps" } } */
> > +/* { dg-final { scan-assembler-not "shufps" } } */
> > +/* { dg-final { scan-assembler-not "unpckhps" } } */
> > +
> > +typedef float v4sf __attribute__ ((vector_size (16)));
> > +
> > +v4sf unpcklps(v4sf a, v4sf b)
> > +{
> > +return v4sf{a[0],b[0],a[1],b[1]};
> 
> Though, not really sure if this has been tested at all.
> The above is valid only in C++ (and only C++11 and above), while the
> test is compiled as C and thus has to fail.
>
Yes, I thought it had been tested, but it wasn't. It also needs to change the 
first line to be a compile and not run test.

> > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator
> > *gsi)> 
> >elem_type = TREE_TYPE (type);
> >elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> > 
> > -  vec_perm_builder sel (nelts, nelts, 1);
> > -  orig = NULL;
> > +  vec_perm_builder sel (nelts, 2, nelts);
> 
> Why this change?  I admit the vec_parm_builder arguments are confusing, but
> I think the second times third is the number of how many indices are being
> pushed into the vector, so I think (nelts, nelts, 1) is right.
> 
I had the impression it was what was selected from. In any case, I changed it 
because without I get crash when vec_perm_indices is created later with a 
possible nparms of 2.

> > @@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator
> > *gsi)> 
> > return false;
> > 
> >op1 = gimple_assign_rhs1 (def_stmt);
> >ref = TREE_OPERAND (op1, 0);
> > 
> > -  if (orig)
> > +  if (orig1)
> > 
> > {
> > 
> > - if (ref != orig)
> > -   return false;
> > + if (ref == orig1 || orig2)
> > +   {
> > + if (ref != orig1 && ref != orig2)
> > +   return false;
> > +   }
> > + else
> > +   {
> > + if (TREE_CODE (ref) != SSA_NAME)
> > +   return false;
> > + if (! VECTOR_TYPE_P (TREE_TYPE (ref))
> > + || ! useless_type_conversion_p (TREE_TYPE (op1),
> > + TREE_TYPE (TREE_TYPE (ref
> > +   return false;
> > + if (TREE_TYPE (orig1) != TREE_TYPE (ref))
> > +   return false;
> 
> I think even different type is acceptable here, as long as its conversion to
> orig1's type is useless.
> 
> Furthermore, I think the way you wrote the patch with 2 variables rather
> than an array of 2 elements means too much duplication, this else block
> is a duplication of the else block below.  See the patch I've added to the
> PR
It seemed to me it was clearer like this, but I can see your point.

> (and sorry for missing your patch first, the PR wasn't ASSIGNED and there
> was no link to gcc-patches for it).
> 
It is okay. You are welcome to take it over. I am not a regular gcc 
contributor and thus not well-versed in the details, only the basic logic of 
how things work.

'Allan





Re: [Patch] Use two source permute for vector initialization (PR 85692)

2018-05-09 Thread Jakub Jelinek
On Tue, May 08, 2018 at 01:25:35PM +0200, Allan Sandfeld Jensen wrote:
> 2018-05-08 Allan Sandfeld Jensen 

2 spaces between date and name and two spaces between name and email
address.

> gcc/
> 
> PR tree-optimization/85692
> * tree-ssa-forwprop.c (simplify_vector_constructor): Try two
> source permute as well.
> 
> gcc/testsuite
> 
> * gcc.target/i386/pr85692.c: Test two simply constructions are
> detected as permute instructions.

Just
* gcc.target/i386/pr85692.c: New test.
> 
> diff --git a/gcc/testsuite/gcc.target/i386/pr85692.c 
> b/gcc/testsuite/gcc.target/i386/pr85692.c
> new file mode 100644
> index 000..322c1050161
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr85692.c
> @@ -0,0 +1,18 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -msse4.1" } */
> +/* { dg-final { scan-assembler "unpcklps" } } */
> +/* { dg-final { scan-assembler "blendps" } } */
> +/* { dg-final { scan-assembler-not "shufps" } } */
> +/* { dg-final { scan-assembler-not "unpckhps" } } */
> +
> +typedef float v4sf __attribute__ ((vector_size (16)));
> +
> +v4sf unpcklps(v4sf a, v4sf b)
> +{
> +return v4sf{a[0],b[0],a[1],b[1]};

Though, not really sure if this has been tested at all.
The above is valid only in C++ (and only C++11 and above), while the
test is compiled as C and thus has to fail.

In C one should use e.g.
return (v4sf){a[0],b[0],a[1],b[1]};
instead (i.e. a compound literal).

> @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
>elem_type = TREE_TYPE (type);
>elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
>  
> -  vec_perm_builder sel (nelts, nelts, 1);
> -  orig = NULL;
> +  vec_perm_builder sel (nelts, 2, nelts);

Why this change?  I admit the vec_parm_builder arguments are confusing, but
I think the second times third is the number of how many indices are being
pushed into the vector, so I think (nelts, nelts, 1) is right.

> @@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator 
> *gsi)
>   return false;
>op1 = gimple_assign_rhs1 (def_stmt);
>ref = TREE_OPERAND (op1, 0);
> -  if (orig)
> +  if (orig1)
>   {
> -   if (ref != orig)
> - return false;
> +   if (ref == orig1 || orig2)
> + {
> +   if (ref != orig1 && ref != orig2)
> + return false;
> + }
> +   else
> + {
> +   if (TREE_CODE (ref) != SSA_NAME)
> + return false;
> +   if (! VECTOR_TYPE_P (TREE_TYPE (ref))
> +   || ! useless_type_conversion_p (TREE_TYPE (op1),
> +   TREE_TYPE (TREE_TYPE (ref
> + return false;
> +   if (TREE_TYPE (orig1) != TREE_TYPE (ref))
> + return false;

I think even different type is acceptable here, as long as its conversion to
orig1's type is useless.

Furthermore, I think the way you wrote the patch with 2 variables rather
than an array of 2 elements means too much duplication, this else block
is a duplication of the else block below.  See the patch I've added to the
PR (and sorry for missing your patch first, the PR wasn't ASSIGNED and there
was no link to gcc-patches for it).

> @@ -2125,15 +2147,14 @@ simplify_vector_constructor (gimple_stmt_iterator 
> *gsi)
>   return false;
>op2 = vec_perm_indices_to_tree (mask_type, indices);
>if (conv_code == ERROR_MARK)
> - gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2);
> + gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig1, orig2, op2);
>else
>   {
> gimple *perm
> - = gimple_build_assign (make_ssa_name (TREE_TYPE (orig)),
> -VEC_PERM_EXPR, orig, orig, op2);
> -   orig = gimple_assign_lhs (perm);
> + = gimple_build_assign (make_ssa_name (TREE_TYPE (orig1)),
> +VEC_PERM_EXPR, orig1, orig2, op2);
> gsi_insert_before (gsi, perm, GSI_SAME_STMT);
> -   gimple_assign_set_rhs_with_ops (gsi, conv_code, orig,
> +   gimple_assign_set_rhs_with_ops (gsi, conv_code, gimple_assign_lhs 
> (perm),

Too long line.

Jakub


Re: [Patch] Use two source permute for vector initialization (PR 85692)

2018-05-08 Thread Allan Sandfeld Jensen
On Dienstag, 8. Mai 2018 12:42:33 CEST Richard Biener wrote:
> On Tue, May 8, 2018 at 12:37 PM, Allan Sandfeld Jensen
> 
>  wrote:
> > I have tried to fix PR85692 that I opened.
> 
> Please add a testcase as well.  It also helps if you shortly tell what
> the patch does
> in your mail.
> 
Okay. I have updated the patch with a test-case based on my motivating 
examples. The patch just extends patching a vector construction to not just a 
single source permute instruction, but also a two source permute instruction.
commit 15c0f6a933d60b085416a59221851b604b955958
Author: Allan Sandfeld Jensen 
Date:   Tue May 8 13:16:18 2018 +0200

Try two source permute for vector construction

simplify_vector_constructor() was detecting when vector construction could
be implemented as a single source permute, but was not detecting when
it could be implemented as a double source permute. This patch adds the
second case.

2018-05-08 Allan Sandfeld Jensen 

gcc/

PR tree-optimization/85692
* tree-ssa-forwprop.c (simplify_vector_constructor): Try two
source permute as well.

gcc/testsuite

* gcc.target/i386/pr85692.c: Test two simply constructions are
detected as permute instructions.

diff --git a/gcc/testsuite/gcc.target/i386/pr85692.c b/gcc/testsuite/gcc.target/i386/pr85692.c
new file mode 100644
index 000..322c1050161
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85692.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-final { scan-assembler "unpcklps" } } */
+/* { dg-final { scan-assembler "blendps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+
+typedef float v4sf __attribute__ ((vector_size (16)));
+
+v4sf unpcklps(v4sf a, v4sf b)
+{
+return v4sf{a[0],b[0],a[1],b[1]};
+}
+
+v4sf blendps(v4sf a, v4sf b)
+{
+return v4sf{a[0],b[1],a[2],b[3]};
+}
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 58ec6b47a5b..fbee8064160 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 {
   gimple *stmt = gsi_stmt (*gsi);
   gimple *def_stmt;
-  tree op, op2, orig, type, elem_type;
+  tree op, op2, orig1, orig2, type, elem_type;
   unsigned elem_size, i;
   unsigned HOST_WIDE_INT nelts;
   enum tree_code code, conv_code;
@@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   elem_type = TREE_TYPE (type);
   elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
 
-  vec_perm_builder sel (nelts, nelts, 1);
-  orig = NULL;
+  vec_perm_builder sel (nelts, 2, nelts);
+  orig1 = NULL;
+  orig2 = NULL;
   conv_code = ERROR_MARK;
   maybe_ident = true;
   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
@@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 	return false;
   op1 = gimple_assign_rhs1 (def_stmt);
   ref = TREE_OPERAND (op1, 0);
-  if (orig)
+  if (orig1)
 	{
-	  if (ref != orig)
-	return false;
+	  if (ref == orig1 || orig2)
+	{
+	  if (ref != orig1 && ref != orig2)
+	return false;
+	}
+	  else
+	{
+	  if (TREE_CODE (ref) != SSA_NAME)
+		return false;
+	  if (! VECTOR_TYPE_P (TREE_TYPE (ref))
+		  || ! useless_type_conversion_p (TREE_TYPE (op1),
+		  TREE_TYPE (TREE_TYPE (ref
+		return false;
+	  if (TREE_TYPE (orig1) != TREE_TYPE (ref))
+		return false;
+	  orig2 = ref;
+	  maybe_ident = false;
+	  }
 	}
   else
 	{
@@ -2076,12 +2093,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 	  || ! useless_type_conversion_p (TREE_TYPE (op1),
 	  TREE_TYPE (TREE_TYPE (ref
 	return false;
-	  orig = ref;
+	  orig1 = ref;
 	}
   unsigned int elt;
   if (maybe_ne (bit_field_size (op1), elem_size)
 	  || !constant_multiple_p (bit_field_offset (op1), elem_size, ))
 	return false;
+  if (orig2 && ref == orig2)
+	elt += nelts;
   if (elt != i)
 	maybe_ident = false;
   sel.quick_push (elt);
@@ -2089,14 +2108,17 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   if (i < nelts)
 return false;
 
-  if (! VECTOR_TYPE_P (TREE_TYPE (orig))
+  if (! VECTOR_TYPE_P (TREE_TYPE (orig1))
   || maybe_ne (TYPE_VECTOR_SUBPARTS (type),
-		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig
+		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig1
 return false;
 
+  if (!orig2)
+orig2 = orig1;
+
   tree tem;
   if (conv_code != ERROR_MARK
-  && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig),
+  && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig1),
 	   , _code)
 	  || conv_code == CALL_EXPR))
 return false;
@@ -2104,16 +2126,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   if (maybe_ident)
 {
   if 

Re: [Patch] Use two source permute for vector initialization (PR 85692)

2018-05-08 Thread Richard Biener
On Tue, May 8, 2018 at 12:37 PM, Allan Sandfeld Jensen
 wrote:
> I have tried to fix PR85692 that I opened.

Please add a testcase as well.  It also helps if you shortly tell what
the patch does
in your mail.

Thanks,
Richard.

> 2018-05-08  Allan Sandfeld Jense 
>
> PR tree-optimization/85692
> * tree-ssa-forwprop.c (simplify_vector_constructor): Detect
>   two source permute operations as well.


[Patch] Use two source permute for vector initialization (PR 85692)

2018-05-08 Thread Allan Sandfeld Jensen
I have tried to fix PR85692 that I opened.

2018-05-08  Allan Sandfeld Jense 

PR tree-optimization/85692
* tree-ssa-forwprop.c (simplify_vector_constructor): Detect
  two source permute operations as well.
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 58ec6b47a5b..fbee8064160 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 {
   gimple *stmt = gsi_stmt (*gsi);
   gimple *def_stmt;
-  tree op, op2, orig, type, elem_type;
+  tree op, op2, orig1, orig2, type, elem_type;
   unsigned elem_size, i;
   unsigned HOST_WIDE_INT nelts;
   enum tree_code code, conv_code;
@@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   elem_type = TREE_TYPE (type);
   elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
 
-  vec_perm_builder sel (nelts, nelts, 1);
-  orig = NULL;
+  vec_perm_builder sel (nelts, 2, nelts);
+  orig1 = NULL;
+  orig2 = NULL;
   conv_code = ERROR_MARK;
   maybe_ident = true;
   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
@@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 	return false;
   op1 = gimple_assign_rhs1 (def_stmt);
   ref = TREE_OPERAND (op1, 0);
-  if (orig)
+  if (orig1)
 	{
-	  if (ref != orig)
-	return false;
+	  if (ref == orig1 || orig2)
+	{
+	  if (ref != orig1 && ref != orig2)
+	return false;
+	}
+	  else
+	{
+	  if (TREE_CODE (ref) != SSA_NAME)
+		return false;
+	  if (! VECTOR_TYPE_P (TREE_TYPE (ref))
+		  || ! useless_type_conversion_p (TREE_TYPE (op1),
+		  TREE_TYPE (TREE_TYPE (ref
+		return false;
+	  if (TREE_TYPE (orig1) != TREE_TYPE (ref))
+		return false;
+	  orig2 = ref;
+	  maybe_ident = false;
+	  }
 	}
   else
 	{
@@ -2076,12 +2093,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 	  || ! useless_type_conversion_p (TREE_TYPE (op1),
 	  TREE_TYPE (TREE_TYPE (ref
 	return false;
-	  orig = ref;
+	  orig1 = ref;
 	}
   unsigned int elt;
   if (maybe_ne (bit_field_size (op1), elem_size)
 	  || !constant_multiple_p (bit_field_offset (op1), elem_size, ))
 	return false;
+  if (orig2 && ref == orig2)
+	elt += nelts;
   if (elt != i)
 	maybe_ident = false;
   sel.quick_push (elt);
@@ -2089,14 +2108,17 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   if (i < nelts)
 return false;
 
-  if (! VECTOR_TYPE_P (TREE_TYPE (orig))
+  if (! VECTOR_TYPE_P (TREE_TYPE (orig1))
   || maybe_ne (TYPE_VECTOR_SUBPARTS (type),
-		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig
+		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig1
 return false;
 
+  if (!orig2)
+orig2 = orig1;
+
   tree tem;
   if (conv_code != ERROR_MARK
-  && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig),
+  && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig1),
 	   , _code)
 	  || conv_code == CALL_EXPR))
 return false;
@@ -2104,16 +2126,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   if (maybe_ident)
 {
   if (conv_code == ERROR_MARK)
-	gimple_assign_set_rhs_from_tree (gsi, orig);
+	gimple_assign_set_rhs_from_tree (gsi, orig1);
   else
-	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig,
+	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig1,
 	NULL_TREE, NULL_TREE);
 }
   else
 {
   tree mask_type;
 
-  vec_perm_indices indices (sel, 1, nelts);
+  vec_perm_indices indices (sel, 2, nelts);
   if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
 	return false;
   mask_type
@@ -2125,15 +2147,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 	return false;
   op2 = vec_perm_indices_to_tree (mask_type, indices);
   if (conv_code == ERROR_MARK)
-	gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2);
+	gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig1, orig2, op2);
   else
 	{
 	  gimple *perm
-	= gimple_build_assign (make_ssa_name (TREE_TYPE (orig)),
-   VEC_PERM_EXPR, orig, orig, op2);
-	  orig = gimple_assign_lhs (perm);
+	= gimple_build_assign (make_ssa_name (TREE_TYPE (orig1)),
+   VEC_PERM_EXPR, orig1, orig2, op2);
 	  gsi_insert_before (gsi, perm, GSI_SAME_STMT);
-	  gimple_assign_set_rhs_with_ops (gsi, conv_code, orig,
+	  gimple_assign_set_rhs_with_ops (gsi, conv_code, gimple_assign_lhs (perm),
 	  NULL_TREE, NULL_TREE);
 	}
 }