Re: Implement SLP of internal functions

2018-05-25 Thread Richard Biener
On Fri, May 25, 2018 at 12:31 PM Richard Sandiford <
richard.sandif...@linaro.org> wrote:

> Richard Biener  writes:
> >> Index: gcc/tree-vect-slp.c
> >> ===
> >> --- gcc/tree-vect-slp.c 2018-05-16 11:02:46.262494712 +0100
> >> +++ gcc/tree-vect-slp.c 2018-05-16 11:12:11.873116180 +0100
> >> @@ -564,6 +564,41 @@ vect_get_and_check_slp_defs (vec_info *v
> >> return 0;
> >>   }
> >
> >> +/* Return true if call statements CALL1 and CALL2 are similar enough
> >> +   to be combined into the same SLP group.  */
> >> +
> >> +static bool
> >> +compatible_calls_p (gcall *call1, gcall *call2)
> >> +{
> >> +  unsigned int nargs = gimple_call_num_args (call1);
> >> +  if (nargs != gimple_call_num_args (call2))
> >> +return false;
> >> +
> >> +  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
> >> +return false;
> >> +
> >> +  if (gimple_call_internal_p (call1))
> >> +{
> >> +  if (TREE_TYPE (gimple_call_lhs (call1))
> >> + != TREE_TYPE (gimple_call_lhs (call2)))
> >> +   return false;
> >> +  for (unsigned int i = 0; i < nargs; ++i)
> >> +   if (TREE_TYPE (gimple_call_arg (call1, i))
> >> +   != TREE_TYPE (gimple_call_arg (call2, i)))
> >
> > Please use types_compatible_p in these two type comparisons.

> OK.

> > Can you please add a generic vect_call_sqrtf to the main vectorizer
> > testsuite?  In fact I already see
> > gcc.dg/vect/fast-math-bb-slp-call-1.c.  Does that mean SQRT does never
> > appear as internal function before vectorization?

> Yeah, sqrt vectorisation is scalar built-in -> vector internal function.

> But this patch adds a generic type keyed off vect_double_cond_arith.
> Would that be OK instead?

Yes, that works for me.

Thanks,
Richard.

> Tested as before.

> Thanks,
> Richard


> 2018-05-25  Richard Sandiford  

> gcc/
>  * internal-fn.h (vectorizable_internal_fn_p): New function.
>  * tree-vect-slp.c (compatible_calls_p): Likewise.
>  (vect_build_slp_tree_1): Remove nops argument.  Handle calls
>  to internal functions.
>  (vect_build_slp_tree_2): Update call to vect_build_slp_tree_1.

> gcc/testsuite/
>  * gcc.dg/vect/vect-cond-arith-6.c: New test.
>  * gcc.target/aarch64/sve/cond_arith_4.c: Likewise.
>  * gcc.target/aarch64/sve/cond_arith_4_run.c: Likewise.
>  * gcc.target/aarch64/sve/cond_arith_5.c: Likewise.
>  * gcc.target/aarch64/sve/cond_arith_5_run.c: Likewise.
>  * gcc.target/aarch64/sve/slp_14.c: Likewise.
>  * gcc.target/aarch64/sve/slp_14_run.c: Likewise.

> Index: gcc/internal-fn.h
> ===
> --- gcc/internal-fn.h   2018-05-25 11:28:05.953287025 +0100
> +++ gcc/internal-fn.h   2018-05-25 11:28:06.193277781 +0100
> @@ -160,6 +160,17 @@ direct_internal_fn_p (internal_fn fn)
> return direct_internal_fn_array[fn].type0 >= -1;
>   }

> +/* Return true if FN is a direct internal function that can be vectorized by
> +   converting the return type and all argument types to vectors of the same
> +   number of elements.  E.g. we can vectorize an IFN_SQRT on floats as an
> +   IFN_SQRT on vectors of N floats.  */
> +
> +inline bool
> +vectorizable_internal_fn_p (internal_fn fn)
> +{
> +  return direct_internal_fn_array[fn].vectorizable;
> +}
> +
>   /* Return optab information about internal function FN.  Only meaningful
>  if direct_internal_fn_p (FN).  */

> Index: gcc/tree-vect-slp.c
> ===
> --- gcc/tree-vect-slp.c 2018-05-25 11:28:05.953287025 +0100
> +++ gcc/tree-vect-slp.c 2018-05-25 11:28:06.195277704 +0100
> @@ -565,6 +565,41 @@ vect_get_and_check_slp_defs (vec_info *v
> return 0;
>   }

> +/* Return true if call statements CALL1 and CALL2 are similar enough
> +   to be combined into the same SLP group.  */
> +
> +static bool
> +compatible_calls_p (gcall *call1, gcall *call2)
> +{
> +  unsigned int nargs = gimple_call_num_args (call1);
> +  if (nargs != gimple_call_num_args (call2))
> +return false;
> +
> +  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
> +return false;
> +
> +  if (gimple_call_internal_p (call1))
> +{
> +  if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
> +  TREE_TYPE (gimple_call_lhs (call2))))
> +   return false;
> +  for (unsigned int i = 0; i < nargs; ++i)
> +   if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
> +TREE_TYPE (gimple_call_arg (call2, i))))
> + return false;
> +}
> +  else
> +{
> +  if (!operand_equal_p (gimple_call_fn (call1),
> +   gimple_call_fn (call2), 0))
> +   return false;
> +
> +  if (gimple_call_fntype (call1) != 

Re: Implement SLP of internal functions

2018-05-25 Thread Richard Sandiford
Richard Biener  writes:
>> Index: gcc/tree-vect-slp.c
>> ===
>> --- gcc/tree-vect-slp.c 2018-05-16 11:02:46.262494712 +0100
>> +++ gcc/tree-vect-slp.c 2018-05-16 11:12:11.873116180 +0100
>> @@ -564,6 +564,41 @@ vect_get_and_check_slp_defs (vec_info *v
>> return 0;
>>   }
>
>> +/* Return true if call statements CALL1 and CALL2 are similar enough
>> +   to be combined into the same SLP group.  */
>> +
>> +static bool
>> +compatible_calls_p (gcall *call1, gcall *call2)
>> +{
>> +  unsigned int nargs = gimple_call_num_args (call1);
>> +  if (nargs != gimple_call_num_args (call2))
>> +return false;
>> +
>> +  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
>> +return false;
>> +
>> +  if (gimple_call_internal_p (call1))
>> +{
>> +  if (TREE_TYPE (gimple_call_lhs (call1))
>> + != TREE_TYPE (gimple_call_lhs (call2)))
>> +   return false;
>> +  for (unsigned int i = 0; i < nargs; ++i)
>> +   if (TREE_TYPE (gimple_call_arg (call1, i))
>> +   != TREE_TYPE (gimple_call_arg (call2, i)))
>
> Please use types_compatible_p in these two type comparisons.

OK.

> Can you please add a generic vect_call_sqrtf to the main vectorizer
> testsuite?  In fact I already see
> gcc.dg/vect/fast-math-bb-slp-call-1.c.  Does that mean SQRT does never
> appear as internal function before vectorization?

Yeah, sqrt vectorisation is scalar built-in -> vector internal function.

But this patch adds a generic type keyed off vect_double_cond_arith.
Would that be OK instead?

Tested as before.

Thanks,
Richard


2018-05-25  Richard Sandiford  

gcc/
* internal-fn.h (vectorizable_internal_fn_p): New function.
* tree-vect-slp.c (compatible_calls_p): Likewise.
(vect_build_slp_tree_1): Remove nops argument.  Handle calls
to internal functions.
(vect_build_slp_tree_2): Update call to vect_build_slp_tree_1.

gcc/testsuite/
* gcc.dg/vect/vect-cond-arith-6.c: New test.
* gcc.target/aarch64/sve/cond_arith_4.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_4_run.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_5.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_5_run.c: Likewise.
* gcc.target/aarch64/sve/slp_14.c: Likewise.
* gcc.target/aarch64/sve/slp_14_run.c: Likewise.

Index: gcc/internal-fn.h
===
--- gcc/internal-fn.h   2018-05-25 11:28:05.953287025 +0100
+++ gcc/internal-fn.h   2018-05-25 11:28:06.193277781 +0100
@@ -160,6 +160,17 @@ direct_internal_fn_p (internal_fn fn)
   return direct_internal_fn_array[fn].type0 >= -1;
 }
 
+/* Return true if FN is a direct internal function that can be vectorized by
+   converting the return type and all argument types to vectors of the same
+   number of elements.  E.g. we can vectorize an IFN_SQRT on floats as an
+   IFN_SQRT on vectors of N floats.  */
+
+inline bool
+vectorizable_internal_fn_p (internal_fn fn)
+{
+  return direct_internal_fn_array[fn].vectorizable;
+}
+
 /* Return optab information about internal function FN.  Only meaningful
if direct_internal_fn_p (FN).  */
 
Index: gcc/tree-vect-slp.c
===
--- gcc/tree-vect-slp.c 2018-05-25 11:28:05.953287025 +0100
+++ gcc/tree-vect-slp.c 2018-05-25 11:28:06.195277704 +0100
@@ -565,6 +565,41 @@ vect_get_and_check_slp_defs (vec_info *v
   return 0;
 }
 
+/* Return true if call statements CALL1 and CALL2 are similar enough
+   to be combined into the same SLP group.  */
+
+static bool
+compatible_calls_p (gcall *call1, gcall *call2)
+{
+  unsigned int nargs = gimple_call_num_args (call1);
+  if (nargs != gimple_call_num_args (call2))
+return false;
+
+  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
+return false;
+
+  if (gimple_call_internal_p (call1))
+{
+  if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
+  TREE_TYPE (gimple_call_lhs (call2))))
+   return false;
+  for (unsigned int i = 0; i < nargs; ++i)
+   if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
+TREE_TYPE (gimple_call_arg (call2, i))))
+ return false;
+}
+  else
+{
+  if (!operand_equal_p (gimple_call_fn (call1),
+   gimple_call_fn (call2), 0))
+   return false;
+
+  if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
+   return false;
+}
+  return true;
+}
+
 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
caller's attempt to find the vector type in STMT with the narrowest
element type.  Return true if VECTYPE is nonnull and if it is valid
@@ -653,8 +688,8 @@ vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
-

Re: Implement SLP of internal functions

2018-05-17 Thread Richard Biener
On Wed, May 16, 2018 at 12:18 PM Richard Sandiford <
richard.sandif...@linaro.org> wrote:

> SLP of calls was previously restricted to built-in functions.
> This patch extends it to internal functions.

> Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
> and x86_64-linux-gnu.  OK to install?

> Richard


> 2018-05-16  Richard Sandiford  

> gcc/
>  * internal-fn.h (vectorizable_internal_fn_p): New function.
>  * tree-vect-slp.c (compatible_calls_p): Likewise.
>  (vect_build_slp_tree_1): Remove nops argument.  Handle calls
>  to internal functions.
>  (vect_build_slp_tree_2): Update call to vect_build_slp_tree_1.

> gcc/testsuite/
>  * gcc.target/aarch64/sve/cond_arith_4.c: New test.
>  * gcc.target/aarch64/sve/cond_arith_4_run.c: Likewise.
>  * gcc.target/aarch64/sve/cond_arith_5.c: Likewise.
>  * gcc.target/aarch64/sve/cond_arith_5_run.c: Likewise.
>  * gcc.target/aarch64/sve/slp_14.c: Likewise.
>  * gcc.target/aarch64/sve/slp_14_run.c: Likewise.

> Index: gcc/internal-fn.h
> ===
> --- gcc/internal-fn.h   2018-05-16 11:06:14.513574219 +0100
> +++ gcc/internal-fn.h   2018-05-16 11:12:11.872116220 +0100
> @@ -158,6 +158,17 @@ direct_internal_fn_p (internal_fn fn)
> return direct_internal_fn_array[fn].type0 >= -1;
>   }

> +/* Return true if FN is a direct internal function that can be vectorized by
> +   converting the return type and all argument types to vectors of the same
> +   number of elements.  E.g. we can vectorize an IFN_SQRT on floats as an
> +   IFN_SQRT on vectors of N floats.  */
> +
> +inline bool
> +vectorizable_internal_fn_p (internal_fn fn)
> +{
> +  return direct_internal_fn_array[fn].vectorizable;
> +}
> +
>   /* Return optab information about internal function FN.  Only meaningful
>  if direct_internal_fn_p (FN).  */

> Index: gcc/tree-vect-slp.c
> ===
> --- gcc/tree-vect-slp.c 2018-05-16 11:02:46.262494712 +0100
> +++ gcc/tree-vect-slp.c 2018-05-16 11:12:11.873116180 +0100
> @@ -564,6 +564,41 @@ vect_get_and_check_slp_defs (vec_info *v
> return 0;
>   }

> +/* Return true if call statements CALL1 and CALL2 are similar enough
> +   to be combined into the same SLP group.  */
> +
> +static bool
> +compatible_calls_p (gcall *call1, gcall *call2)
> +{
> +  unsigned int nargs = gimple_call_num_args (call1);
> +  if (nargs != gimple_call_num_args (call2))
> +return false;
> +
> +  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
> +return false;
> +
> +  if (gimple_call_internal_p (call1))
> +{
> +  if (TREE_TYPE (gimple_call_lhs (call1))
> + != TREE_TYPE (gimple_call_lhs (call2)))
> +   return false;
> +  for (unsigned int i = 0; i < nargs; ++i)
> +   if (TREE_TYPE (gimple_call_arg (call1, i))
> +   != TREE_TYPE (gimple_call_arg (call2, i)))

Please use types_compatible_p in these two type comparisons.

Can you please add a generic vect_call_sqrtf to the main
vectorizer testsuite?  In fact I already see
gcc.dg/vect/fast-math-bb-slp-call-1.c.
Does that mean SQRT does never appear as internal function before
vectorization?

OK with those changes.
Richard.

> + return false;
> +}
> +  else
> +{
> +  if (!operand_equal_p (gimple_call_fn (call1),
> +   gimple_call_fn (call2), 0))
> +   return false;
> +
> +  if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
> +   return false;
> +}
> +  return true;
> +}
> +
>   /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
>  caller's attempt to find the vector type in STMT with the narrowest
>  element type.  Return true if VECTYPE is nonnull and if it is valid
> @@ -625,8 +660,8 @@ vect_record_max_nunits (vec_info *vinfo,
>   static bool
>   vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
> vec<gimple *> stmts, unsigned int group_size,
> -  unsigned nops, poly_uint64 *max_nunits,
> -  bool *matches, bool *two_operators)
> +  poly_uint64 *max_nunits, bool *matches,
> +  bool *two_operators)
>   {
> unsigned int i;
> gimple *first_stmt = stmts[0], *stmt = stmts[0];
> @@ -698,7 +733,9 @@ vect_build_slp_tree_1 (vec_info *vinfo,
> if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
>  {
>rhs_code = CALL_EXPR;
> - if (gimple_call_internal_p (call_stmt)
> + if ((gimple_call_internal_p (call_stmt)
> +  && (!vectorizable_internal_fn_p
> +  (gimple_call_internal_fn (call_stmt))))
>|| gimple_call_tail_p (call_stmt)
>|| gimple_call_noreturn_p (call_stmt)
>|| !gimple_call_nothrow_p (call_stmt)
> @@ -833,11 

Implement SLP of internal functions

2018-05-16 Thread Richard Sandiford
SLP of calls was previously restricted to built-in functions.
This patch extends it to internal functions.

Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
and x86_64-linux-gnu.  OK to install?

Richard


2018-05-16  Richard Sandiford  

gcc/
* internal-fn.h (vectorizable_internal_fn_p): New function.
* tree-vect-slp.c (compatible_calls_p): Likewise.
(vect_build_slp_tree_1): Remove nops argument.  Handle calls
to internal functions.
(vect_build_slp_tree_2): Update call to vect_build_slp_tree_1.

gcc/testsuite/
* gcc.target/aarch64/sve/cond_arith_4.c: New test.
* gcc.target/aarch64/sve/cond_arith_4_run.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_5.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_5_run.c: Likewise.
* gcc.target/aarch64/sve/slp_14.c: Likewise.
* gcc.target/aarch64/sve/slp_14_run.c: Likewise.

Index: gcc/internal-fn.h
===
--- gcc/internal-fn.h   2018-05-16 11:06:14.513574219 +0100
+++ gcc/internal-fn.h   2018-05-16 11:12:11.872116220 +0100
@@ -158,6 +158,17 @@ direct_internal_fn_p (internal_fn fn)
   return direct_internal_fn_array[fn].type0 >= -1;
 }
 
+/* Return true if FN is a direct internal function that can be vectorized by
+   converting the return type and all argument types to vectors of the same
+   number of elements.  E.g. we can vectorize an IFN_SQRT on floats as an
+   IFN_SQRT on vectors of N floats.  */
+
+inline bool
+vectorizable_internal_fn_p (internal_fn fn)
+{
+  return direct_internal_fn_array[fn].vectorizable;
+}
+
 /* Return optab information about internal function FN.  Only meaningful
if direct_internal_fn_p (FN).  */
 
Index: gcc/tree-vect-slp.c
===
--- gcc/tree-vect-slp.c 2018-05-16 11:02:46.262494712 +0100
+++ gcc/tree-vect-slp.c 2018-05-16 11:12:11.873116180 +0100
@@ -564,6 +564,41 @@ vect_get_and_check_slp_defs (vec_info *v
   return 0;
 }
 
+/* Return true if call statements CALL1 and CALL2 are similar enough
+   to be combined into the same SLP group.  */
+
+static bool
+compatible_calls_p (gcall *call1, gcall *call2)
+{
+  unsigned int nargs = gimple_call_num_args (call1);
+  if (nargs != gimple_call_num_args (call2))
+return false;
+
+  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
+return false;
+
+  if (gimple_call_internal_p (call1))
+{
+  if (TREE_TYPE (gimple_call_lhs (call1))
+ != TREE_TYPE (gimple_call_lhs (call2)))
+   return false;
+  for (unsigned int i = 0; i < nargs; ++i)
+   if (TREE_TYPE (gimple_call_arg (call1, i))
+   != TREE_TYPE (gimple_call_arg (call2, i)))
+ return false;
+}
+  else
+{
+  if (!operand_equal_p (gimple_call_fn (call1),
+   gimple_call_fn (call2), 0))
+   return false;
+
+  if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
+   return false;
+}
+  return true;
+}
+
 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
caller's attempt to find the vector type in STMT with the narrowest
element type.  Return true if VECTYPE is nonnull and if it is valid
@@ -625,8 +660,8 @@ vect_record_max_nunits (vec_info *vinfo,
 static bool
 vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
   vec<gimple *> stmts, unsigned int group_size,
-  unsigned nops, poly_uint64 *max_nunits,
-  bool *matches, bool *two_operators)
+  poly_uint64 *max_nunits, bool *matches,
+  bool *two_operators)
 {
   unsigned int i;
   gimple *first_stmt = stmts[0], *stmt = stmts[0];
@@ -698,7 +733,9 @@ vect_build_slp_tree_1 (vec_info *vinfo,
   if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
{
  rhs_code = CALL_EXPR;
- if (gimple_call_internal_p (call_stmt)
+ if ((gimple_call_internal_p (call_stmt)
+  && (!vectorizable_internal_fn_p
+  (gimple_call_internal_fn (call_stmt))))
  || gimple_call_tail_p (call_stmt)
  || gimple_call_noreturn_p (call_stmt)
  || !gimple_call_nothrow_p (call_stmt)
@@ -833,11 +870,8 @@ vect_build_slp_tree_1 (vec_info *vinfo,
  if (rhs_code == CALL_EXPR)
{
  gimple *first_stmt = stmts[0];
- if (gimple_call_num_args (stmt) != nops
- || !operand_equal_p (gimple_call_fn (first_stmt),
-  gimple_call_fn (stmt), 0)
- || gimple_call_fntype (first_stmt)
-!= gimple_call_fntype (stmt))
+ if (!compatible_calls_p (as_a <gcall *> (first_stmt),
+  as_a <gcall *> (stmt)))
{
  if (dump_enabled_p ())
{
@@ -1166,8 +1200,7 @@