Re: [PATCH 2/6] Make builtin_vectorized_function take a combined_fn

2015-11-16 Thread Richard Biener
On Fri, Nov 13, 2015 at 1:27 PM, Richard Sandiford
 wrote:
> Richard Biener  writes:
>> On Mon, Nov 9, 2015 at 5:25 PM, Richard Sandiford
>>  wrote:
>>> This patch replaces the fndecl argument to builtin_vectorized_function
>>> with a combined_fn and gets the vectoriser to call it for internal
>>> functions too.  The patch also moves vectorisation of machine-specific
>>> built-ins to a new hook, builtin_md_vectorized_function.
>>>
>>> I've attached a -b version too since that's easier to read.
>>
>> @@ -42095,8 +42018,7 @@ ix86_builtin_vectorized_function (tree fndecl,
>> tree type_out,
>>
>>/* Dispatch to a handler for a vectorization library.  */
>>if (ix86_veclib_handler)
>> -return ix86_veclib_handler ((enum built_in_function) fn, type_out,
>> -   type_in);
>> +return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
>>
>>return NULL_TREE;
>>  }
>>
>> fn is already a combined_fn?  Why does the builtin_vectorized_function
>> not take one but an unsigned int?
>
> Not everything that includes the target headers includes tree.h.
> This is like builtin_conversion taking a tree_code as an unsigned int.
>
>> @@ -42176,11 +42077,12 @@ ix86_veclibabi_svml (enum built_in_function
>> fn, tree type_out, tree type_in)
>>return NULL_TREE;
>>  }
>>
>> -  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
>> +  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
>> +  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
>>
>> -  if (fn == BUILT_IN_LOGF)
>> +  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
>>
>> with 'fn' now a combined_fn how is this going to work with IFNs?
>
> By this point we already know that the function has one of the
> supported modes.  A previous patch extended matchfn_built_in
> to handle combined_fns.  E.g.
>
>   mathfn_built_in (float_type_node, IFN_SQRT)
>
> returns BUILT_IN_SQRTF.

Ah, I missed that I suppose.

>> +/* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION.  */
>> +
>> +static tree
>> +rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
>> +  tree type_in)
>> +{
>>
>> any reason you are using a fndecl for this hook instead of the function code?
>
> It just seems more helpful to pass the fndecl when we have it.
> It's cheap to go from the decl to the code but it's less cheap
> to go the other way.

Ok, but for the other hook you changed it ...

>> @@ -1639,20 +1639,20 @@ vect_finish_stmt_generation (gimple *stmt,
>> gimple *vec_stmt,
>>  tree
>>  vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
>>  {
>> -  tree fndecl = gimple_call_fndecl (call);
>> -
>> -  /* We only handle functions that do not read or clobber memory -- i.e.
>> - const or novops ones.  */
>> -  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
>> +  /* We only handle functions that do not read or clobber memory.  */
>> +  if (gimple_vuse (call))
>>  return NULL_TREE;
>>
>> -  if (!fndecl
>> -  || TREE_CODE (fndecl) != FUNCTION_DECL
>> -  || !DECL_BUILT_IN (fndecl))
>> -return NULL_TREE;
>> +  combined_fn fn = gimple_call_combined_fn (call);
>> +  if (fn != CFN_LAST)
>> +return targetm.vectorize.builtin_vectorized_function
>> +  (fn, vectype_out, vectype_in);
>>
>> -  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
>> -   vectype_in);
>> +  if (gimple_call_builtin_p (call, BUILT_IN_MD))
>> +return targetm.vectorize.builtin_md_vectorized_function
>> +  (gimple_call_fndecl (call), vectype_out, vectype_in);
>> +
>> +  return NULL_TREE;
>>
>> Looking at this and the issues above wouldn't it be easier to simply
>> pass the call stmt to the hook (which then can again handle
>> both normal and target builtins)?  And it has context available
>> (actual arguments and number of arguments for IFN calls).
>
> I'd rather not do that, since it means we have to construct a gcall *
> in cases where we're not asking about a straight-forward vectorisation
> of a preexisting scalar statement.
>
> The number of arguments is an inherent property of the function,
> it doesn't require access to a particular call.
>  The hook tells
> us what vector types we're using (and by extension what types
> the scalar op would have).

... so merging the hooks by passing both the combined fn code
and the decl would be possible?  The decl can be NULL if
the fn code is not CFN_LAST and if it is CFN_LAST then the decl
may be a target builtin?

Maybe I'm just too worried about that clean separation...  so decide
for yourselves here.

Thus, ok.

Thanks,
Richard.

> Thanks,
> Richard
>


Re: [PATCH 2/6] Make builtin_vectorized_function take a combined_fn

2015-11-13 Thread Richard Sandiford
Richard Biener  writes:
> On Mon, Nov 9, 2015 at 5:25 PM, Richard Sandiford
>  wrote:
>> This patch replaces the fndecl argument to builtin_vectorized_function
>> with a combined_fn and gets the vectoriser to call it for internal
>> functions too.  The patch also moves vectorisation of machine-specific
>> built-ins to a new hook, builtin_md_vectorized_function.
>>
>> I've attached a -b version too since that's easier to read.
>
> @@ -42095,8 +42018,7 @@ ix86_builtin_vectorized_function (tree fndecl,
> tree type_out,
>
>/* Dispatch to a handler for a vectorization library.  */
>if (ix86_veclib_handler)
> -return ix86_veclib_handler ((enum built_in_function) fn, type_out,
> -   type_in);
> +return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
>
>return NULL_TREE;
>  }
>
> fn is already a combined_fn?  Why does the builtin_vectorized_function
> not take one but an unsigned int?

Not everything that includes the target headers includes tree.h.
This is like builtin_conversion taking a tree_code as an unsigned int.

> @@ -42176,11 +42077,12 @@ ix86_veclibabi_svml (enum built_in_function
> fn, tree type_out, tree type_in)
>return NULL_TREE;
>  }
>
> -  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
> +  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
> +  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
>
> -  if (fn == BUILT_IN_LOGF)
> +  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
>
> with 'fn' now a combined_fn how is this going to work with IFNs?

By this point we already know that the function has one of the
supported modes.  A previous patch extended matchfn_built_in
to handle combined_fns.  E.g.

  mathfn_built_in (float_type_node, IFN_SQRT)

returns BUILT_IN_SQRTF.

> +/* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION.  */
> +
> +static tree
> +rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
> +  tree type_in)
> +{
>
> any reason you are using a fndecl for this hook instead of the function code?

It just seems more helpful to pass the fndecl when we have it.
It's cheap to go from the decl to the code but it's less cheap
to go the other way.

> @@ -1639,20 +1639,20 @@ vect_finish_stmt_generation (gimple *stmt,
> gimple *vec_stmt,
>  tree
>  vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
>  {
> -  tree fndecl = gimple_call_fndecl (call);
> -
> -  /* We only handle functions that do not read or clobber memory -- i.e.
> - const or novops ones.  */
> -  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
> +  /* We only handle functions that do not read or clobber memory.  */
> +  if (gimple_vuse (call))
>  return NULL_TREE;
>
> -  if (!fndecl
> -  || TREE_CODE (fndecl) != FUNCTION_DECL
> -  || !DECL_BUILT_IN (fndecl))
> -return NULL_TREE;
> +  combined_fn fn = gimple_call_combined_fn (call);
> +  if (fn != CFN_LAST)
> +return targetm.vectorize.builtin_vectorized_function
> +  (fn, vectype_out, vectype_in);
>
> -  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
> -   vectype_in);
> +  if (gimple_call_builtin_p (call, BUILT_IN_MD))
> +return targetm.vectorize.builtin_md_vectorized_function
> +  (gimple_call_fndecl (call), vectype_out, vectype_in);
> +
> +  return NULL_TREE;
>
> Looking at this and the issues above wouldn't it be easier to simply
> pass the call stmt to the hook (which then can again handle
> both normal and target builtins)?  And it has context available
> (actual arguments and number of arguments for IFN calls).

I'd rather not do that, since it means we have to construct a gcall *
in cases where we're not asking about a straight-forward vectorisation
of a preexisting scalar statement.

The number of arguments is an inherent property of the function,
it doesn't require access to a particular call.  The hook tells
us what vector types we're using (and by extension what types
the scalar op would have).

Thanks,
Richard



Re: [PATCH 2/6] Make builtin_vectorized_function take a combined_fn

2015-11-10 Thread Richard Biener
On Mon, Nov 9, 2015 at 5:25 PM, Richard Sandiford
 wrote:
> This patch replaces the fndecl argument to builtin_vectorized_function
> with a combined_fn and gets the vectoriser to call it for internal
> functions too.  The patch also moves vectorisation of machine-specific
> built-ins to a new hook, builtin_md_vectorized_function.
>
> I've attached a -b version too since that's easier to read.

@@ -42095,8 +42018,7 @@ ix86_builtin_vectorized_function (tree fndecl,
tree type_out,

   /* Dispatch to a handler for a vectorization library.  */
   if (ix86_veclib_handler)
-return ix86_veclib_handler ((enum built_in_function) fn, type_out,
-   type_in);
+return ix86_veclib_handler (combined_fn (fn), type_out, type_in);

   return NULL_TREE;
 }

fn is already a combined_fn?  Why does the builtin_vectorized_function
not take one but an unsigned int?

@@ -42176,11 +42077,12 @@ ix86_veclibabi_svml (enum built_in_function
fn, tree type_out, tree type_in)
   return NULL_TREE;
 }

-  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
+  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
+  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

-  if (fn == BUILT_IN_LOGF)
+  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)

with 'fn' now a combined_fn how is this going to work with IFNs?

@@ -42194,9 +42096,7 @@ ix86_veclibabi_svml (enum built_in_function
fn, tree type_out, tree type_in)
   name[4] &= ~0x20;

   arity = 0;
-  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
-   args;
-   args = TREE_CHAIN (args))
+  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
 arity++;


or this?

Did you try this out?  We have only two basic testcases for all this
code using sin()
which may not end up as IFN even with -ffast-math(?).

+/* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION.  */
+
+static tree
+rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
+  tree type_in)
+{

any reason you are using a fndecl for this hook instead of the function code?

@@ -1639,20 +1639,20 @@ vect_finish_stmt_generation (gimple *stmt,
gimple *vec_stmt,
 tree
 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
 {
-  tree fndecl = gimple_call_fndecl (call);
-
-  /* We only handle functions that do not read or clobber memory -- i.e.
- const or novops ones.  */
-  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
+  /* We only handle functions that do not read or clobber memory.  */
+  if (gimple_vuse (call))
 return NULL_TREE;

-  if (!fndecl
-  || TREE_CODE (fndecl) != FUNCTION_DECL
-  || !DECL_BUILT_IN (fndecl))
-return NULL_TREE;
+  combined_fn fn = gimple_call_combined_fn (call);
+  if (fn != CFN_LAST)
+return targetm.vectorize.builtin_vectorized_function
+  (fn, vectype_out, vectype_in);

-  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
-   vectype_in);
+  if (gimple_call_builtin_p (call, BUILT_IN_MD))
+return targetm.vectorize.builtin_md_vectorized_function
+  (gimple_call_fndecl (call), vectype_out, vectype_in);
+
+  return NULL_TREE;

Looking at this and the issues above wouldn't it be easier to simply
pass the call stmt to the hook (which then can again handle
both normal and target builtins)?  And it has context available
(actual arguments and number of arguments for IFN calls).

Richard.

>
> gcc/
> * target.def (builtin_vectorized_function): Take a combined_fn (in
> the form of an unsigned int) rather than a function decl.
> (builtin_md_vectorized_function): New.
> * targhooks.h (default_builtin_vectorized_function): Replace the
> fndecl argument with an unsigned int.
> (default_builtin_md_vectorized_function): Declare.
> * targhooks.c (default_builtin_vectorized_function): Replace the
> fndecl argument with an unsigned int.
> (default_builtin_md_vectorized_function): New function.
> * doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION):
> New hook.
> * doc/tm.texi: Regenerate.
> * tree-vect-stmts.c (vectorizable_function): Update call to
> builtin_vectorized_function, also passing internal functions.
> Call builtin_md_vectorized_function for target-specific builtins.
> * config/aarch64/aarch64-protos.h
> (aarch64_builtin_vectorized_function): Replace fndecl argument
> with an unsigned int.
> * config/aarch64/aarch64-builtins.c: Include case-cfn-macros.h.
> (aarch64_builtin_vectorized_function): Update after above changes.
> Use CASE_CFN_*.
> * config/arm/arm-protos.h (arm_builtin_vectorized_function): Replace
> fndecl argument with an unsigned int.
> * config/arm/arm-builtins.c: Include 

[PATCH 2/6] Make builtin_vectorized_function take a combined_fn

2015-11-09 Thread Richard Sandiford
This patch replaces the fndecl argument to builtin_vectorized_function
with a combined_fn and gets the vectoriser to call it for internal
functions too.  The patch also moves vectorisation of machine-specific
built-ins to a new hook, builtin_md_vectorized_function.

I've attached a -b version too since that's easier to read.


gcc/
* target.def (builtin_vectorized_function): Take a combined_fn (in
the form of an unsigned int) rather than a function decl.
(builtin_md_vectorized_function): New.
* targhooks.h (default_builtin_vectorized_function): Replace the
fndecl argument with an unsigned int.
(default_builtin_md_vectorized_function): Declare.
* targhooks.c (default_builtin_vectorized_function): Replace the
fndecl argument with an unsigned int.
(default_builtin_md_vectorized_function): New function.
* doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION):
New hook.
* doc/tm.texi: Regenerate.
* tree-vect-stmts.c (vectorizable_function): Update call to
builtin_vectorized_function, also passing internal functions.
Call builtin_md_vectorized_function for target-specific builtins.
* config/aarch64/aarch64-protos.h
(aarch64_builtin_vectorized_function): Replace fndecl argument
with an unsigned int.
* config/aarch64/aarch64-builtins.c: Include case-cfn-macros.h.
(aarch64_builtin_vectorized_function): Update after above changes.
Use CASE_CFN_*.
* config/arm/arm-protos.h (arm_builtin_vectorized_function): Replace
fndecl argument with an unsigned int.
* config/arm/arm-builtins.c: Include case-cfn-macros.h
(arm_builtin_vectorized_function): Update after above changes.
Use CASE_CFN_*.
* config/i386/i386.c: Include case-cfn-macros.h
(ix86_veclib_handler): Take a combined_fn rather than a
built_in_function.
(ix86_veclibabi_svml, ix86_veclibabi_acml): Likewise.  Use
mathfn_built_in rather than calling builtin_decl_implicit directly.
(ix86_builtin_vectorized_function) Update after above changes.
Use CASE_CFN_*.
* config/rs6000/rs6000.c: Include case-cfn-macros.h
(rs6000_builtin_vectorized_libmass): Replace fndecl argument
with a combined_fn.  Use CASE_CFN_*.  Use mathfn_built_in rather
than calling builtin_decl_implicit directly.
(rs6000_builtin_vectorized_function): Update after above changes.
Use CASE_CFN_*.  Move BUILT_IN_MD to...
(rs6000_builtin_md_vectorized_function): ...this new function.
(TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION): Define.

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 6b4208f..c4cda4f 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -38,6 +38,7 @@
 #include "expr.h"
 #include "langhooks.h"
 #include "gimple-iterator.h"
+#include "case-cfn-macros.h"
 
 #define v8qi_UP  V8QImode
 #define v4hi_UP  V4HImode
@@ -1258,7 +1259,8 @@ aarch64_expand_builtin (tree exp,
 }
 
 tree
-aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
+aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
+ tree type_in)
 {
   machine_mode in_mode, out_mode;
   int in_n, out_n;
@@ -1282,130 +1284,119 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
 	: (AARCH64_CHECK_BUILTIN_MODE (2, S) \
 	   ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
 	   : NULL_TREE)))
-  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+  switch (fn)
 {
-  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
-  switch (fn)
-	{
 #undef AARCH64_CHECK_BUILTIN_MODE
 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
   (out_mode == N##Fmode && out_n == C \
&& in_mode == N##Fmode && in_n == C)
-	case BUILT_IN_FLOOR:
-	case BUILT_IN_FLOORF:
-	  return AARCH64_FIND_FRINT_VARIANT (floor);
-	case BUILT_IN_CEIL:
-	case BUILT_IN_CEILF:
-	  return AARCH64_FIND_FRINT_VARIANT (ceil);
-	case BUILT_IN_TRUNC:
-	case BUILT_IN_TRUNCF:
-	  return AARCH64_FIND_FRINT_VARIANT (btrunc);
-	case BUILT_IN_ROUND:
-	case BUILT_IN_ROUNDF:
-	  return AARCH64_FIND_FRINT_VARIANT (round);
-	case BUILT_IN_NEARBYINT:
-	case BUILT_IN_NEARBYINTF:
-	  return AARCH64_FIND_FRINT_VARIANT (nearbyint);
-	case BUILT_IN_SQRT:
-	case BUILT_IN_SQRTF:
-	  return AARCH64_FIND_FRINT_VARIANT (sqrt);
+CASE_CFN_FLOOR:
+  return AARCH64_FIND_FRINT_VARIANT (floor);
+CASE_CFN_CEIL:
+  return AARCH64_FIND_FRINT_VARIANT (ceil);
+CASE_CFN_TRUNC:
+  return AARCH64_FIND_FRINT_VARIANT (btrunc);
+CASE_CFN_ROUND:
+  return AARCH64_FIND_FRINT_VARIANT (round);
+CASE_CFN_NEARBYINT:
+  return AARCH64_FIND_FRINT_VARIANT (nearbyint);
+CASE_CFN_SQRT:
+  return AARCH64_FIND_FRINT_VARIANT (sqrt);
 #undef