On 06/19/2017 12:35 PM, Jan Hubicka wrote:
>> Hello.
>>
>> Following patch tries to resolve following 2 issues:
>>
>> a) When one takes address of a function that uses target_clones attribute,
>>    default implementation is always returned.
>>
>> b) Using dlsym("foo") should work and thus the resolver function should
>>    use the default name. Because of that, default implementation must be
>>    renamed.
>>
>> Unfortunately, we currently do not support redirection of ipa_refs, thus
>> walk_tree is needed to resolve that. Hopefully there should not be any
>> different IPA_REF that needs to be handled.
> 
> The cgraph interface for redirection is meant mostly for full IPA passes
> that can not touch bodies directly, in this case I think it is fine to
> walk all references.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> Ready to be installed?
>> Martin
> 
>> >From 198c8464978c21cd68d4743de5648ecfefd2e09c Mon Sep 17 00:00:00 2001
>> From: marxin <mli...@suse.cz>
>> Date: Wed, 17 May 2017 15:56:22 +0200
>> Subject: [PATCH] Fix multi-versioning issues (PR ipa/80732).
>>
>> gcc/ChangeLog:
>>
>> 2017-05-19  Martin Liska  <mli...@suse.cz>
>>
>>      PR ipa/80732
>>      * attribs.c (make_dispatcher_decl): Do not append '.ifunc'
>>      to dispatcher function name.
>>      * multiple_target.c (replace_function_decl): New function.
>>      (create_dispatcher_calls): Redirect both edges and references.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2017-05-19  Martin Liska  <mli...@suse.cz>
>>
>>      PR ipa/80732
>>      * gcc.target/i386/mvc5.c: Scan indirect_function.
>>      * gcc.target/i386/mvc7.c: Likewise.
>>      * gcc.target/i386/pr80732.c: New test.
>> ---
>>  gcc/attribs.c                           |   6 +-
>>  gcc/multiple_target.c                   | 105 
>> ++++++++++++++++++++++----------
>>  gcc/testsuite/gcc.target/i386/mvc5.c    |   2 +-
>>  gcc/testsuite/gcc.target/i386/mvc7.c    |   2 +-
>>  gcc/testsuite/gcc.target/i386/pr80732.c |  85 ++++++++++++++++++++++++++
>>  5 files changed, 161 insertions(+), 39 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/i386/pr80732.c
>>
>> diff --git a/gcc/attribs.c b/gcc/attribs.c
>> index 4ba0eab8899..5eb19e82795 100644
>> --- a/gcc/attribs.c
>> +++ b/gcc/attribs.c
>> @@ -888,12 +888,8 @@ make_dispatcher_decl (const tree decl)
>>    tree func_decl;
>>    char *func_name;
>>    tree fn_type, func_type;
>> -  bool is_uniq = false;
>>  
>> -  if (TREE_PUBLIC (decl) == 0)
>> -    is_uniq = true;
>> -
>> -  func_name = make_unique_name (decl, "ifunc", is_uniq);
>> +  func_name = xstrdup (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
>>  
>>    fn_type = TREE_TYPE (decl);
>>    func_type = build_function_type (TREE_TYPE (fn_type),
>> diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
>> index 2ee6a9591ba..fba2636ba16 100644
>> --- a/gcc/multiple_target.c
>> +++ b/gcc/multiple_target.c
>> @@ -34,6 +34,27 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "target.h"
>>  #include "attribs.h"
>>  #include "pretty-print.h"
>> +#include "gimple-iterator.h"
>> +#include "gimple-walk.h"
>> +
>> +/* Walker callback that replaces all FUNCTION_DECL of a function that's
>> +   going to be versioned.  */
>> +
>> +static tree
>> +replace_function_decl (tree *op, int *walk_subtrees, void *data)
>> +{
>> +  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
>> +  cgraph_function_version_info *info = (cgraph_function_version_info 
>> *)wi->info;
>> +
>> +  if (TREE_CODE (*op) == FUNCTION_DECL
>> +      && info->this_node->decl == *op)
>> +    {
>> +      *op = info->dispatcher_resolver;
>> +      *walk_subtrees = 0;
>> +    }
>> +
>> +  return NULL;
>> +}
>>  
>>  /* If the call in NODE has multiple target attribute with multiple fields,
>>     replace it with dispatcher call and create dispatcher (once).  */
>> @@ -41,51 +62,48 @@ along with GCC; see the file COPYING3.  If not see
>>  static void
>>  create_dispatcher_calls (struct cgraph_node *node)
>>  {
>> -  cgraph_edge *e;
>> -  cgraph_edge *e_next = NULL;
>> +  ipa_ref *ref;
>> +
>> +  if (!DECL_FUNCTION_VERSIONED (node->decl))
>> +    return;
>> +
>> +  auto_vec<cgraph_edge *> edges_to_redirect;
>> +  auto_vec<ipa_ref *> references_to_redirect;
>> +
>> +  for (unsigned i = 0; node->iterate_referring (i, ref); i++)
>> +    references_to_redirect.safe_push (ref);
>>  
>>    /* We need to remember NEXT_CALLER as it could be modified in the loop.  
>> */
>> -  for (e = node->callers; e ;e = (e == NULL) ? e_next : e->next_caller)
>> -    {
>> -      tree resolver_decl;
>> -      tree idecl;
>> -      tree decl;
>> -      gimple *call = e->call_stmt;
>> -      struct cgraph_node *inode;
>> -
>> -      /* Checking if call of function is call of versioned function.
>> -     Versioned function are not inlined, so there is no need to
>> -     check for inline.  */
>> -      if (!call
>> -      || !(decl = gimple_call_fndecl (call))
>> -      || !DECL_FUNCTION_VERSIONED (decl))
>> -    continue;
>> +  for (cgraph_edge *e = node->callers; e ; e = e->next_caller)
>> +    edges_to_redirect.safe_push (e);
>>  
>> +  if (!edges_to_redirect.is_empty () || !references_to_redirect.is_empty ())
>> +    {
>>        if (!targetm.has_ifunc_p ())
>>      {
>> -      error_at (gimple_location (call),
>> +      error_at (DECL_SOURCE_LOCATION (node->decl),
>>                  "the call requires ifunc, which is not"
>>                  " supported by this target");
>> -      break;
>> +      return;
>>      }
>>        else if (!targetm.get_function_versions_dispatcher)
>>      {
>> -      error_at (gimple_location (call),
>> +      error_at (DECL_SOURCE_LOCATION (node->decl),
>>                  "target does not support function version dispatcher");
>> -      break;
>> +      return;
>>      }
>>  
>> -      e_next = e->next_caller;
>> -      idecl = targetm.get_function_versions_dispatcher (decl);
>> +      tree idecl = targetm.get_function_versions_dispatcher (node->decl);
>>        if (!idecl)
>>      {
>> -      error_at (gimple_location (call),
>> +      error_at (DECL_SOURCE_LOCATION (node->decl),
>>                  "default target_clones attribute was not set");
>> -      break;
>> +      return;
>>      }
>> -      inode = cgraph_node::get (idecl);
>> +
>> +      cgraph_node *inode = cgraph_node::get (idecl);
>>        gcc_assert (inode);
>> -      resolver_decl = targetm.generate_version_dispatcher_body (inode);
>> +      tree resolver_decl = targetm.generate_version_dispatcher_body (inode);
>>  
>>        /* Update aliases.  */
>>        inode->alias = true;
>> @@ -93,12 +111,35 @@ create_dispatcher_calls (struct cgraph_node *node)
>>        if (!inode->analyzed)
>>      inode->resolve_alias (cgraph_node::get (resolver_decl));
>>  
>> -      e->redirect_callee (inode);
>> -      e->redirect_call_stmt_to_callee ();
>> -      /*  Since REDIRECT_CALLEE modifies NEXT_CALLER field we move to
>> -      previously set NEXT_CALLER.  */
>> -      e = NULL;
>> +      /* Redirect edges.  */
>> +      unsigned i;
>> +      cgraph_edge *e;
>> +      FOR_EACH_VEC_ELT (edges_to_redirect, i, e)
>> +    {
>> +      e->redirect_callee (inode);
>> +      e->redirect_call_stmt_to_callee ();
>> +    }
>> +
>> +      /* Redirect references.  */
>> +      FOR_EACH_VEC_ELT (references_to_redirect, i, ref)
>> +    {
>> +      if (ref->use == IPA_REF_ADDR)
>> +        {
>> +          struct walk_stmt_info wi;
>> +          memset (&wi, 0, sizeof (wi));
>> +          wi.info = (void *)node->function_version ();
>> +          gimple_stmt_iterator it = gsi_for_stmt (ref->stmt);
>> +          if (ref->referring->decl != resolver_decl)
>> +            walk_gimple_stmt (&it, NULL, replace_function_decl, &wi);
>> +        }
>> +      else
>> +        gcc_unreachable ();
> 
> Don't you need to handle static initializers here as well?

Yep, needs to be done. Done in updated version of patch. I also updated the 
test-case
and survives regression tests.

I'm going to install the patch.

Thanks
Martin

> 
> OK with this change or explanation why you don't :)
> Thanks,
> Honza
>> +    }
>>      }
>> +
>> +  symtab->change_decl_assembler_name (node->decl,
>> +                                  clone_function_name (node->decl,
>> +                                                       "default"));
>>  }
>>  
>>  /* Return length of attribute names string,
>> diff --git a/gcc/testsuite/gcc.target/i386/mvc5.c 
>> b/gcc/testsuite/gcc.target/i386/mvc5.c
>> index 8fea04c792b..677f79f3fd0 100644
>> --- a/gcc/testsuite/gcc.target/i386/mvc5.c
>> +++ b/gcc/testsuite/gcc.target/i386/mvc5.c
>> @@ -1,7 +1,7 @@
>>  /* { dg-do compile } */
>>  /* { dg-require-ifunc "" } */
>>  /* { dg-options "-fno-inline" } */
>> -/* { dg-final { scan-assembler-times "foo.ifunc" 6 } } */
>> +/* { dg-final { scan-assembler "foo,foo.resolver" } } */
>>  
>>  __attribute__((target_clones("default","avx","avx2")))
>>  int
>> diff --git a/gcc/testsuite/gcc.target/i386/mvc7.c 
>> b/gcc/testsuite/gcc.target/i386/mvc7.c
>> index 9a9d7a3da30..a3697ba9b75 100644
>> --- a/gcc/testsuite/gcc.target/i386/mvc7.c
>> +++ b/gcc/testsuite/gcc.target/i386/mvc7.c
>> @@ -3,7 +3,7 @@
>>  /* { dg-final { scan-assembler "foo.resolver" } } */
>>  /* { dg-final { scan-assembler "avx" } } */
>>  /* { dg-final { scan-assembler "slm" } } */
>> -/* { dg-final { scan-assembler-times "foo.ifunc" 4 } } */
>> +/* { dg-final { scan-assembler "foo,foo.resolver" } } */
>>  
>>  __attribute__((target_clones("avx","default","arch=slm","arch=core-avx2")))
>>  int foo ();
>> diff --git a/gcc/testsuite/gcc.target/i386/pr80732.c 
>> b/gcc/testsuite/gcc.target/i386/pr80732.c
>> new file mode 100644
>> index 00000000000..fb168260eaa
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/pr80732.c
>> @@ -0,0 +1,85 @@
>> +/* PR ipa/80732 */
>> +/* { dg-do run } */
>> +/* { dg-options "-ldl -fPIC -rdynamic -O3 -g" } */
>> +/* { dg-require-ifunc "" } */
>> +/* { dg-require-effective-target fma4 } */
>> +
>> +
>> +#include <dlfcn.h>
>> +
>> +__attribute__((target_clones("default","fma"),noinline,optimize("fast-math")))
>> +double f1(double a, double b, double c)
>> +{
>> +    return a * b + c;
>> +}
>> +
>> +double k1(double a, double b, double c, void **p)
>> +{
>> +    *p = f1;
>> +    return f1(a, b, c);
>> +}
>> +
>> +__attribute__((target("fma"),optimize("fast-math")))
>> +static double f2_fma(double a, double b, double c)
>> +{
>> +    return a * b + c;
>> +}
>> +
>> +__attribute__((optimize("fast-math")))
>> +static double f2_default(double a, double b, double c)
>> +{
>> +    return a * b + c;
>> +}
>> +
>> +static void *f2_resolve(void)
>> +{
>> +    __builtin_cpu_init ();
>> +    if (__builtin_cpu_supports("fma"))
>> +        return f2_fma;
>> +    else
>> +        return f2_default;
>> +}
>> +
>> +double f2(double a, double b, double c) 
>> __attribute__((ifunc("f2_resolve")));
>> +
>> +double k2(double a, double b, double c, void **p)
>> +{
>> +    *p = f2;
>> +    return f2(a, b, c);
>> +}
>> +
>> +int main()
>> +{
>> +    char buffer[256];
>> +    const char *expectation = "4.93038e-32, 4.93038e-32, 4.93038e-32";
>> +
>> +    volatile double a = 1.0000000000000002;
>> +    volatile double b = -0.9999999999999998;
>> +    volatile double c = 1.0;
>> +
>> +    void *hdl = dlopen(0, RTLD_NOW);
>> +
>> +    double (*pf1)(double, double, double) = dlsym(hdl, "f1");
>> +    double (*pk1)(double, double, double, void**) = dlsym(hdl, "k1");
>> +    double (*_pf1)(double, double, double);
>> +
>> +    double v1_1 = pf1(a, b, c);
>> +    double v1_2 = pk1(a, b, c, (void**)&_pf1);
>> +    double v1_3 = _pf1(a, b, c);
>> +    __builtin_sprintf (buffer, "%g, %g, %g", v1_1, v1_2, v1_3);
>> +    if (__builtin_strcmp (buffer, expectation) != 0)
>> +      __builtin_abort ();
>> +
>> +    double (*pf2)(double, double, double) = dlsym(hdl, "f2");
>> +    double (*pk2)(double, double, double, void**) = dlsym(hdl, "k2");
>> +    double (*_pf2)(double, double, double);
>> +
>> +    double v2_1 = pf2(a, b, c);
>> +    double v2_2 = pk2(a, b, c, (void**)&_pf2);
>> +    double v2_3 = _pf2(a, b, c);
>> +    __builtin_sprintf(buffer, "%g, %g, %g", v2_1, v2_2, v2_3);
>> +    if (__builtin_strcmp (buffer, expectation) != 0)
>> +      __builtin_abort ();
>> +
>> +    return 0;
>> +}
>> -- 
>> 2.12.2
>>
> 

>From 77ee5ebe36466b240a7fb291921f430466c231e9 Mon Sep 17 00:00:00 2001
From: marxin <mli...@suse.cz>
Date: Wed, 17 May 2017 15:56:22 +0200
Subject: [PATCH] Fix multi-versioning issues (PR ipa/80732).

gcc/ChangeLog:

2017-05-19  Martin Liska  <mli...@suse.cz>

	PR ipa/80732
	* attribs.c (make_dispatcher_decl): Do not append '.ifunc'
	to dispatcher function name.
	* multiple_target.c (replace_function_decl): New function.
	(create_dispatcher_calls): Redirect both edges and references.

gcc/testsuite/ChangeLog:

2017-05-19  Martin Liska  <mli...@suse.cz>

	PR ipa/80732
	* gcc.target/i386/mvc5.c: Scan indirect_function.
	* gcc.target/i386/mvc7.c: Likewise.
	* gcc.target/i386/pr80732.c: New test.
---
 gcc/attribs.c                           |   6 +-
 gcc/multiple_target.c                   | 115 +++++++++++++++++++++++---------
 gcc/testsuite/gcc.target/i386/mvc5.c    |   2 +-
 gcc/testsuite/gcc.target/i386/mvc7.c    |   2 +-
 gcc/testsuite/gcc.target/i386/pr80732.c |  92 +++++++++++++++++++++++++
 5 files changed, 178 insertions(+), 39 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr80732.c

diff --git a/gcc/attribs.c b/gcc/attribs.c
index 4ba0eab8899..5eb19e82795 100644
--- a/gcc/attribs.c
+++ b/gcc/attribs.c
@@ -888,12 +888,8 @@ make_dispatcher_decl (const tree decl)
   tree func_decl;
   char *func_name;
   tree fn_type, func_type;
-  bool is_uniq = false;
 
-  if (TREE_PUBLIC (decl) == 0)
-    is_uniq = true;
-
-  func_name = make_unique_name (decl, "ifunc", is_uniq);
+  func_name = xstrdup (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
 
   fn_type = TREE_TYPE (decl);
   func_type = build_function_type (TREE_TYPE (fn_type),
diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
index 2ee6a9591ba..bdb5b3bf228 100644
--- a/gcc/multiple_target.c
+++ b/gcc/multiple_target.c
@@ -34,6 +34,27 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "attribs.h"
 #include "pretty-print.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+
+/* Walker callback that replaces all FUNCTION_DECL of a function that's
+   going to be versioned.  */
+
+static tree
+replace_function_decl (tree *op, int *walk_subtrees, void *data)
+{
+  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
+  cgraph_function_version_info *info = (cgraph_function_version_info *)wi->info;
+
+  if (TREE_CODE (*op) == FUNCTION_DECL
+      && info->this_node->decl == *op)
+    {
+      *op = info->dispatcher_resolver;
+      *walk_subtrees = 0;
+    }
+
+  return NULL;
+}
 
 /* If the call in NODE has multiple target attribute with multiple fields,
    replace it with dispatcher call and create dispatcher (once).  */
@@ -41,51 +62,48 @@ along with GCC; see the file COPYING3.  If not see
 static void
 create_dispatcher_calls (struct cgraph_node *node)
 {
-  cgraph_edge *e;
-  cgraph_edge *e_next = NULL;
+  ipa_ref *ref;
+
+  if (!DECL_FUNCTION_VERSIONED (node->decl))
+    return;
+
+  auto_vec<cgraph_edge *> edges_to_redirect;
+  auto_vec<ipa_ref *> references_to_redirect;
+
+  for (unsigned i = 0; node->iterate_referring (i, ref); i++)
+    references_to_redirect.safe_push (ref);
 
   /* We need to remember NEXT_CALLER as it could be modified in the loop.  */
-  for (e = node->callers; e ;e = (e == NULL) ? e_next : e->next_caller)
-    {
-      tree resolver_decl;
-      tree idecl;
-      tree decl;
-      gimple *call = e->call_stmt;
-      struct cgraph_node *inode;
-
-      /* Checking if call of function is call of versioned function.
-	 Versioned function are not inlined, so there is no need to
-	 check for inline.  */
-      if (!call
-	  || !(decl = gimple_call_fndecl (call))
-	  || !DECL_FUNCTION_VERSIONED (decl))
-	continue;
+  for (cgraph_edge *e = node->callers; e ; e = e->next_caller)
+    edges_to_redirect.safe_push (e);
 
+  if (!edges_to_redirect.is_empty () || !references_to_redirect.is_empty ())
+    {
       if (!targetm.has_ifunc_p ())
 	{
-	  error_at (gimple_location (call),
+	  error_at (DECL_SOURCE_LOCATION (node->decl),
 		    "the call requires ifunc, which is not"
 		    " supported by this target");
-	  break;
+	  return;
 	}
       else if (!targetm.get_function_versions_dispatcher)
 	{
-	  error_at (gimple_location (call),
+	  error_at (DECL_SOURCE_LOCATION (node->decl),
 		    "target does not support function version dispatcher");
-	  break;
+	  return;
 	}
 
-      e_next = e->next_caller;
-      idecl = targetm.get_function_versions_dispatcher (decl);
+      tree idecl = targetm.get_function_versions_dispatcher (node->decl);
       if (!idecl)
 	{
-	  error_at (gimple_location (call),
+	  error_at (DECL_SOURCE_LOCATION (node->decl),
 		    "default target_clones attribute was not set");
-	  break;
+	  return;
 	}
-      inode = cgraph_node::get (idecl);
+
+      cgraph_node *inode = cgraph_node::get (idecl);
       gcc_assert (inode);
-      resolver_decl = targetm.generate_version_dispatcher_body (inode);
+      tree resolver_decl = targetm.generate_version_dispatcher_body (inode);
 
       /* Update aliases.  */
       inode->alias = true;
@@ -93,12 +111,45 @@ create_dispatcher_calls (struct cgraph_node *node)
       if (!inode->analyzed)
 	inode->resolve_alias (cgraph_node::get (resolver_decl));
 
-      e->redirect_callee (inode);
-      e->redirect_call_stmt_to_callee ();
-      /*  Since REDIRECT_CALLEE modifies NEXT_CALLER field we move to
-	  previously set NEXT_CALLER.  */
-      e = NULL;
+      /* Redirect edges.  */
+      unsigned i;
+      cgraph_edge *e;
+      FOR_EACH_VEC_ELT (edges_to_redirect, i, e)
+	{
+	  e->redirect_callee (inode);
+	  e->redirect_call_stmt_to_callee ();
+	}
+
+      /* Redirect references.  */
+      FOR_EACH_VEC_ELT (references_to_redirect, i, ref)
+	{
+	  if (ref->use == IPA_REF_ADDR)
+	    {
+	      struct walk_stmt_info wi;
+	      memset (&wi, 0, sizeof (wi));
+	      wi.info = (void *)node->function_version ();
+
+	      if (dyn_cast<varpool_node *> (ref->referring))
+		{
+		  hash_set<tree> visited_nodes;
+		  walk_tree (&DECL_INITIAL (ref->referring->decl),
+			     replace_function_decl, &wi, &visited_nodes);
+		}
+	      else
+		{
+		  gimple_stmt_iterator it = gsi_for_stmt (ref->stmt);
+		  if (ref->referring->decl != resolver_decl)
+		    walk_gimple_stmt (&it, NULL, replace_function_decl, &wi);
+		}
+	    }
+	  else
+	    gcc_unreachable ();
+	}
     }
+
+  symtab->change_decl_assembler_name (node->decl,
+				      clone_function_name (node->decl,
+							   "default"));
 }
 
 /* Return length of attribute names string,
diff --git a/gcc/testsuite/gcc.target/i386/mvc5.c b/gcc/testsuite/gcc.target/i386/mvc5.c
index 8fea04c792b..677f79f3fd0 100644
--- a/gcc/testsuite/gcc.target/i386/mvc5.c
+++ b/gcc/testsuite/gcc.target/i386/mvc5.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-require-ifunc "" } */
 /* { dg-options "-fno-inline" } */
-/* { dg-final { scan-assembler-times "foo.ifunc" 6 } } */
+/* { dg-final { scan-assembler "foo,foo.resolver" } } */
 
 __attribute__((target_clones("default","avx","avx2")))
 int
diff --git a/gcc/testsuite/gcc.target/i386/mvc7.c b/gcc/testsuite/gcc.target/i386/mvc7.c
index 9a9d7a3da30..a3697ba9b75 100644
--- a/gcc/testsuite/gcc.target/i386/mvc7.c
+++ b/gcc/testsuite/gcc.target/i386/mvc7.c
@@ -3,7 +3,7 @@
 /* { dg-final { scan-assembler "foo.resolver" } } */
 /* { dg-final { scan-assembler "avx" } } */
 /* { dg-final { scan-assembler "slm" } } */
-/* { dg-final { scan-assembler-times "foo.ifunc" 4 } } */
+/* { dg-final { scan-assembler "foo,foo.resolver" } } */
 
 __attribute__((target_clones("avx","default","arch=slm","arch=core-avx2")))
 int foo ();
diff --git a/gcc/testsuite/gcc.target/i386/pr80732.c b/gcc/testsuite/gcc.target/i386/pr80732.c
new file mode 100644
index 00000000000..2c59c5e224b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr80732.c
@@ -0,0 +1,92 @@
+/* PR ipa/80732 */
+/* { dg-do run } */
+/* { dg-options "-ldl -fPIC -rdynamic -O3 -g -pie" } */
+/* { dg-require-ifunc "" } */
+/* { dg-require-effective-target fma4 } */
+/* { dg-require-effective-target fpic } */
+/* { dg-require-effective-target pie } */
+
+#include <dlfcn.h>
+
+__attribute__((target_clones("default","fma"),noinline,optimize("fast-math")))
+double f1(double a, double b, double c)
+{
+    return a * b + c;
+}
+
+double k1(double a, double b, double c, void **p)
+{
+    *p = f1;
+    return f1(a, b, c);
+}
+
+__attribute__((target("fma"),optimize("fast-math")))
+static double f2_fma(double a, double b, double c)
+{
+    return a * b + c;
+}
+
+__attribute__((optimize("fast-math")))
+static double f2_default(double a, double b, double c)
+{
+    return a * b + c;
+}
+
+static void *f2_resolve(void)
+{
+    __builtin_cpu_init ();
+    if (__builtin_cpu_supports("fma"))
+        return f2_fma;
+    else
+        return f2_default;
+}
+
+double f2(double a, double b, double c) __attribute__((ifunc("f2_resolve")));
+
+double k2(double a, double b, double c, void **p)
+{
+    *p = f2;
+    return f2(a, b, c);
+}
+
+double (*initializer) (double, double, double) = { &f1 };
+
+int main()
+{
+    char buffer[256];
+    const char *expectation = "4.93038e-32, 4.93038e-32, 4.93038e-32";
+
+    volatile double a = 1.0000000000000002;
+    volatile double b = -0.9999999999999998;
+    volatile double c = 1.0;
+
+    void *hdl = dlopen(0, RTLD_NOW);
+
+    double (*pf1)(double, double, double) = dlsym(hdl, "f1");
+    double (*pk1)(double, double, double, void**) = dlsym(hdl, "k1");
+    double (*_pf1)(double, double, double);
+
+    double v1_1 = pf1(a, b, c);
+    double v1_2 = pk1(a, b, c, (void**)&_pf1);
+    double v1_3 = _pf1(a, b, c);
+    __builtin_sprintf (buffer, "%g, %g, %g", v1_1, v1_2, v1_3);
+    if (__builtin_strcmp (buffer, expectation) != 0)
+      __builtin_abort ();
+
+    double (*pf2)(double, double, double) = dlsym(hdl, "f2");
+    double (*pk2)(double, double, double, void**) = dlsym(hdl, "k2");
+    double (*_pf2)(double, double, double);
+
+    double v2_1 = pf2(a, b, c);
+    double v2_2 = pk2(a, b, c, (void**)&_pf2);
+    double v2_3 = _pf2(a, b, c);
+    __builtin_sprintf(buffer, "%g, %g, %g", v2_1, v2_2, v2_3);
+    if (__builtin_strcmp (buffer, expectation) != 0)
+      __builtin_abort ();
+
+    __builtin_sprintf(buffer, "%g, %g, %g", initializer (a, b, c), v2_2, v2_3);
+    if (__builtin_strcmp (buffer, expectation) != 0)
+      __builtin_abort ();
+
+    return 0;
+}
-- 
2.13.1

Reply via email to