[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-03-05 Thread Yaxun Liu via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC355421: [CUDA][HIP][Sema] Fix template kernel with function 
as template parameter (authored by yaxunl, committed by ).
Herald added a project: clang.

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411

Files:
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaExpr.cpp
  test/SemaCUDA/call-device-fn-from-host.cu
  test/SemaCUDA/call-host-fn-from-device.cu


Index: test/SemaCUDA/call-device-fn-from-host.cu
===
--- test/SemaCUDA/call-device-fn-from-host.cu
+++ test/SemaCUDA/call-device-fn-from-host.cu
@@ -37,7 +37,7 @@
 }
 
 template  __host__ __device__ void hd2() { device_fn(); }
-// expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
 void host_fn() { hd2(); }
 
 __host__ __device__ void hd() { device_fn(); }
@@ -90,3 +90,8 @@
 static __host__ __device__ void hd_func() { device_fn(); }
 __global__ void kernel() { hd_func(); }
 void host_func(void) { kernel<<<1, 1>>>(); }
+
+// Should allow host function call kernel template with device function argument.
+__device__ void f();
+template<void(*F)()> __global__ void t() { F(); }
+__host__ void g() { t<f><<<1,1>>>(); }
Index: test/SemaCUDA/call-host-fn-from-device.cu
===
--- test/SemaCUDA/call-host-fn-from-device.cu
+++ test/SemaCUDA/call-host-fn-from-device.cu
@@ -56,14 +56,14 @@
 }
 
 template  __host__ __device__ void hd2() { host_fn(); }
-// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 __global__ void kernel() { hd2(); }
 
 __host__ __device__ void hd() { host_fn(); }
 // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 
 template  __host__ __device__ void hd3() { host_fn(); }
-// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 __device__ void device_fn() { hd3(); }
 
 // No error because this is never instantiated.
Index: lib/Sema/SemaExpr.cpp
===
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -14799,6 +14799,9 @@
   if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType()))
 ResolveExceptionSpec(Loc, FPT);
 
+  if (getLangOpts().CUDA)
+CheckCUDACall(Loc, Func);
+
   // If we don't need to mark the function as used, and we don't need to
   // try to provide a definition, there's nothing more to do.
   if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) &&
Index: lib/Sema/SemaCUDA.cpp
===
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -675,6 +675,11 @@
 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   assert(Callee && "Callee may not be null.");
+
+  auto &ExprEvalCtx = ExprEvalContexts.back();
+  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
+return true;
+
   // FIXME: Is bailing out early correct here?  Should we instead assume that
   // the caller is a global initializer?
   FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);


[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-26 Thread John McCall via Phabricator via cfe-commits
rjmccall accepted this revision.
rjmccall added a comment.
This revision is now accepted and ready to land.

In D56411#1410598 , @yaxunl wrote:

> In D56411#1410153 , @rjmccall wrote:
>
> > In D56411#1406212 , @yaxunl wrote:
> >
> > > I would like to fix the validation issue only and leave the overload 
> > > resolution issue for future.
> >
> >
> > As I understand it, the "validation issue" is just that you'd like a 
> > diagnostic to be emitted when resolving the template argument in order to 
> > force SFINAE to pick a different template.  I think that's actually just 
> > the overload-resolution issue.
>
>
> Currently there are two host-ness related issues about function type template 
> arguments:
>
> 1. when there are two or more candidates for the template argument, clang 
> goes through host-ness based overloading resolution, which does not work 
> properly
> 2. when there is only one candidate for the template argument, clang does not 
> go through overloading resolution, therefore the first issue does not show 
> up. However, clang still checks host-ness of template argument. As discussed 
> before, clang should not check host-ness in unevaluated or constant-evaluated 
> contexts. Instead, clang should check host-ness during template instantiation.
>
>   I refer to the first issue as the host-ness overloading resolution issue and the 
> second issue as the host-ness validation issue. They are related but separate.
>
>   The first issue only happens when host-ness based overloading resolution is 
> used. For applications which can be compiled with nvcc, this cannot happen, 
> therefore it is less common and less urgent.
>
>   The second issue can happen to applications which can be compiled with 
> nvcc, therefore is more imminent.
>
>   Fixing the second issue is relatively straightforward. It does not need to 
> introduce new AST types for host-ness. Also it is orthogonal to fixing the 
> first issue.


Okay, I understand now.  LGTM.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-26 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1410153 , @rjmccall wrote:

> In D56411#1406212 , @yaxunl wrote:
>
> > I would like to fix the validation issue only and leave the overload 
> > resolution issue for future.
>
>
> As I understand it, the "validation issue" is just that you'd like a 
> diagnostic to be emitted when resolving the template argument in order to 
> force SFINAE to pick a different template.  I think that's actually just the 
> overload-resolution issue.


Currently there are two host-ness related issues about function type template 
arguments:

1. when there are two or more candidates for the template argument, clang goes 
through host-ness based overloading resolution, which does not work properly

2. when there is only one candidate for the template argument, clang does not 
go through overloading resolution, therefore the first issue does not show up. 
However, clang still checks host-ness of template argument. As discussed 
before, clang should not check host-ness in unevaluated or constant-evaluated 
contexts. Instead, clang should check host-ness during template instantiation.

I refer to the first issue as the host-ness overloading resolution issue and the 
second issue as the host-ness validation issue. They are related but separate.

The first issue only happens when host-ness based overloading resolution is 
used. For applications which can be compiled with nvcc, this cannot happen, 
therefore it is less common and less urgent.

The second issue can happen to applications which can be compiled with nvcc, 
therefore is more imminent.

Fixing the second issue is relatively straightforward. It does not need to 
introduce new AST types for host-ness. Also it is orthogonal to fixing the 
first issue.
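
For concreteness, a minimal sketch of the two cases (function names are
hypothetical, not taken from the patch):

  // Issue 1: two candidates for the template argument; choosing between them
  // needs host-ness based overload resolution, which is left for future work.
  __host__ void f();
  __device__ void f();

  // Issue 2: a single candidate; no overload resolution happens, but host-ness
  // is still checked against the referencing context instead of being deferred
  // to template instantiation.
  __device__ void d();

  template<void (*F)()> __global__ void kern() { F(); }

  void host_caller() {
    kern<f><<<1, 1>>>();  // issue 1: which f is the template argument?
    kern<d><<<1, 1>>>();  // issue 2: should be accepted; d is only used by a kernel
  }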


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-25 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In D56411#1406212 , @yaxunl wrote:

> I would like to fix the validation issue only and leave the overload 
> resolution issue for future.


As I understand it, the "validation issue" is just that you'd like a diagnostic 
to be emitted when resolving the template argument in order to force SFINAE to 
pick a different template.  I think that's actually just the 
overload-resolution issue.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-21 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 187832.
yaxunl added a comment.

I would like to fix the validation issue only and leave the overload resolution 
issue for future.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411

Files:
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaExpr.cpp
  test/SemaCUDA/call-device-fn-from-host.cu
  test/SemaCUDA/call-host-fn-from-device.cu


Index: test/SemaCUDA/call-host-fn-from-device.cu
===
--- test/SemaCUDA/call-host-fn-from-device.cu
+++ test/SemaCUDA/call-host-fn-from-device.cu
@@ -56,14 +56,14 @@
 }
 
 template  __host__ __device__ void hd2() { host_fn(); }
-// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 __global__ void kernel() { hd2(); }
 
 __host__ __device__ void hd() { host_fn(); }
 // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 
 template  __host__ __device__ void hd3() { host_fn(); }
-// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 __device__ void device_fn() { hd3(); }
 
 // No error because this is never instantiated.
Index: test/SemaCUDA/call-device-fn-from-host.cu
===
--- test/SemaCUDA/call-device-fn-from-host.cu
+++ test/SemaCUDA/call-device-fn-from-host.cu
@@ -37,7 +37,7 @@
 }
 
 template  __host__ __device__ void hd2() { device_fn(); }
-// expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
 void host_fn() { hd2(); }
 
 __host__ __device__ void hd() { device_fn(); }
@@ -90,3 +90,8 @@
 static __host__ __device__ void hd_func() { device_fn(); }
 __global__ void kernel() { hd_func(); }
 void host_func(void) { kernel<<<1, 1>>>(); }
+
+// Should allow host function call kernel template with device function argument.
+__device__ void f();
+template<void(*F)()> __global__ void t() { F(); }
+__host__ void g() { t<f><<<1,1>>>(); }
Index: lib/Sema/SemaExpr.cpp
===
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -14760,6 +14760,9 @@
   if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType()))
 ResolveExceptionSpec(Loc, FPT);
 
+  if (getLangOpts().CUDA)
+CheckCUDACall(Loc, Func);
+
   // If we don't need to mark the function as used, and we don't need to
   // try to provide a definition, there's nothing more to do.
   if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) &&
Index: lib/Sema/SemaCUDA.cpp
===
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -675,6 +675,11 @@
 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   assert(Callee && "Callee may not be null.");
+
+  auto &ExprEvalCtx = ExprEvalContexts.back();
+  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
+return true;
+
   // FIXME: Is bailing out early correct here?  Should we instead assume that
   // the caller is a global initializer?
   FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);


[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-19 Thread Artem Belevich via Phabricator via cfe-commits
tra added a subscriber: rsmith.
tra added a comment.

In D56411#1400300 , @rjmccall wrote:

> Okay, but it's not great design to have a kind of overloading that can't be 
> resolved to an exact intended declaration even by an explicit cast.  That's 
> why I think making *optional* host/device typing is a good idea.  And I 
> strongly want to caution you against doing language design by just 
> incrementally hacking at the compiler to progressively make more test-cases 
> work, which is what it feels like you're doing.


+1. IMO for templates to work sensibly in this situations `__host__` / 
`__device__` must be part of the type.

I.e. extending the example above,

  __host__ int f() { return 1;}
  __device__ int f() { return 2;}
  template<int (*F)()> __kernel__ void t() { F(); }
  __host__ void g() { t<f><<<1,1>>>(); }
  __global__ void g() { t<f><<<1,1>>>(); } // technically legal in CUDA, though clang does not support it yet.

IMO, t<f> in `__host__` g() should be different from t<f> in `__device__` g(), 
which implies that 'device-ness' must be part of F's type, so we would have 
two different instantiations, which is what we want to see in the AST.
Calling context is somewhat irrelevant for template instantiations. E.g. one 
could've explicitly instantiated the template in the global scope.
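
For example (a sketch, reusing t and f from the snippet above under the
reconstruction given there), an explicit instantiation at namespace scope has
no surrounding function whose host-ness could disambiguate the argument:

  // Explicit instantiation at global scope: there is no __host__ or
  // __device__ caller here to tell the two f's apart.
  template void t<f>();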

@rsmith Any suggestions how we could deal with this situation in a principled 
way?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-15 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

Okay, but it's not great design to have a kind of overloading that can't be 
resolved to an exact intended declaration even by an explicit cast.  That's why 
I think making *optional* host/device typing is a good idea.  And I strongly 
want to caution you against doing language design by just incrementally hacking 
at the compiler to progressively make more test-cases work, which is what it 
feels like you're doing.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-15 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1400251 , @rjmccall wrote:

> It is totally unreasonable, at the time you are resolving a template 
> argument, to investigate how the corresponding template parameter is used 
> within the template and use that to shape how the template argument is 
> resolved.  That is simply not how the C++ template model works.  Given that 
> CUDA doesn't distinguish between host and device functions in the type 
> system, if you are going to have a rule here, it has to be based on, at most, 
> (1) the current semantic context (which may not even be a function), (2) the 
> template being specialized, and (3) the declarations in the template-argument 
> set.
>
> As I've said before on a previous patch, I think the *best* rule would be to 
> recognize a hard difference between host and device function types, probably 
> by making function types default to being host function types and requiring 
> function pointers that can store device function pointers to be explicitly 
> annotated.  However, that would not be source-compatible with ordinary CUDA, 
> which is presumably unacceptable.
>
> The second-best rule would be to preserve compatibility by making an 
> unannotated function type still be "unknown whether host or device", but to 
> also allow the creation of explicitly host-only and device-only function 
> types.  For source compatibility, DREs to functions would formally have the 
> unknown function type.  Converting a pointer to an unknown function into a 
> pointer to a host function would do some basic checking on the operand 
> expression (basically to verify that it's not obviously a device function), 
> and resolving an overload set in the context of a host-only function pointer 
> type would do the obvious filtering.
>
> Otherwise, you're going to be stuck where you are right now, which is that 
> you're messing around with heuristics because somebody added a language 
> extension that isn't actually very well thought out.  But if that's what you 
> have to do, it's what you have to do.  For this specific question, where you 
> are trying to resolve an overloaded template argument, I think there are 
> basically two sensible options.
>
> - You can filter the overloads by the host-ness of the template.  This makes 
> some sense, because it's probably most likely that a function template that 
> takes a function as a template argument is going to call it — but not 
> necessarily, because it very well might decide instead to call over to the 
> device to invoke the function.  Also, not all templates have a "host-ness"; 
> that's pretty much exclusive to function templates.
> - You can filter the overload by the host-ness of the current context.  
> Again, this makes some sense because it's likely that a host function is 
> trying to pass down a host function — but again, it's not hard to think of 
> exceptions.  And again, this has the problem that the context isn't always a 
> function and so doesn't necessarily have a host-ness. Any sort of additional 
> template-specific guidance seems doomed to gradually turn into the second 
> design I mentioned above where you have the ability to be more specific about 
> function types.
>
>   For the time being, this is still a Clang extension, and while Artem 
> mentioned that NVIDIA is investigating it, that's presumably still an 
> investigation and we still have an opportunity to shape their thinking.  So I 
> would really recommend taking the second approach, or maybe even trying to 
> convince them to take the first.  (How common is higher-order programming on 
> the device, anyway, that you can't break source compatibility for it?)  For 
> this specific line of inquiry, that would probably mean not trying to 
> automatically use any particular filter on the overload set but instead just 
> relying on the programmer to annotate what kind of function they want.


I have seen important machine learning frameworks heavily using function type 
template parameters. If we make host-ness part of the type system, those templates 
expecting device function template parameters will have to be rewritten; otherwise 
they won't compile. I don't think it is an easy task to persuade developers to 
make that change, since nvcc does not require that.
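
For illustration only (names are made up, not from any particular framework),
the pattern in question looks roughly like this:

  // A kernel template parameterized directly by a device function.
  __device__ float relu(float x) { return x > 0.f ? x : 0.f; }

  template<float (*Op)(float)>
  __global__ void map_kernel(const float *in, float *out, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
      out[i] = Op(in[i]);   // Op must be a __device__ function
  }

  void launch(const float *in, float *out, int n) {
    // Host code names a __device__ function as the template argument.
    map_kernel<relu><<<(n + 255) / 256, 256>>>(in, out, n);
  }

Requiring host-ness in the type would force every such Op parameter and every
caller to be re-annotated, which is the compatibility concern above.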

However, since this host-ness based overloading resolution is already in place 
and used by existing code, I do not want to break it. I consider your 
suggestion about host-ness based heuristic overloading resolution most viable 
for the current situation: take the host-ness of function templates as the 
first heuristic if the function under resolution is a function template 
argument, otherwise take the host-ness of the current context as the next 
heuristic.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411




[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-15 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

It is totally unreasonable, at the time you are resolving a template argument, 
to investigate how the corresponding template parameter is used within the 
template and use that to shape how the template argument is resolved.  That is 
simply not how the C++ template model works.  Given that CUDA doesn't 
distinguish between host and device functions in the type system, if you are 
going to have a rule here, it has to be based on, at most, (1) the current 
semantic context (which may not even be a function), (2) the template being 
specialized, and (3) the declarations in the template-argument set.

As I've said before on a previous patch, I think the *best* rule would be to 
recognize a hard difference between host and device function types, probably by 
making function types default to being host function types and requiring 
function pointers that can store device function pointers to be explicitly 
annotated.  However, that would not be source-compatible with ordinary CUDA, 
which is presumably unacceptable.

The second-best rule would be to preserve compatibility by making an 
unannotated function type still be "unknown whether host or device", but to 
also allow the creation of explicitly host-only and device-only function types. 
 For source compatibility, DREs to functions would formally have the unknown 
function type.  Converting a pointer to an unknown function into a pointer to a 
host function would do some basic checking on the operand expression (basically 
to verify that it's not obviously a device function), and resolving an overload 
set in the context of a host-only function pointer type would do the obvious 
filtering.
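
Purely as a hypothetical sketch of that second design (this syntax does not
exist in clang today; it only illustrates the idea of optional host-only and
device-only function types):

  __host__ void hf();
  __device__ void df();

  void (*unknown_fp)() = hf;              // unannotated: "unknown host or device", as today
  // Hypothetical explicit annotations on the function pointer type:
  // void (* __host__ host_fp)()  = hf;   // would filter overload sets to host functions
  // void (* __device__ dev_fp)() = df;   // would filter overload sets to device functions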

Otherwise, you're going to be stuck where you are right now, which is that 
you're messing around with heuristics because somebody added a language 
extension that isn't actually very well thought out.  But if that's what you 
have to do, it's what you have to do.  For this specific question, where you 
are trying to resolve an overloaded template argument, I think there are 
basically two sensible options.

- You can filter the overloads by the host-ness of the template.  This makes 
some sense, because it's probably most likely that a function template that 
takes a function as a template argument is going to call it — but not 
necessarily, because it very well might decide instead to call over to the 
device to invoke the function.  Also, not all templates have a "host-ness"; 
that's pretty much exclusive to function templates.
- You can filter the overload by the host-ness of the current context.  Again, 
this makes some sense because it's likely that a host function is trying to 
pass down a host function — but again, it's not hard to think of exceptions.  
And again, this has the problem that the context isn't always a function and so 
doesn't necessarily have a host-ness.

Any sort of additional template-specific guidance seems doomed to gradually 
turn into the second design I mentioned above where you have the ability to be 
more specific about function types.

For the time being, this is still a Clang extension, and while Artem mentioned 
that NVIDIA is investigating it, that's presumably still an investigation and 
we still have an opportunity to shape their thinking.  So I would really 
recommend taking the second approach, or maybe even trying to convince them to 
take the first.  (How common is higher-order programming on the device, anyway, 
that you can't break source compatibility for it?)  For this specific line of 
inquiry, that would probably mean not trying to automatically use any 
particular filter on the overload set but instead just relying on the 
programmer to annotate what kind of function they want.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-15 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1398586 , @rjmccall wrote:

> But what we've just been talking about is not a validity rule, it's an 
> overload-resolution rule.  It's not *invalid* to use a device function as a 
> template argument to a host function template (or to a class template, which 
> of course is neither host nor device).  All you need to do is to resolve 
> otherwise-intractable overload ambiguities by matching with the host-ness of 
> the current context, which there's probably already code to do for when an 
> overload set is used as e.g. a function argument.


OK I found the code for resolving the function type template argument. 
Basically CheckTemplateArgument calls ResolveAddressOfOverloadedFunction, which 
creates an AddressOfFunctionResolver. The constructor of 
AddressOfFunctionResolver calls AddMatchingNonTemplateFunctions to populate the 
candidate set, where the host-ness of a CUDA function is checked to decide whether 
the function is added as a candidate:

https://github.com/llvm-mirror/clang/blob/master/lib/Sema/SemaOverload.cpp#L11174

However, as shown in the above link, there is one issue on that line, which is 
better demonstrated by the follow testcase

  __host__ int f() { return 1;}
  __device__ int f() { return 2;}
  template<int (*F)()> __kernel__ void t() { F(); }
  __host__ void g() { t<f><<<1,1>>>(); } 

In t<f>, f should resolve to `__device__ f` since the true user of f is not g, 
but the template t, or whatever is in t. Since t is a kernel, and a kernel can only 
call device functions, we know that f should resolve to `__device__ f` 
instead of `__host__ f`.

However, currently clang resolves f to `__host__ f`, because it thinks the 
caller is S.CurContext, whereas S.CurContext is g.

The problem is that although f is referenced in g, it is not called by g. In 
this case, f is passed to a kernel template, and a kernel template can call 
device functions, so f can be a device function.

The issue is that S.CurContext is not conveying the real caller or user of f in 
AddressOfFunctionResolver. To convey that information, a new member 
TemplateUser may need to be added to AddressOfFunctionResolver so that it knows 
that it is resolving a template argument and which template is using that 
argument.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-14 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

But what we've just been talking about is not a validity rule, it's an 
overload-resolution rule.  It's not *invalid* to use a device function as a 
template argument to a host function template (or to a class template, which of 
course is neither host nor device).  All you need to do is to resolve 
otherwise-intractable overload ambiguities by matching with the host-ness of 
the current context, which there's probably already code to do for when an 
overload set is used as e.g. a function argument.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-14 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1398329 , @rjmccall wrote:

> In D56411#1398328 , @rjmccall wrote:
>
> > In D56411#1398291 , @tra wrote:
> >
> > > >> That said, does CUDA have a general rule resolving `__host__` vs. 
> > > >> `__device__` overloads based on context?  And does it allow 
> > > >> overloading based solely on `__host__` vs. `__device__`?
> > >
> > > NVCC does not. Clang does. See https://goo.gl/EXnymm for the details.
> > >
> > > AFAICT, NVIDIA is starting to consider adopting Clang's approach:
> > >  http://lists.llvm.org/pipermail/cfe-dev/2018-November/060070.html 
> > > (original message from Bryce apparently didn't make it to the cfe-dev 
> > > archive)
> >
> >
> > Okay.  Probably the template-argument rule ought to be the same as the 
> > address-of-function rule, which I assume means that there's a final pass 
> > that resolves ambiguities in favor of functions that can be used from the 
> > current context, to the extent that that's meaningful.  It's hard to tell 
> > because that document does not appear to include a formal specification.
>
>
> Regardless, that has no effect on this patch.


The check for host/device to resolve template argument already exists in clang 
before this patch. This patch is trying to fix a bug in that check.
e.g.

  __device__ void f();
  __host__ void f();
  template<void (*F)()> __global__ void kernel() { F(); }
  __host__ void g() { kernel<f><<<1,1>>>(); }

The template kernel<f> is trying to resolve f; it is supposed to get `__device__ f` but 
it gets `__host__ f`, because
Sema::CheckCUDACall thinks the caller of f is g, but actually the caller of f is 
the kernel template.

This check cannot be deferred to template instantiation since it is too late. 
It has to be done in
a constant-evaluation context where the template argument is checked. Since there 
is no existing way
to tell Sema::CheckCUDACall that clang is checking template argument, the 
template is passed through
a newly added member to ExpressionEvaluationContextRecord.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-14 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1398291 , @tra wrote:

> >> That said, does CUDA have a general rule resolving `__host__` vs. 
> >> `__device__` overloads based on context?  And does it allow overloading 
> >> based solely on `__host__` vs. `__device__`?
>
> NVCC does not. Clang does. See https://goo.gl/EXnymm for the details.
>
> AFAICT, NVIDIA is starting to consider adopting Clang's approach:
>  http://lists.llvm.org/pipermail/cfe-dev/2018-November/060070.html (original 
> message from Bryce apparently didn't make it to the cfe-dev archive)


So my concern about checking host/device compatibility in template 
instantiation is still valid.

I verified the following code is valid with clang

  #define __device__ __attribute__((device))
  
  __device__ void f();
  
  void f();
  
  __device__ void g() {
f();
  }
  
  template<void (*F)()> __device__ void t() {
F();
  }
  
  __device__ void h() {
t<f>();
  }

To be able to resolve function type template argument based on host/device 
attribute, we need to do the check before template instantiation.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-14 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In D56411#1398328 , @rjmccall wrote:

> In D56411#1398291 , @tra wrote:
>
> > >> That said, does CUDA have a general rule resolving `__host__` vs. 
> > >> `__device__` overloads based on context?  And does it allow overloading 
> > >> based solely on `__host__` vs. `__device__`?
> >
> > NVCC does not. Clang does. See https://goo.gl/EXnymm for the details.
> >
> > AFAICT, NVIDIA is starting to consider adopting Clang's approach:
> >  http://lists.llvm.org/pipermail/cfe-dev/2018-November/060070.html 
> > (original message from Bryce apparently didn't make it to the cfe-dev 
> > archive)
>
>
> Okay.  Probably the template-argument rule ought to be the same as the 
> address-of-function rule, which I assume means that there's a final pass that 
> resolves ambiguities in favor of functions that can be used from the current 
> context, to the extent that that's meaningful.  It's hard to tell because 
> that document does not appear to include a formal specification.


Regardless, that has no effect on this patch.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-14 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In D56411#1398291 , @tra wrote:

> >> That said, does CUDA have a general rule resolving `__host__` vs. 
> >> `__device__` overloads based on context?  And does it allow overloading 
> >> based solely on `__host__` vs. `__device__`?
>
> NVCC does not. Clang does. See https://goo.gl/EXnymm for the details.
>
> AFAICT, NVIDIA is starting to consider adopting Clang's approach:
>  http://lists.llvm.org/pipermail/cfe-dev/2018-November/060070.html (original 
> message from Bryce apparently didn't make it to the cfe-dev archive)


Okay.  Probably the template-argument rule ought to be the same as the 
address-of-function rule, which I assume means that there's a final pass that 
resolves ambiguities in favor of functions that can be used from the current 
context, to the extent that that's meaningful.  It's hard to tell because that 
document does not appear to include a formal specification.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-14 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

>> That said, does CUDA have a general rule resolving `__host__` vs. 
>> `__device__` overloads based on context?  And does it allow overloading 
>> based solely on `__host__` vs. `__device__`?

NVCC does not. Clang does. See https://goo.gl/EXnymm for the details.

AFAICT, NVIDIA is starting to consider adopting Clang's approach:
http://lists.llvm.org/pipermail/cfe-dev/2018-November/060070.html (original 
message from Bryce apparently didn't make it to the cfe-dev archive)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-14 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1398103 , @rjmccall wrote:

> In D56411#1398097 , @yaxunl wrote:
>
> > In D56411#1365878 , @yaxunl wrote:
> >
> > > In D56411#1365745 , @rjmccall 
> > > wrote:
> > >
> > > > In D56411#1365727 , @yaxunl 
> > > > wrote:
> > > >
> > > > > In D56411#1360010 , 
> > > > > @rjmccall wrote:
> > > > >
> > > > > > I think the diagnostic should come during instantiation when you 
> > > > > > find an evaluated use of a host function within a device function.
> > > > >
> > > > >
> > > > > It seems the body of function template is checked only during parsing 
> > > > > of the definition of the template itself. When a function
> > > > >  template is instantiated, the body of the instantiated function is 
> > > > > not checked again.
> > > >
> > > >
> > > > No, that's not correct.  However, it's checked somewhat differently, 
> > > > and it's possible that the existing diagnostic is not set up to fire 
> > > > along all common paths.  Try moving the diagnostic to 
> > > > `MarkFunctionReferenced`, and note that `OdrUse` will be `false` in all 
> > > > the unevaluated contexts.
> > >
> > >
> > > You are right. After I disable current diagnostic, I saw 
> > > PerformPendingInstantiations at the end of parsing the TU, where the AST 
> > > of the instantiated function is iterated and MarkFunctionReferenced is 
> > > called. I will try to fix my patch as suggested. Thanks.
> >
> >
> > I got one concern. If we want to do overload resolution of function type 
> > template argument based on host or device, we need to do that before 
> > template instantiation, right?
> >
> > e.g. we have two functions having the same name f and type, but one is 
> > `__host__` and the other is `__device__`, and we pass it as a template 
> > argument to a template function g. We want to choose `__device__ f` if g 
> > itself is `__device__` and `__host__ f` if g itself is `__host__`. If we 
> > want to do this we have to do the check before template instantiation, 
> > right?
>
>
> Yes, you would need to check that when resolving the overload to a single 
> declaration.  That would be separate from diagnosing uses.
>
> That said, does CUDA have a general rule resolving `__host__` vs. 
> `__device__` overloads based on context?  And does it allow overloading based 
> solely on `__host__` vs. `__device__`?


https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#function-declaration-specifiers
 does not talk about that.

Experimenting with nvcc shows that two functions cannot differ only by the 
host/device attribute; otherwise one is treated as a redefinition of the other.
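
For reference, the kind of declaration pair in question (accepted by clang as
target-based overloading, rejected by nvcc as a redefinition):

  __host__ int f() { return 1; }
  __device__ int f() { return 2; }   // nvcc: redefinition of 'f'; clang: a distinct overload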

So I withdraw my concern.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-14 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In D56411#1398097 , @yaxunl wrote:

> In D56411#1365878 , @yaxunl wrote:
>
> > In D56411#1365745 , @rjmccall 
> > wrote:
> >
> > > In D56411#1365727 , @yaxunl 
> > > wrote:
> > >
> > > > In D56411#1360010 , @rjmccall 
> > > > wrote:
> > > >
> > > > > I think the diagnostic should come during instantiation when you find 
> > > > > an evaluated use of a host function within a device function.
> > > >
> > > >
> > > > It seems the body of function template is checked only during parsing 
> > > > of the definition of the template itself. When a function
> > > >  template is instantiated, the body of the instantiated function is not 
> > > > checked again.
> > >
> > >
> > > No, that's not correct.  However, it's checked somewhat differently, and 
> > > it's possible that the existing diagnostic is not set up to fire along 
> > > all common paths.  Try moving the diagnostic to `MarkFunctionReferenced`, 
> > > and note that `OdrUse` will be `false` in all the unevaluated contexts.
> >
> >
> > You are right. After I disable current diagnostic, I saw 
> > PerformPendingInstantiations at the end of parsing the TU, where the AST of 
> > the instantiated function is iterated and MarkFunctionReferenced is called. 
> > I will try to fix my patch as suggested. Thanks.
>
>
> I got one concern. If we want to do overload resolution of function type 
> template argument based on host or device, we need to do that before template 
> instantiation, right?
>
> e.g. we have two functions having the same name f and type, but one is 
> `__host__` and the other is `__device__`, and we pass it as a template 
> argument to a template function g. We want to choose `__device__ f` if g 
> itself is `__device__` and `__host__ f` if g itself is `__host__`. If we want 
> to do this we have to do the check before template instantiation, right?


Yes, you would need to check that when resolving the overload to a single 
declaration.  That would be separate from diagnosing uses.

That said, does CUDA have a general rule resolving `__host__` vs. `__device__` 
overloads based on context?  And does it allow overloading based solely on 
`__host__` vs. `__device__`?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-02-14 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1365878 , @yaxunl wrote:

> In D56411#1365745 , @rjmccall wrote:
>
> > In D56411#1365727 , @yaxunl wrote:
> >
> > > In D56411#1360010 , @rjmccall 
> > > wrote:
> > >
> > > > I think the diagnostic should come during instantiation when you find 
> > > > an evaluated use of a host function within a device function.
> > >
> > >
> > > It seems the body of function template is checked only during parsing of 
> > > the definition of the template itself. When a function
> > >  template is instantiated, the body of the instantiated function is not 
> > > checked again.
> >
> >
> > No, that's not correct.  However, it's checked somewhat differently, and 
> > it's possible that the existing diagnostic is not set up to fire along all 
> > common paths.  Try moving the diagnostic to `MarkFunctionReferenced`, and 
> > note that `OdrUse` will be `false` in all the unevaluated contexts.
>
>
> You are right. After I disable current diagnostic, I saw 
> PerformPendingInstantiations at the end of parsing the TU, where the AST of 
> the instantiated function is iterated and MarkFunctionReferenced is called. I 
> will try to fix my patch as suggested. Thanks.


I got one concern. If we want to do overload resolution of function type 
template argument based on host or device, we need to do that before template 
instantiation, right?

e.g. we have two functions having the same name f and type, but one is 
`__host__` and the other is `__device__`, and we pass it as a template argument 
to a template function g. We want to choose `__device__ f` if g itself is 
`__device__` and `__host__ f` if g itself is `__host__`. If we want to do this 
we have to do the check before template instantiation, right?
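
A small sketch of that scenario (assuming a function-pointer non-type template
parameter; names are hypothetical):

  __host__   void f();
  __device__ void f();

  template<void (*F)()> __host__   void g_host() { F(); }   // wants __host__ f
  template<void (*F)()> __device__ void g_dev()  { F(); }   // wants __device__ f

  __host__   void caller_h() { g_host<f>(); }  // f should pick the __host__ overload
  __device__ void caller_d() { g_dev<f>();  }  // f should pick the __device__ overload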


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-24 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In D56411#1369906 , @yaxunl wrote:

> In D56411#1365745 , @rjmccall wrote:
>
> > In D56411#1365727 , @yaxunl wrote:
> >
> > > In D56411#1360010 , @rjmccall 
> > > wrote:
> > >
> > > > I think the diagnostic should come during instantiation when you find 
> > > > an evaluated use of a host function within a device function.
> > >
> > >
> > > It seems the body of function template is checked only during parsing of 
> > > the definition of the template itself. When a function
> > >  template is instantiated, the body of the instantiated function is not 
> > > checked again.
> >
> >
> > No, that's not correct.  However, it's checked somewhat differently, and 
> > it's possible that the existing diagnostic is not set up to fire along all 
> > common paths.  Try moving the diagnostic to `MarkFunctionReferenced`, and 
> > note that `OdrUse` will be `false` in all the unevaluated contexts.
>
>
> I got a regression in the following test when checking CheckCUDACall in 
> MarkFunctionReferenced:
>
>   typedef struct {
> template <int> void *foo() { return 0; }
>   
> void foo() {
>   foo<0>();
> }
>   } A;
>   
>   
>
> Basically clang does not allow getting linkage of foo<0> before 
> ActOnTypedefDeclarator, quoting SemaDecl.cpp line 4171
>
>   // If we've already computed linkage for the anonymous tag, then
>   // adding a typedef name for the anonymous decl can change that
>   // linkage, which might be a serious problem.  Diagnose this as
>   // unsupported and ignore the typedef name.  TODO: we should
>   // pursue this as a language defect and establish a formal rule
>   // for how to handle it.
>   if (TagFromDeclSpec->hasLinkageBeenComputed()) {
> Diag(NewTD->getLocation(), diag::err_typedef_changes_linkage);
>   
>   
>
> However, CheckCUDACall needs to call GetGVALinkageForFunction on the callee 
> to know if it will be emitted,
>  which causes the linkage of the anonymous struct to be cached and triggers 
> err_typedef_changes_linkage.


Sounds like you were missing a case in the diagnostic, then.

Can you check whether you're in an `inline` function before you check the 
linkage?  It's a bit of a hack but it might work.  You have logic to look for 
evaluated references in used inline functions anyway, right?
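
One possible reading of that suggestion, as a rough sketch only (hypothetical,
not the committed change):

  // Inside Sema::CheckCUDACall: skip the GVA-linkage query when the current
  // context is an inline function, whose enclosing (possibly anonymous) record
  // may not have had its linkage computed yet.
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller || !Caller->isInlined()) {
    GVALinkage L = Context.GetGVALinkageForFunction(Callee);
    // ... use L to decide whether Callee will be emitted ...
  }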


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-24 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1365745 , @rjmccall wrote:

> In D56411#1365727 , @yaxunl wrote:
>
> > In D56411#1360010 , @rjmccall 
> > wrote:
> >
> > > I think the diagnostic should come during instantiation when you find an 
> > > evaluated use of a host function within a device function.
> >
> >
> > It seems the body of function template is checked only during parsing of 
> > the definition of the template itself. When a function
> >  template is instantiated, the body of the instantiated function is not 
> > checked again.
>
>
> No, that's not correct.  However, it's checked somewhat differently, and it's 
> possible that the existing diagnostic is not set up to fire along all common 
> paths.  Try moving the diagnostic to `MarkFunctionReferenced`, and note that 
> `OdrUse` will be `false` in all the unevaluated contexts.


I got a regression in the following test when checking CheckCUDACall in 
MarkFunctionReferenced:

  typedef struct {
template <int> void *foo() { return 0; }
  
void foo() {
  foo<0>();
}
  } A;

Basically clang does not allow getting linkage of foo<0> before 
ActOnTypedefDeclarator, quoting SemaDecl.cpp line 4171

  // If we've already computed linkage for the anonymous tag, then
  // adding a typedef name for the anonymous decl can change that
  // linkage, which might be a serious problem.  Diagnose this as
  // unsupported and ignore the typedef name.  TODO: we should
  // pursue this as a language defect and establish a formal rule
  // for how to handle it.
  if (TagFromDeclSpec->hasLinkageBeenComputed()) {
Diag(NewTD->getLocation(), diag::err_typedef_changes_linkage);

However, CheckCUDACall needs to call GetGVALinkageForFunction on the callee to 
know if it will be emitted,
which causes the linkage of the anonymous struct to be cached and triggers 
err_typedef_changes_linkage.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-21 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1365745 , @rjmccall wrote:

> In D56411#1365727 , @yaxunl wrote:
>
> > In D56411#1360010 , @rjmccall 
> > wrote:
> >
> > > I think the diagnostic should come during instantiation when you find an 
> > > evaluated use of a host function within a device function.
> >
> >
> > It seems the body of function template is checked only during parsing of 
> > the definition of the template itself. When a function
> >  template is instantiated, the body of the instantiated function is not 
> > checked again.
>
>
> No, that's not correct.  However, it's checked somewhat differently, and it's 
> possible that the existing diagnostic is not set up to fire along all common 
> paths.  Try moving the diagnostic to `MarkFunctionReferenced`, and note that 
> `OdrUse` will be `false` in all the unevaluated contexts.


You are right. After I disable current diagnostic, I saw 
PerformPendingInstantiations at the end of parsing the TU, where the AST of the 
instantiated function is iterated and MarkFunctionReferenced is called. I will 
try to fix my patch as suggested. Thanks.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-21 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In D56411#1365727 , @yaxunl wrote:

> In D56411#1360010 , @rjmccall wrote:
>
> > I think the diagnostic should come during instantiation when you find an 
> > evaluated use of a host function within a device function.
>
>
> It seems the body of function template is checked only during parsing of the 
> definition of the template itself. When a function
>  template is instantiated, the body of the instantiated function is not 
> checked again.


No, that's not correct.  However, it's checked somewhat differently, and it's 
possible that the existing diagnostic is not set up to fire along all common 
paths.  Try moving the diagnostic to `MarkFunctionReferenced`, and note that 
`OdrUse` will be `false` in all the unevaluated contexts.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-21 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1360010 , @rjmccall wrote:

> I think the diagnostic should come during instantiation when you find an 
> evaluated use of a host function within a device function.


It seems the body of function template is checked only during parsing of the 
definition of the template itself. When a function
template is instantiated, the body of the instantiated function is not checked 
again.

I could push an evaluated context, then iterate the AST of the instantiated 
template function to check function call expressions.
However, this will increase the compilation time. Can I do the check in codegen 
stage where the function call instruction is emitted?
That will add extra AST iteration.
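
A minimal sketch of what that extra traversal could look like (hypothetical,
not part of this patch), using RecursiveASTVisitor to visit call expressions in
an instantiated function body:

  #include "clang/AST/RecursiveASTVisitor.h"

  namespace {
  // Walks an instantiated function and inspects every direct call.
  class CUDACallChecker : public clang::RecursiveASTVisitor<CUDACallChecker> {
  public:
    bool VisitCallExpr(clang::CallExpr *CE) {
      if (const clang::FunctionDecl *Callee = CE->getDirectCallee()) {
        // Here one would check host/device compatibility of Callee against the
        // enclosing instantiated function and emit a diagnostic on mismatch.
        (void)Callee;
      }
      return true; // keep traversing
    }
  };
  } // namespace

  // Usage sketch: CUDACallChecker().TraverseDecl(InstantiatedFunctionDecl);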


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-16 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

I think the diagnostic should come during instantiation when you find an 
evaluated use of a host function within a device function.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-16 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1359275 , @rjmccall wrote:

> In D56411#1352602 , @yaxunl wrote:
>
> > In D56411#1352332 , @rjmccall 
> > wrote:
> >
> > > This patch still doesn't make any sense.  You don't need to do any 
> > > special validation when passing a function as a template argument.  When 
> > > Sema instantiates the template definition, it'll rebuild the expressions 
> > > that refer to the template parameter, which will trigger the normal 
> > > checking for whether those expressions are illegally referencing a host 
> > > function from the device, etc.  All you need to do is suppress that 
> > > checking (whether it happens in a template definition or not) for 
> > > references from non-potentially-evaluated contexts.
> >
> >
> > If you look at line 6583 of lib/Sema/SemaTemplate.cpp, you will see clang 
> > does the check if the function needs overloading resolution. However, clang 
> > missed the check if the function does not need overloading resolution. 
> > That's why I need to add the check at line 6593. All the other stuff is 
> > just to help make this check.
> >
> > why clang does not do the reference check when there is no overloading 
> > resolution?
>
>
> We should have already done the check for a non-overloaded function reference 
> as part of building the DRE.  See `Sema::BuildDeclarationNameExpr`.  Template 
> argument checking can resolve an overload set based on the type of the 
> template parameter, so overload sets have to be treated specially there.
>
> > I think in usual cases clang already does that check during template 
> > argument parsing, so it does not need to do that again at line 6593. 
> > Unfortunately, for CUDA host/device check, it has to be skipped in template 
> > argument parsing and deferred to line 6593.
>
> Again, you really should not ever impose this restriction in template 
> arguments.


Sorry I do not quite get it. Are you suggesting there should be no diagnostics 
in the lit test kernel-template-with-func-arg.cu? Or do you think they should 
be diagnosed but should be done in a different way than the current approach? 
Thanks.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-15 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In D56411#1352602 , @yaxunl wrote:

> In D56411#1352332 , @rjmccall wrote:
>
> > This patch still doesn't make any sense.  You don't need to do any special 
> > validation when passing a function as a template argument.  When Sema 
> > instantiates the template definition, it'll rebuild the expressions that 
> > refer to the template parameter, which will trigger the normal checking for 
> > whether those expressions are illegally referencing a host function from 
> > the device, etc.  All you need to do is suppress that checking (whether it 
> > happens in a template definition or not) for references from 
> > non-potentially-evaluated contexts.
>
>
> If you look at line 6583 of lib/Sema/SemaTemplate.cpp, you will see clang 
> does the check if the function needs overloading resolution. However, clang 
> missed the check if the function does not need overloading resolution. That's 
> why I need to add the check at line 6593. All the other stuff is just to help 
> make this check.
>
> why clang does not do the reference check when there is no overloading 
> resolution?


We should have already done the check for a non-overloaded function reference 
as part of building the DRE.  See `Sema::BuildDeclarationNameExpr`.  Template 
argument checking can resolve an overload set based on the type of the template 
parameter, so overload sets have to be treated specially there.
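
For instance (plain C++ with hypothetical names, just to sketch why overload
sets need the special treatment there):

  int  pick(int)  { return 0; }
  long pick(long) { return 1; }

  template <int (*F)(int)> struct S {};
  S<pick> s; // 'pick' alone is an overload set; the parameter type
             // int (*)(int) is what resolves it to the first overload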

> I think in usual cases clang already does that check during template argument 
> parsing, so it does not need to do that again at line 6593. Unfortunately, 
> for CUDA host/device check, it has to be skipped in template argument parsing 
> and deferred to line 6593.

Again, you really should not ever impose this restriction in template arguments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-10 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1352332 , @rjmccall wrote:

> This patch still doesn't make any sense.  You don't need to do any special 
> validation when passing a function as a template argument.  When Sema 
> instantiates the template definition, it'll rebuild the expressions that 
> refer to the template parameter, which will trigger the normal checking for 
> whether those expressions are illegally referencing a host function from the 
> device, etc.  All you need to do is suppress that checking (whether it 
> happens in a template definition or not) for references from 
> non-potentially-evaluated contexts.


If you look at line 6583 of lib/Sema/SemaTemplate.cpp, you will see that clang
does the check when the function needs overload resolution. However, clang
misses the check when the function does not need overload resolution. That's
why I need to add the check at line 6593. All the other stuff is just there to
help make this check.

Why does clang not do the reference check when there is no overload
resolution?

I think that in the usual cases clang already does that check during template
argument parsing, so it does not need to do it again at line 6593.
Unfortunately, for the CUDA host/device check, it has to be skipped during
template argument parsing and deferred to line 6593.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-09 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

This patch still doesn't make any sense.  You don't need to do any special 
validation when passing a function as a template argument.  When Sema 
instantiates the template definition, it'll rebuild the expressions that refer 
to the template parameter, which will trigger the normal checking for whether 
those expressions are illegally referencing a host function from the device, 
etc.  All you need to do is suppress that checking (whether it happens in a 
template definition or not) for references from non-potentially-evaluated 
contexts.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1351824 , @rjmccall wrote:

> But why?  Why do you want to limit this to just template arguments instead of 
> all sorts of similar contexts?


I updated the patch to disable the check in unevaluated and constant-evaluated
expression contexts, except for the deferred check.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 180960.
yaxunl added a comment.

disable the check for more general cases.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411

Files:
  include/clang/Sema/Sema.h
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaTemplate.cpp
  test/SemaCUDA/kernel-template-with-func-arg.cu

Index: test/SemaCUDA/kernel-template-with-func-arg.cu
===
--- /dev/null
+++ test/SemaCUDA/kernel-template-with-func-arg.cu
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+struct C {
+  __device__ void devfun() {}
+  void hostfun() {}
+  template __device__ void devtempfun() {}
+  __device__ __host__ void devhostfun() {}
+};
+
+__device__ void devfun() {}
+__host__ void hostfun() {}
+template __device__ void devtempfun() {}
+__device__ __host__ void devhostfun() {}
+
+template  __global__ void kernel() { devF();}
+template  __global__ void kernel2(T *p) { (p->*devF)(); }
+
+template<> __global__ void kernel();
+template<> __global__ void kernel(); // expected-error {{no function template matches function template specialization 'kernel'}}
+  // expected-note@-5 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+template<> __global__ void kernel >();
+template<> __global__ void kernel();
+
+template<> __global__ void kernel<>();
+template<> __global__ void kernel<>(); // expected-error {{no function template matches function template specialization 'kernel'}}
+   // expected-note@-11 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+template<> __global__ void kernel< >();
+template<> __global__ void kernel<>();
+
+template<> __global__ void kernel2(C *p);
+template<> __global__ void kernel2(C *p); // expected-error {{no function template matches function template specialization 'kernel2'}}
+  // expected-note@-16 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+template<> __global__ void kernel2 >(C *p);
+template<> __global__ void kernel2(C *p);
+
+void fun() {
+  kernel<><<<1,1>>>();
+  kernel<><<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}}
+   // expected-note@-24 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+  kernel< ><<<1,1>>>();
+  kernel<><<<1,1>>>();
+
+  kernel<<<1,1>>>();
+  kernel<<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}}
+  // expected-note@-30 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+  kernel ><<<1,1>>>();
+  kernel<<<1,1>>>();
+
+  C a;
+  kernel2<<<1,1>>>();
+  kernel2<<<1,1>>>(); // expected-error {{no matching function for call to 'kernel2'}}
+// expected-note@-36 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+  kernel2 ><<<1,1>>>();
+  kernel2<<<1,1>>>();
+}
Index: lib/Sema/SemaTemplate.cpp
===
--- lib/Sema/SemaTemplate.cpp
+++ lib/Sema/SemaTemplate.cpp
@@ -4534,6 +4534,7 @@
 
   EnterExpressionEvaluationContext ConstantEvaluated(
   SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
+  SemaRef.ExprEvalContexts.back().Template = Template;
   return SemaRef.SubstExpr(Param->getDefaultArgument(), TemplateArgLists);
 }
 
@@ -4784,8 +4785,8 @@
   TemplateArgument Result;
   unsigned CurSFINAEErrors = NumSFINAEErrors;
   ExprResult Res =
-CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(),
-  Result, CTAK);
+  CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(),
+Result, CTAK, dyn_cast(Template));
   if (Res.isInvalid())
 return true;
   // If the current template argument causes an error, give up now.
@@ -6154,6 +6155,22 @@
   return true;
 }
 
+namespace {
+FunctionDecl *GetFunctionDecl(Expr *Arg) {
+  Expr *E = Arg;
+  if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
+    E = UO ? UO->getSubExpr() : nullptr;
+  }
+  if (DeclRefExpr *DRE = dyn_cast_or_null<DeclRefExpr>(E)) {
+    ValueDecl *Entity = DRE ? DRE->getDecl() : nullptr;
+    if (Entity) {
+      if (auto Callee = dyn_cast<FunctionDecl>(Entity))
+        return Callee;
+    }
+  }
+  return nullptr;
+}
+} // namespace
 /// Check a template argument against its corresponding
 /// non-type template parameter.
 ///
@@ -6164,7 +6181,8 @@
 ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
QualType ParamType, Expr *Arg,
 

[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-09 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

But why?  Why do you want to limit this to just template arguments instead of 
all sorts of similar contexts?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1349342 , @rjmccall wrote:

> Sema won't necessarily have resolved a template decl when parsing a template 
> argument list, so trying to propagate that decl down to indicate that we're 
> resolving a template argument is not a good approach.
>
> I was going to suggest recording that we're within a template argument in the 
> current `ExpressionEvaluationContextRecord`, but in fact there's an even 
> simpler and more general solution: there's no reason to enforce this 
> restriction in *any* unevaluated context.  If someone wants to refer to a 
> device function within a `decltype` or `sizeof` operand, that should be fine. 
>  So you should just conditionalize the diagnostic on whether this is within 
> an unevaluated context.


For the deferred device/host check I still need to know the template decl. I 
have updated the patch to pass it through ExpressionEvaluationContextRecord.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 180888.
yaxunl added a comment.

Passing template decl by ExpressionEvaluationContextRecord.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411

Files:
  include/clang/Sema/Sema.h
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaTemplate.cpp
  test/SemaCUDA/kernel-template-with-func-arg.cu

Index: test/SemaCUDA/kernel-template-with-func-arg.cu
===
--- /dev/null
+++ test/SemaCUDA/kernel-template-with-func-arg.cu
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+struct C {
+  __device__ void devfun() {}
+  void hostfun() {}
+  template __device__ void devtempfun() {}
+  __device__ __host__ void devhostfun() {}
+};
+
+__device__ void devfun() {}
+__host__ void hostfun() {}
+template __device__ void devtempfun() {}
+__device__ __host__ void devhostfun() {}
+
+template  __global__ void kernel() { devF();}
+template  __global__ void kernel2(T *p) { (p->*devF)(); }
+
+template<> __global__ void kernel();
+template<> __global__ void kernel(); // expected-error {{no function template matches function template specialization 'kernel'}}
+  // expected-note@-5 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+template<> __global__ void kernel >();
+template<> __global__ void kernel();
+
+template<> __global__ void kernel<>();
+template<> __global__ void kernel<>(); // expected-error {{no function template matches function template specialization 'kernel'}}
+   // expected-note@-11 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+template<> __global__ void kernel< >();
+template<> __global__ void kernel<>();
+
+template<> __global__ void kernel2(C *p);
+template<> __global__ void kernel2(C *p); // expected-error {{no function template matches function template specialization 'kernel2'}}
+  // expected-note@-16 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+template<> __global__ void kernel2 >(C *p);
+template<> __global__ void kernel2(C *p);
+
+void fun() {
+  kernel<><<<1,1>>>();
+  kernel<><<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}}
+   // expected-note@-24 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+  kernel< ><<<1,1>>>();
+  kernel<><<<1,1>>>();
+
+  kernel<<<1,1>>>();
+  kernel<<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}}
+  // expected-note@-30 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+  kernel ><<<1,1>>>();
+  kernel<<<1,1>>>();
+
+  C a;
+  kernel2<<<1,1>>>();
+  kernel2<<<1,1>>>(); // expected-error {{no matching function for call to 'kernel2'}}
+// expected-note@-36 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+  kernel2 ><<<1,1>>>();
+  kernel2<<<1,1>>>();
+}
Index: lib/Sema/SemaTemplate.cpp
===
--- lib/Sema/SemaTemplate.cpp
+++ lib/Sema/SemaTemplate.cpp
@@ -4534,6 +4534,7 @@
 
   EnterExpressionEvaluationContext ConstantEvaluated(
   SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
+  SemaRef.ExprEvalContexts.back().Template = Template;
   return SemaRef.SubstExpr(Param->getDefaultArgument(), TemplateArgLists);
 }
 
@@ -4784,8 +4785,8 @@
   TemplateArgument Result;
   unsigned CurSFINAEErrors = NumSFINAEErrors;
   ExprResult Res =
-CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(),
-  Result, CTAK);
+  CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(),
+Result, CTAK, dyn_cast(Template));
   if (Res.isInvalid())
 return true;
   // If the current template argument causes an error, give up now.
@@ -6154,6 +6155,22 @@
   return true;
 }
 
+namespace {
+FunctionDecl *GetFunctionDecl(Expr *Arg) {
+  Expr *E = Arg;
+  if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
+    E = UO ? UO->getSubExpr() : nullptr;
+  }
+  if (DeclRefExpr *DRE = dyn_cast_or_null<DeclRefExpr>(E)) {
+    ValueDecl *Entity = DRE ? DRE->getDecl() : nullptr;
+    if (Entity) {
+      if (auto Callee = dyn_cast<FunctionDecl>(Entity))
+        return Callee;
+    }
+  }
+  return nullptr;
+}
+} // namespace
 /// Check a template argument against its corresponding
 /// non-type template parameter.
 ///
@@ -6164,7 +6181,8 @@
 ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
QualType ParamType, Expr 

[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 180848.
yaxunl added a comment.

Add a test for `__host__ __device__`.
Remove the flag IsParsingTemplateArgument in Sema. Instead, check
ExprEvalContexts to decide whether to disable the device/host consistency
check.
I did not condition the check on the Unevaluated ExprEvalContext because the
issue happens in a ConstantEvaluated context. Also, we do not want to remove
the check entirely; we just want to defer it until the argument evaluation is
done.
When the deferred check is performed, the ExprEvalContext is still
ConstantEvaluated, but its kind is no longer EK_TemplateArgument, so we can
use the expression kind to condition the check.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411

Files:
  include/clang/Sema/Sema.h
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaExpr.cpp
  lib/Sema/SemaTemplate.cpp

Index: lib/Sema/SemaTemplate.cpp
===
--- lib/Sema/SemaTemplate.cpp
+++ lib/Sema/SemaTemplate.cpp
@@ -4784,8 +4784,8 @@
   TemplateArgument Result;
   unsigned CurSFINAEErrors = NumSFINAEErrors;
   ExprResult Res =
-CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(),
-  Result, CTAK);
+  CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(),
+Result, CTAK, dyn_cast(Template));
   if (Res.isInvalid())
 return true;
   // If the current template argument causes an error, give up now.
@@ -6154,6 +6154,27 @@
   return true;
 }
 
+namespace {
+bool CheckCUDATemplateArgument(Sema &S, Expr *Arg, TemplateDecl *Template) {
+  if (Template) {
+    Expr *E = Arg;
+    if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
+      E = UO ? UO->getSubExpr() : nullptr;
+    }
+    if (DeclRefExpr *DRE = dyn_cast_or_null<DeclRefExpr>(E)) {
+      ValueDecl *Entity = DRE ? DRE->getDecl() : nullptr;
+      if (Entity) {
+        if (auto Callee = dyn_cast<FunctionDecl>(Entity))
+          if (auto Caller =
+                  dyn_cast<FunctionDecl>(Template->getTemplatedDecl()))
+            if (!S.CheckCUDACall(Arg->getBeginLoc(), Callee, Caller))
+              return false;
+      }
+    }
+  }
+  return true;
+}
+} // namespace
 /// Check a template argument against its corresponding
 /// non-type template parameter.
 ///
@@ -6164,7 +6185,8 @@
 ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
QualType ParamType, Expr *Arg,
TemplateArgument ,
-   CheckTemplateArgumentKind CTAK) {
+   CheckTemplateArgumentKind CTAK,
+   TemplateDecl *Template) {
   SourceLocation StartLoc = Arg->getBeginLoc();
 
   // If the parameter type somehow involves auto, deduce the type now.
@@ -6561,7 +6583,11 @@
   if (FunctionDecl *Fn = ResolveAddressOfOverloadedFunction(Arg, ParamType,
 true,
 FoundResult)) {
-if (DiagnoseUseOfDecl(Fn, Arg->getBeginLoc()))
+if (DiagnoseUseOfDecl(Fn, Arg->getBeginLoc(),
+  /*UnknownObjCClass=*/nullptr,
+  /*ObjCPropertyAccess=*/false,
+  /*AvoidPartialAvailabilityChecks=*/false,
+  /*ClassReciever=*/nullptr, Template))
   return ExprError();
 
 Arg = FixOverloadedFunctionReference(Arg, FoundResult, Fn);
@@ -6570,6 +6596,9 @@
 return ExprError();
 }
 
+if (!CheckCUDATemplateArgument(*this, Arg, Template))
+  return ExprError();
+
 if (!ParamType->isMemberPointerType()) {
   if (CheckTemplateArgumentAddressOfObjectOrFunction(*this, Param,
  ParamType,
Index: lib/Sema/SemaExpr.cpp
===
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -213,7 +213,8 @@
  const ObjCInterfaceDecl *UnknownObjCClass,
  bool ObjCPropertyAccess,
  bool AvoidPartialAvailabilityChecks,
- ObjCInterfaceDecl *ClassReceiver) {
+ ObjCInterfaceDecl *ClassReceiver,
+ TemplateDecl *Template) {
   SourceLocation Loc = Locs.front();
   if (getLangOpts().CPlusPlus && isa<FunctionDecl>(D)) {
 // If there were any diagnostics suppressed by template argument deduction,
@@ -270,7 +271,11 @@
 DeduceReturnType(FD, Loc))
   return true;
 
-    if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD))
+    if (getLangOpts().CUDA &&
+        !CheckCUDACall(
+            Loc, FD,
+            Template ? dyn_cast<FunctionDecl>(Template->getTemplatedDecl())
+ 

[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1350233 , @jlebar wrote:

>   __host__ void bar() {}
>   __device__ int bar() { return 0; }
>   __host__ __device__ void foo() { int x = bar(); }
>   template  __global__ void kernel() { devF();}
>  
>   kernel();
>
>
>
>
> > we DTRT for this case. Here __host__ bar needs to return int since foo() 
> > expects that. will add a test for that.
>
> `__host__ bar()` should not need to return int if `foo` is inline (or 
> templated), because then we should never codegen `foo` for host.  I guess my 
> question is, we should be sure that `kernel()` does not force an 
> inline/templated `foo` to be codegen'ed for host.  (Sorry that wasn't more 
> clear before.)


Sorry, I don't quite get it. bar() is a `__host__` function with a definition,
so clang does codegen for it. Clang also does codegen for foo() since it has
the `__host__ __device__` attribute.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-08 Thread Justin Lebar via Phabricator via cfe-commits
jlebar added a comment.

  __host__ void bar() {}
  __device__ int bar() { return 0; }
  __host__ __device__ void foo() { int x = bar(); }
  template  __global__ void kernel() { devF();}
  
  kernel();



> we DTRT for this case. Here __host__ bar needs to return int since foo() 
> expects that. will add a test for that.

`__host__ bar()` should not need to return int if `foo` is inline (or 
templated), because then we should never codegen `foo` for host.  I guess my 
question is, we should be sure that `kernel()` does not force an 
inline/templated `foo` to be codegen'ed for host.  (Sorry that wasn't more 
clear before.)

> I think n() should be resolved in the containing function context. n itself 
> is not template argument. the result of n() is.

Yes, that's a fair way to think about it.  It's just a bit weird that in this
context `<&n>` refers to one function but `n()` refers to another.  Maybe
that's unavoidable.  :shrug:

  __host__ void bar() {}
  __device__ int bar() { return 0; }
  __device__ auto baz() -> decltype(foo()) {} // which n() does it call? Presumably host, but:
  __device__ auto baz() -> decltype(bar()) {} // does baz return void or int? Presumably...the device one, int?

Now mix in templates and sizeof and...yeah. Rife for opportunities. :)

> I think this example is different from the issue which this patch tries to 
> address.

Agreed.

> Therefore I tend to suggest we keep things as they are, i.e., bar is 
> host/device resolved in its containing function context.

I'm not sure what the containing function context is in these examples, since
none of the definitions have a containing function.

Currently `baz()` returns void, but it sort of seems to me like the decltype 
should morally be executed within a `__device__` context?

Anyway I know much of this is a distraction from your patch.  So long as we 
have `__host__ __device__` tests I'm happy here.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-08 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D56411#1349364 , @jlebar wrote:

> Without reading the patch in detail (sorry) but looking mainly at the 
> testcase: It looks like we're not checking how overloading and `__host__ 
> __device__` functions play into this.  Maybe there are some additional 
> edge-cases to explore/check.


Will add a test for `__host__ __device__`.

> Just some examples:
> 
> Will we DTRT and parse `bar` call as calling the `device` overload of `bar` in
> 
>   __host__ void bar() {}
>   __device__ int bar() { return 0; }
>   __host__ __device__ void foo() { int x = bar(); }
>   template  __global__ void kernel() { devF();}
>   
>   kernel();
> 
> 
> ?  Also will we know that we don't have to codegen `foo` for host (so `foo` 
> is actually able to do things that only device functions can)?

We DTRT for this case. Here `__host__` bar needs to return int since foo()
expects that. Will add a test for that.

> Another one: How should the following template be instantiated?
> 
>   __host__ constexpr int n() { return 0; }
>   __device__ constexpr int n() { return 1; }
>   template  __global__ void kernel() {}
>   
>   kernel
> 
> 
> Presumably the call to `n` should be the host one?  That seems correct to me, 
> but then it's pretty odd that a function pointer template argument would 
> point to a *device* function.  Maybe that's the right thing, but I bet I can 
> come up with something weird, like:

I think n() should be resolved in the containing function context. n itself is
not the template argument; the result of n() is.

> 
> 
>   __host__ void bar() {}
>   __device__ int bar() { return 0; }
>   __device__ auto baz() -> decltype(foo()) {} // which n() does it call? 
>  Presumably host, but:
>   __device__ auto baz() -> decltype(bar()) {}  // does baz return void or 
> int?  Presumably...the device one, int?
> 
> 
> Now mix in templates and sizeof and...yeah.  Rife for opportunities.  :)

I think this example is different from the issue this patch tries to address.
In the case of a function-type template parameter, the host/device resolution
is less controversial: the function argument is supposed to be called by the
function template, so its host/device attribute should be consistent with the
function template. In the decltype example above, no such requirement exists.
Therefore I tend to suggest we keep things as they are, i.e., bar is
host/device resolved in its containing function context.
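
To sketch that distinction (hypothetical code, not part of the patch or its
tests):

  __device__ int devfn() { return 0; }
  template <int (*F)()> __global__ void kern() { F(); } // F is actually called on the device,
                                                         // so a __device__ argument is the consistent choice
  void launch() { kern<devfn><<<1,1>>>(); }              // host code merely names devfn here

  __host__ int hostfn() { return 0; }
  __device__ auto probe() -> decltype(hostfn()) { return 0; } // only the type of hostfn() is used; no call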


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-08 Thread Justin Lebar via Phabricator via cfe-commits
jlebar added a comment.

Without reading the patch in detail (sorry) but looking mainly at the testcase: 
It looks like we're not checking how overloading and `__host__ __device__` 
functions play into this.  Maybe there are some additional edge-cases to 
explore/check.

Just some examples:

Will we DTRT and parse `bar` call as calling the `device` overload of `bar` in

  __host__ void bar() {}
  __device__ int bar() { return 0; }
  __host__ __device__ void foo() { int x = bar(); }
  template  __global__ void kernel() { devF();}
  
  kernel();

?  Also will we know that we don't have to codegen `foo` for host (so `foo` is 
actually able to do things that only device functions can)?

Another one: How should the following template be instantiated?

  __host__ constexpr int n() { return 0; }
  __device__ constexpr int n() { return 1; }
  template  __global__ void kernel() {}
  
  kernel

Presumably the call to `n` should be the host one?  That seems correct to me, 
but then it's pretty odd that a function pointer template argument would point 
to a *device* function.  Maybe that's the right thing, but I bet I can come up 
with something weird, like:

  __host__ void bar() {}
  __device__ int bar() { return 0; }
  __device__ auto baz() -> decltype(foo()) {} // which n() does it call? Presumably host, but:
  __device__ auto baz() -> decltype(bar()) {} // does baz return void or int? Presumably...the device one, int?

Now mix in templates and sizeof and...yeah.  Rife for opportunities.  :)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-07 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

Sema won't necessarily have resolved a template decl when parsing a template 
argument list, so trying to propagate that decl down to indicate that we're 
resolving a template argument is not a good approach.

I was going to suggest recording that we're within a template argument in the 
current `ExpressionEvaluationContextRecord`, but in fact there's an even 
simpler and more general solution: there's no reason to enforce this 
restriction in *any* unevaluated context.  If someone wants to refer to a 
device function within a `decltype` or `sizeof` operand, that should be fine.  
So you should just conditionalize the diagnostic on whether this is within an 
unevaluated context.
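
For example, host code along these lines would then be accepted (a hedged
sketch with hypothetical names, assuming the usual CUDA attribute macros):

  __device__ int dev() { return 0; }

  __host__ void host_user() {
    constexpr auto sz = sizeof(dev()); // unevaluated operand: no device call is made
    decltype(dev()) x = 0;             // likewise, only the type of dev() is used
    (void)sz; (void)x;
  }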


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56411/new/

https://reviews.llvm.org/D56411





[PATCH] D56411: [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-01-07 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl created this revision.
yaxunl added reviewers: tra, rjmccall.

If a kernel template has a function as its template parameter, a device
function should be allowed as the template argument, since a kernel can call a
device function. However, currently, if the kernel template is instantiated in
a host function, clang emits an error message saying the device function is an
invalid candidate for the template parameter.

This happens because clang checks the reference to the device function while
parsing the template arguments. At this point, the template is not
instantiated yet. Clang incorrectly assumes the device function is called by
the host function and emits the error message.

This patch fixes the issue by disabling the check for device functions while
parsing template arguments and deferring it to the instantiation of the
template. At that point, the template decl is already available, so the check
can be done against the instantiated function template decl.


https://reviews.llvm.org/D56411

Files:
  include/clang/Sema/Sema.h
  lib/Parse/ParseTemplate.cpp
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaExpr.cpp
  lib/Sema/SemaTemplate.cpp
  test/SemaCUDA/kernel-template-with-device-func-arg.cu

Index: test/SemaCUDA/kernel-template-with-device-func-arg.cu
===
--- /dev/null
+++ test/SemaCUDA/kernel-template-with-device-func-arg.cu
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+struct C {
+  __device__ void devfun() {}
+  void hostfun() {}
+  template __device__ void devtempfun() {}
+};
+
+__device__ void devfun() {}
+__host__ void hostfun() {}
+template __device__ void devtempfun() {}
+
+template  __global__ void kernel() { devF();}
+template  __global__ void kernel2(T *p) { (p->*devF)(); }
+
+template<> __global__ void kernel();
+template<> __global__ void kernel(); // expected-error {{no function template matches function template specialization 'kernel'}}
+  // expected-note@-5 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+template<> __global__ void kernel >();
+
+template<> __global__ void kernel<>();
+template<> __global__ void kernel<>(); // expected-error {{no function template matches function template specialization 'kernel'}}
+   // expected-note@-10 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+template<> __global__ void kernel< >();
+
+template<> __global__ void kernel2(C *p);
+template<> __global__ void kernel2(C *p); // expected-error {{no function template matches function template specialization 'kernel2'}}
+  // expected-note@-14 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+template<> __global__ void kernel2 >(C *p);
+
+void fun() {
+  kernel<><<<1,1>>>();
+  kernel<><<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}}
+   // expected-note@-21 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+  kernel< ><<<1,1>>>();
+
+  kernel<<<1,1>>>();
+  kernel<<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}}
+  // expected-note@-26 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+  kernel ><<<1,1>>>();
+
+  C a;
+  kernel2<<<1,1>>>();
+  kernel2<<<1,1>>>(); // expected-error {{no matching function for call to 'kernel2'}}
+// expected-note@-31 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}}
+  kernel2 ><<<1,1>>>();
+}
Index: lib/Sema/SemaTemplate.cpp
===
--- lib/Sema/SemaTemplate.cpp
+++ lib/Sema/SemaTemplate.cpp
@@ -4753,8 +4753,8 @@
   TemplateArgument Result;
   unsigned CurSFINAEErrors = NumSFINAEErrors;
   ExprResult Res =
-CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(),
-  Result, CTAK);
+  CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(),
+Result, CTAK, dyn_cast(Template));
   if (Res.isInvalid())
 return true;
   // If the current template argument causes an error, give up now.
@@ -6123,6 +6123,27 @@
   return true;
 }
 
+namespace {
+bool CheckCUDATemplateArgument(Sema &S, Expr *Arg, TemplateDecl *Template) {
+  if (Template) {
+    Expr *E = Arg;
+    if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
+      E = UO ? UO->getSubExpr() : nullptr;
+    }
+    if (DeclRefExpr *DRE = dyn_cast_or_null<DeclRefExpr>(E)) {
+      ValueDecl *Entity = DRE ? DRE->getDecl() : nullptr;
+      if