[PATCH] D154300: [CUDA][HIP] Fix template argument deduction

2023-08-08 Thread Yaxun Liu via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGea72a4e6547f: [CUDA][HIP] Fix template argument deduction 
(authored by yaxunl).
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154300/new/

https://reviews.llvm.org/D154300

Files:
  clang/lib/Sema/SemaOverload.cpp
  clang/test/SemaCUDA/template-arg-deduction.cu

Index: clang/test/SemaCUDA/template-arg-deduction.cu
===
--- /dev/null
+++ clang/test/SemaCUDA/template-arg-deduction.cu
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+
+// expected-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+void foo();
+__device__ void foo();
+
+template
+void host_temp(F f);
+
+template
+__device__ void device_temp(F f);
+
+void host_caller() {
+  host_temp(foo);
+}
+
+__global__ void kernel_caller() {
+  device_temp(foo);
+}
+
+__device__ void device_caller() {
+  device_temp(foo);
+}
Index: clang/lib/Sema/SemaOverload.cpp
===
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -12770,6 +12770,13 @@
   DeclAccessPair DAP;
   SmallVector AmbiguousDecls;
 
+  // Return positive for better, negative for worse, 0 for equal preference.
+  auto CheckCUDAPreference = [&](FunctionDecl *FD1, FunctionDecl *FD2) {
+FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+return static_cast(IdentifyCUDAPreference(Caller, FD1)) -
+   static_cast(IdentifyCUDAPreference(Caller, FD2));
+  };
+
   auto CheckMoreConstrained = [&](FunctionDecl *FD1,
   FunctionDecl *FD2) -> std::optional {
 if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction())
@@ -12800,9 +12807,31 @@
 if (!checkAddressOfFunctionIsAvailable(FD))
   continue;
 
+// If we found a better result, update Result.
+auto FoundBetter = [&]() {
+  IsResultAmbiguous = false;
+  DAP = I.getPair();
+  Result = FD;
+};
+
 // We have more than one result - see if it is more constrained than the
 // previous one.
 if (Result) {
+  // Check CUDA preference first. If the candidates have differennt CUDA
+  // preference, choose the one with higher CUDA preference. Otherwise,
+  // choose the one with more constraints.
+  if (getLangOpts().CUDA) {
+int PreferenceByCUDA = CheckCUDAPreference(FD, Result);
+// FD has different preference than Result.
+if (PreferenceByCUDA != 0) {
+  // FD is more preferable than Result.
+  if (PreferenceByCUDA > 0)
+FoundBetter();
+  continue;
+}
+  }
+  // FD has the same CUDA prefernece than Result. Continue check
+  // constraints.
   std::optional MoreConstrainedThanPrevious =
   CheckMoreConstrained(FD, Result);
   if (!MoreConstrainedThanPrevious) {
@@ -12814,9 +12843,7 @@
 continue;
   // FD is more constrained - replace Result with it.
 }
-IsResultAmbiguous = false;
-DAP = I.getPair();
-Result = FD;
+FoundBetter();
   }
 
   if (IsResultAmbiguous)
@@ -12826,9 +12853,15 @@
 SmallVector ResultAC;
 // We skipped over some ambiguous declarations which might be ambiguous with
 // the selected result.
-for (FunctionDecl *Skipped : AmbiguousDecls)
+for (FunctionDecl *Skipped : AmbiguousDecls) {
+  // If skipped candidate has different CUDA preference than the result,
+  // there is no ambiguity. Otherwise check whether they have different
+  // constraints.
+  if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result) != 0)
+continue;
   if (!CheckMoreConstrained(Skipped, Result))
 return nullptr;
+}
 Pair = DAP;
   }
   return Result;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154300: [CUDA][HIP] Fix template argument deduction

2023-08-07 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 547901.
yaxunl marked an inline comment as done.
yaxunl added a comment.

revised by comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154300/new/

https://reviews.llvm.org/D154300

Files:
  clang/lib/Sema/SemaOverload.cpp
  clang/test/SemaCUDA/template-arg-deduction.cu

Index: clang/test/SemaCUDA/template-arg-deduction.cu
===
--- /dev/null
+++ clang/test/SemaCUDA/template-arg-deduction.cu
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+
+// expected-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+void foo();
+__device__ void foo();
+
+template
+void host_temp(F f);
+
+template
+__device__ void device_temp(F f);
+
+void host_caller() {
+  host_temp(foo);
+}
+
+__global__ void kernel_caller() {
+  device_temp(foo);
+}
+
+__device__ void device_caller() {
+  device_temp(foo);
+}
Index: clang/lib/Sema/SemaOverload.cpp
===
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -12750,6 +12750,13 @@
   DeclAccessPair DAP;
   SmallVector AmbiguousDecls;
 
+  // Return positive for better, negative for worse, 0 for equal preference.
+  auto CheckCUDAPreference = [&](FunctionDecl *FD1, FunctionDecl *FD2) {
+FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+return static_cast(IdentifyCUDAPreference(Caller, FD1)) -
+   static_cast(IdentifyCUDAPreference(Caller, FD2));
+  };
+
   auto CheckMoreConstrained = [&](FunctionDecl *FD1,
   FunctionDecl *FD2) -> std::optional {
 if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction())
@@ -12780,9 +12787,31 @@
 if (!checkAddressOfFunctionIsAvailable(FD))
   continue;
 
+// If we found a better result, update Result.
+auto FoundBetter = [&]() {
+  IsResultAmbiguous = false;
+  DAP = I.getPair();
+  Result = FD;
+};
+
 // We have more than one result - see if it is more constrained than the
 // previous one.
 if (Result) {
+  // Check CUDA preference first. If the candidates have differennt CUDA
+  // preference, choose the one with higher CUDA preference. Otherwise,
+  // choose the one with more constraints.
+  if (getLangOpts().CUDA) {
+int PreferenceByCUDA = CheckCUDAPreference(FD, Result);
+// FD has different preference than Result.
+if (PreferenceByCUDA != 0) {
+  // FD is more preferable than Result.
+  if (PreferenceByCUDA > 0)
+FoundBetter();
+  continue;
+}
+  }
+  // FD has the same CUDA prefernece than Result. Continue check
+  // constraints.
   std::optional MoreConstrainedThanPrevious =
   CheckMoreConstrained(FD, Result);
   if (!MoreConstrainedThanPrevious) {
@@ -12794,9 +12823,7 @@
 continue;
   // FD is more constrained - replace Result with it.
 }
-IsResultAmbiguous = false;
-DAP = I.getPair();
-Result = FD;
+FoundBetter();
   }
 
   if (IsResultAmbiguous)
@@ -12806,9 +12833,15 @@
 SmallVector ResultAC;
 // We skipped over some ambiguous declarations which might be ambiguous with
 // the selected result.
-for (FunctionDecl *Skipped : AmbiguousDecls)
+for (FunctionDecl *Skipped : AmbiguousDecls) {
+  // If skipped candidate has different CUDA preference than the result,
+  // there is no ambiguity. Otherwise check whether they have different
+  // constraints.
+  if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result) != 0)
+continue;
   if (!CheckMoreConstrained(Skipped, Result))
 return nullptr;
+}
 Pair = DAP;
   }
   return Result;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154300: [CUDA][HIP] Fix template argument deduction

2023-08-07 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked an inline comment as done.
yaxunl added inline comments.



Comment at: clang/lib/Sema/SemaOverload.cpp:12758-12764
+std::optional MorePreferableByCUDA =
+CheckCUDAPreference(FD, Result);
+// If FD has different CUDA preference than Result.
+if (MorePreferableByCUDA) {
+  // FD is less preferable than Result.
+  if (!*MorePreferableByCUDA)
+continue;

tra wrote:
> Maybe `CheckCUDAPreference` should return -1/0/1 or an enum. std::optional 
> does not seem to be very readable here.
> 
> E.g. `if(MorePreferableByCUDA)` sounds like it's going to be satisfied when 
> FD is a better choice than Result, but it's not the case.
> I think this would be easier to follow:
> ```
> if (CheckCUDAPreference(FD, Result) <= 0) // or `!= CP_BETTER`
>  continue;
> ```
> 
will use an integer for that.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154300/new/

https://reviews.llvm.org/D154300

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154300: [CUDA][HIP] Fix template argument deduction

2023-07-11 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/Sema/SemaOverload.cpp:12758-12764
+std::optional MorePreferableByCUDA =
+CheckCUDAPreference(FD, Result);
+// If FD has different CUDA preference than Result.
+if (MorePreferableByCUDA) {
+  // FD is less preferable than Result.
+  if (!*MorePreferableByCUDA)
+continue;

Maybe `CheckCUDAPreference` should return -1/0/1 or an enum. std::optional does 
not seem to be very readable here.

E.g. `if(MorePreferableByCUDA)` sounds like it's going to be satisfied when FD 
is a better choice than Result, but it's not the case.
I think this would be easier to follow:
```
if (CheckCUDAPreference(FD, Result) <= 0) // or `!= CP_BETTER`
 continue;
```



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154300/new/

https://reviews.llvm.org/D154300

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154300: [CUDA][HIP] Fix template argument deduction

2023-07-02 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl created this revision.
yaxunl added reviewers: tra, rsmith.
Herald added subscribers: mattd, carlosgalvezp.
Herald added a project: All.
yaxunl requested review of this revision.

nvcc allows using std::malloc and std::free in device code.
When std::malloc or std::free is passed as a template
function argument with template argument deduction,
there is no diagnostics. e.g.

  #include 
  
  __global__ void kern() {
  void *p = std::malloc(1);
  std::free(p);
  }
  int main()
  {
  
  std::shared_ptr a;
  a = std::shared_ptr(
(float*)std::malloc(sizeof(float) * 100),
std::free
  );
  return 0;
  }

However, the same code fails to compile with clang
(https://godbolt.org/z/1roGvo6YY). The reason is
that clang does not have logic to choose a function
argument from an overloaded set of candidates
based on host/device attributes for template argument
deduction.

Currently, clang does have a logic to choose a candidate
based on the constraints of the candidates. This patch
extends that logic to account for the CUDA host/device-based
preference.


https://reviews.llvm.org/D154300

Files:
  clang/lib/Sema/SemaOverload.cpp
  clang/test/SemaCUDA/template-arg-deduction.cu

Index: clang/test/SemaCUDA/template-arg-deduction.cu
===
--- /dev/null
+++ clang/test/SemaCUDA/template-arg-deduction.cu
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+
+// expected-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+void foo();
+__device__ void foo();
+
+template
+void host_temp(F f);
+
+template
+__device__ void device_temp(F f);
+
+void host_caller() {
+  host_temp(foo);
+}
+
+__global__ void kernel_caller() {
+  device_temp(foo);
+}
+
+__device__ void device_caller() {
+  device_temp(foo);
+}
Index: clang/lib/Sema/SemaOverload.cpp
===
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -12697,6 +12697,20 @@
   DeclAccessPair DAP;
   SmallVector AmbiguousDecls;
 
+  auto CheckCUDAPreference = [&](FunctionDecl *FD1,
+ FunctionDecl *FD2) -> std::optional {
+FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+int Preference1 = IdentifyCUDAPreference(Caller, FD1);
+int Preference2 = IdentifyCUDAPreference(Caller, FD2);
+if (Preference1 > Preference2) {
+  return true;
+} else if (Preference1 < Preference2) {
+  return false;
+} else {
+  return std::nullopt;
+}
+  };
+
   auto CheckMoreConstrained = [&](FunctionDecl *FD1,
   FunctionDecl *FD2) -> std::optional {
 if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction())
@@ -12727,9 +12741,33 @@
 if (!checkAddressOfFunctionIsAvailable(FD))
   continue;
 
+// If we found a better result, update Result.
+auto FoundBetter = [&]() {
+  IsResultAmbiguous = false;
+  DAP = I.getPair();
+  Result = FD;
+};
+
 // We have more than one result - see if it is more constrained than the
 // previous one.
 if (Result) {
+  // Check CUDA preference first. If the candidates have differennt CUDA
+  // preference, choose the one with higher CUDA preference. Otherwise,
+  // choose the one with more constraints.
+  if (getLangOpts().CUDA) {
+std::optional MorePreferableByCUDA =
+CheckCUDAPreference(FD, Result);
+// If FD has different CUDA preference than Result.
+if (MorePreferableByCUDA) {
+  // FD is less preferable than Result.
+  if (!*MorePreferableByCUDA)
+continue;
+  // FD is more preferable than Result.
+  FoundBetter();
+}
+  }
+  // FD has the same CUDA prefernece than Result. Continue check
+  // constraints.
   std::optional MoreConstrainedThanPrevious =
   CheckMoreConstrained(FD, Result);
   if (!MoreConstrainedThanPrevious) {
@@ -12741,9 +12779,7 @@
 continue;
   // FD is more constrained - replace Result with it.
 }
-IsResultAmbiguous = false;
-DAP = I.getPair();
-Result = FD;
+FoundBetter();
   }
 
   if (IsResultAmbiguous)
@@ -12753,9 +12789,15 @@
 SmallVector ResultAC;
 // We skipped over some ambiguous declarations which might be ambiguous with
 // the selected result.
-for (FunctionDecl *Skipped : AmbiguousDecls)
+for (FunctionDecl *Skipped : AmbiguousDecls) {
+  // If skipped candidate has different CUDA preference than the result,
+  // there is no ambiguity. Otherwise check whether they have different
+  // constraints.
+  if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result))
+continue;
   if (!CheckMoreConstrained(Skipped, Result))
 return nullptr;
+}