[PATCH] D154300: [CUDA][HIP] Fix template argument deduction
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rGea72a4e6547f: [CUDA][HIP] Fix template argument deduction (authored by yaxunl). Herald added a project: clang. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D154300/new/ https://reviews.llvm.org/D154300 Files: clang/lib/Sema/SemaOverload.cpp clang/test/SemaCUDA/template-arg-deduction.cu Index: clang/test/SemaCUDA/template-arg-deduction.cu === --- /dev/null +++ clang/test/SemaCUDA/template-arg-deduction.cu @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s + +// expected-no-diagnostics + +#include "Inputs/cuda.h" + +void foo(); +__device__ void foo(); + +template +void host_temp(F f); + +template +__device__ void device_temp(F f); + +void host_caller() { + host_temp(foo); +} + +__global__ void kernel_caller() { + device_temp(foo); +} + +__device__ void device_caller() { + device_temp(foo); +} Index: clang/lib/Sema/SemaOverload.cpp === --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -12770,6 +12770,13 @@ DeclAccessPair DAP; SmallVector AmbiguousDecls; + // Return positive for better, negative for worse, 0 for equal preference. + auto CheckCUDAPreference = [&](FunctionDecl *FD1, FunctionDecl *FD2) { +FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); +return static_cast(IdentifyCUDAPreference(Caller, FD1)) - + static_cast(IdentifyCUDAPreference(Caller, FD2)); + }; + auto CheckMoreConstrained = [&](FunctionDecl *FD1, FunctionDecl *FD2) -> std::optional { if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction()) @@ -12800,9 +12807,31 @@ if (!checkAddressOfFunctionIsAvailable(FD)) continue; +// If we found a better result, update Result. +auto FoundBetter = [&]() { + IsResultAmbiguous = false; + DAP = I.getPair(); + Result = FD; +}; + // We have more than one result - see if it is more constrained than the // previous one. if (Result) { + // Check CUDA preference first. If the candidates have differennt CUDA + // preference, choose the one with higher CUDA preference. Otherwise, + // choose the one with more constraints. + if (getLangOpts().CUDA) { +int PreferenceByCUDA = CheckCUDAPreference(FD, Result); +// FD has different preference than Result. +if (PreferenceByCUDA != 0) { + // FD is more preferable than Result. + if (PreferenceByCUDA > 0) +FoundBetter(); + continue; +} + } + // FD has the same CUDA prefernece than Result. Continue check + // constraints. std::optional MoreConstrainedThanPrevious = CheckMoreConstrained(FD, Result); if (!MoreConstrainedThanPrevious) { @@ -12814,9 +12843,7 @@ continue; // FD is more constrained - replace Result with it. } -IsResultAmbiguous = false; -DAP = I.getPair(); -Result = FD; +FoundBetter(); } if (IsResultAmbiguous) @@ -12826,9 +12853,15 @@ SmallVector ResultAC; // We skipped over some ambiguous declarations which might be ambiguous with // the selected result. -for (FunctionDecl *Skipped : AmbiguousDecls) +for (FunctionDecl *Skipped : AmbiguousDecls) { + // If skipped candidate has different CUDA preference than the result, + // there is no ambiguity. Otherwise check whether they have different + // constraints. + if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result) != 0) +continue; if (!CheckMoreConstrained(Skipped, Result)) return nullptr; +} Pair = DAP; } return Result; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D154300: [CUDA][HIP] Fix template argument deduction
yaxunl updated this revision to Diff 547901. yaxunl marked an inline comment as done. yaxunl added a comment. revised by comments CHANGES SINCE LAST ACTION https://reviews.llvm.org/D154300/new/ https://reviews.llvm.org/D154300 Files: clang/lib/Sema/SemaOverload.cpp clang/test/SemaCUDA/template-arg-deduction.cu Index: clang/test/SemaCUDA/template-arg-deduction.cu === --- /dev/null +++ clang/test/SemaCUDA/template-arg-deduction.cu @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s + +// expected-no-diagnostics + +#include "Inputs/cuda.h" + +void foo(); +__device__ void foo(); + +template +void host_temp(F f); + +template +__device__ void device_temp(F f); + +void host_caller() { + host_temp(foo); +} + +__global__ void kernel_caller() { + device_temp(foo); +} + +__device__ void device_caller() { + device_temp(foo); +} Index: clang/lib/Sema/SemaOverload.cpp === --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -12750,6 +12750,13 @@ DeclAccessPair DAP; SmallVector AmbiguousDecls; + // Return positive for better, negative for worse, 0 for equal preference. + auto CheckCUDAPreference = [&](FunctionDecl *FD1, FunctionDecl *FD2) { +FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); +return static_cast(IdentifyCUDAPreference(Caller, FD1)) - + static_cast(IdentifyCUDAPreference(Caller, FD2)); + }; + auto CheckMoreConstrained = [&](FunctionDecl *FD1, FunctionDecl *FD2) -> std::optional { if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction()) @@ -12780,9 +12787,31 @@ if (!checkAddressOfFunctionIsAvailable(FD)) continue; +// If we found a better result, update Result. +auto FoundBetter = [&]() { + IsResultAmbiguous = false; + DAP = I.getPair(); + Result = FD; +}; + // We have more than one result - see if it is more constrained than the // previous one. if (Result) { + // Check CUDA preference first. If the candidates have differennt CUDA + // preference, choose the one with higher CUDA preference. Otherwise, + // choose the one with more constraints. + if (getLangOpts().CUDA) { +int PreferenceByCUDA = CheckCUDAPreference(FD, Result); +// FD has different preference than Result. +if (PreferenceByCUDA != 0) { + // FD is more preferable than Result. + if (PreferenceByCUDA > 0) +FoundBetter(); + continue; +} + } + // FD has the same CUDA prefernece than Result. Continue check + // constraints. std::optional MoreConstrainedThanPrevious = CheckMoreConstrained(FD, Result); if (!MoreConstrainedThanPrevious) { @@ -12794,9 +12823,7 @@ continue; // FD is more constrained - replace Result with it. } -IsResultAmbiguous = false; -DAP = I.getPair(); -Result = FD; +FoundBetter(); } if (IsResultAmbiguous) @@ -12806,9 +12833,15 @@ SmallVector ResultAC; // We skipped over some ambiguous declarations which might be ambiguous with // the selected result. -for (FunctionDecl *Skipped : AmbiguousDecls) +for (FunctionDecl *Skipped : AmbiguousDecls) { + // If skipped candidate has different CUDA preference than the result, + // there is no ambiguity. Otherwise check whether they have different + // constraints. + if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result) != 0) +continue; if (!CheckMoreConstrained(Skipped, Result)) return nullptr; +} Pair = DAP; } return Result; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D154300: [CUDA][HIP] Fix template argument deduction
yaxunl marked an inline comment as done. yaxunl added inline comments. Comment at: clang/lib/Sema/SemaOverload.cpp:12758-12764 +std::optional MorePreferableByCUDA = +CheckCUDAPreference(FD, Result); +// If FD has different CUDA preference than Result. +if (MorePreferableByCUDA) { + // FD is less preferable than Result. + if (!*MorePreferableByCUDA) +continue; tra wrote: > Maybe `CheckCUDAPreference` should return -1/0/1 or an enum. std::optional > does not seem to be very readable here. > > E.g. `if(MorePreferableByCUDA)` sounds like it's going to be satisfied when > FD is a better choice than Result, but it's not the case. > I think this would be easier to follow: > ``` > if (CheckCUDAPreference(FD, Result) <= 0) // or `!= CP_BETTER` > continue; > ``` > will use an integer for that. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D154300/new/ https://reviews.llvm.org/D154300 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D154300: [CUDA][HIP] Fix template argument deduction
tra added inline comments. Comment at: clang/lib/Sema/SemaOverload.cpp:12758-12764 +std::optional MorePreferableByCUDA = +CheckCUDAPreference(FD, Result); +// If FD has different CUDA preference than Result. +if (MorePreferableByCUDA) { + // FD is less preferable than Result. + if (!*MorePreferableByCUDA) +continue; Maybe `CheckCUDAPreference` should return -1/0/1 or an enum. std::optional does not seem to be very readable here. E.g. `if(MorePreferableByCUDA)` sounds like it's going to be satisfied when FD is a better choice than Result, but it's not the case. I think this would be easier to follow: ``` if (CheckCUDAPreference(FD, Result) <= 0) // or `!= CP_BETTER` continue; ``` CHANGES SINCE LAST ACTION https://reviews.llvm.org/D154300/new/ https://reviews.llvm.org/D154300 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D154300: [CUDA][HIP] Fix template argument deduction
yaxunl created this revision. yaxunl added reviewers: tra, rsmith. Herald added subscribers: mattd, carlosgalvezp. Herald added a project: All. yaxunl requested review of this revision. nvcc allows using std::malloc and std::free in device code. When std::malloc or std::free is passed as a template function argument with template argument deduction, there is no diagnostics. e.g. #include __global__ void kern() { void *p = std::malloc(1); std::free(p); } int main() { std::shared_ptr a; a = std::shared_ptr( (float*)std::malloc(sizeof(float) * 100), std::free ); return 0; } However, the same code fails to compile with clang (https://godbolt.org/z/1roGvo6YY). The reason is that clang does not have logic to choose a function argument from an overloaded set of candidates based on host/device attributes for template argument deduction. Currently, clang does have a logic to choose a candidate based on the constraints of the candidates. This patch extends that logic to account for the CUDA host/device-based preference. https://reviews.llvm.org/D154300 Files: clang/lib/Sema/SemaOverload.cpp clang/test/SemaCUDA/template-arg-deduction.cu Index: clang/test/SemaCUDA/template-arg-deduction.cu === --- /dev/null +++ clang/test/SemaCUDA/template-arg-deduction.cu @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s + +// expected-no-diagnostics + +#include "Inputs/cuda.h" + +void foo(); +__device__ void foo(); + +template +void host_temp(F f); + +template +__device__ void device_temp(F f); + +void host_caller() { + host_temp(foo); +} + +__global__ void kernel_caller() { + device_temp(foo); +} + +__device__ void device_caller() { + device_temp(foo); +} Index: clang/lib/Sema/SemaOverload.cpp === --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -12697,6 +12697,20 @@ DeclAccessPair DAP; SmallVector AmbiguousDecls; + auto CheckCUDAPreference = [&](FunctionDecl *FD1, + FunctionDecl *FD2) -> std::optional { +FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); +int Preference1 = IdentifyCUDAPreference(Caller, FD1); +int Preference2 = IdentifyCUDAPreference(Caller, FD2); +if (Preference1 > Preference2) { + return true; +} else if (Preference1 < Preference2) { + return false; +} else { + return std::nullopt; +} + }; + auto CheckMoreConstrained = [&](FunctionDecl *FD1, FunctionDecl *FD2) -> std::optional { if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction()) @@ -12727,9 +12741,33 @@ if (!checkAddressOfFunctionIsAvailable(FD)) continue; +// If we found a better result, update Result. +auto FoundBetter = [&]() { + IsResultAmbiguous = false; + DAP = I.getPair(); + Result = FD; +}; + // We have more than one result - see if it is more constrained than the // previous one. if (Result) { + // Check CUDA preference first. If the candidates have differennt CUDA + // preference, choose the one with higher CUDA preference. Otherwise, + // choose the one with more constraints. + if (getLangOpts().CUDA) { +std::optional MorePreferableByCUDA = +CheckCUDAPreference(FD, Result); +// If FD has different CUDA preference than Result. +if (MorePreferableByCUDA) { + // FD is less preferable than Result. + if (!*MorePreferableByCUDA) +continue; + // FD is more preferable than Result. + FoundBetter(); +} + } + // FD has the same CUDA prefernece than Result. Continue check + // constraints. std::optional MoreConstrainedThanPrevious = CheckMoreConstrained(FD, Result); if (!MoreConstrainedThanPrevious) { @@ -12741,9 +12779,7 @@ continue; // FD is more constrained - replace Result with it. } -IsResultAmbiguous = false; -DAP = I.getPair(); -Result = FD; +FoundBetter(); } if (IsResultAmbiguous) @@ -12753,9 +12789,15 @@ SmallVector ResultAC; // We skipped over some ambiguous declarations which might be ambiguous with // the selected result. -for (FunctionDecl *Skipped : AmbiguousDecls) +for (FunctionDecl *Skipped : AmbiguousDecls) { + // If skipped candidate has different CUDA preference than the result, + // there is no ambiguity. Otherwise check whether they have different + // constraints. + if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result)) +continue; if (!CheckMoreConstrained(Skipped, Result)) return nullptr; +}