Author: Weibo He Date: 2026-05-08T15:18:43+08:00 New Revision: 76c69de591b388ffb9fbf57bae337cafde811008
URL: https://github.com/llvm/llvm-project/commit/76c69de591b388ffb9fbf57bae337cafde811008 DIFF: https://github.com/llvm/llvm-project/commit/76c69de591b388ffb9fbf57bae337cafde811008.diff LOG: [CUDA/HIP] Do not check function calls in discarded statement (#194606) Previously, calling a host-device mismatch function inside a discarded `if constexpr` branch would trigger an error. This patch recognizes that discarded statements are never instantiated and allows such code. Added: Modified: clang/docs/ReleaseNotes.rst clang/lib/Sema/SemaCUDA.cpp clang/test/SemaCUDA/call-device-fn-from-host.cu clang/test/SemaCUDA/call-host-fn-from-device.cu clang/test/SemaCUDA/device-kernel-call.cu Removed: ################################################################################ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c83a1bd0ab2e9..ac462e3bf4732 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -481,6 +481,9 @@ Improvements to Clang's diagnostics - Removed the body of lambdas from some diagnostic messages. +- Fixed false positive host-device mismatch errors in discarded `if constexpr` branches for CUDA/HIP; + such calls are now correctly skipped. + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index c086f9a32ce4e..9e05de941f335 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -84,10 +84,6 @@ ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc, return ExprError( Diag(LLLLoc, diag::err_cuda_device_kernel_launch_not_supported)); - if (IsDeviceKernelCall && !getLangOpts().GPURelocatableDeviceCode) - return ExprError( - Diag(LLLLoc, diag::err_cuda_device_kernel_launch_require_rdc)); - FunctionDecl *ConfigDecl = IsDeviceKernelCall ? getASTContext().getcudaLaunchDeviceDecl() : getASTContext().getcudaConfigureCallDecl(); @@ -990,7 +986,8 @@ bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) { assert(Callee && "Callee may not be null."); const auto &ExprEvalCtx = SemaRef.currentEvaluationContext(); - if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated()) + if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated() || + ExprEvalCtx.isDiscardedStatementContext()) return true; // C++ deduction guides participate in overload resolution but are not @@ -1026,9 +1023,20 @@ bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) { } }(); + bool IsDeviceKernelCall = Callee == getASTContext().getcudaLaunchDeviceDecl(); + bool CallerHD = Caller && Caller->hasAttr<CUDAHostAttr>() && + Caller->hasAttr<CUDADeviceAttr>(); + bool CallerDiscard = SemaRef.getEmissionStatus(Caller) == + Sema::FunctionEmissionStatus::TemplateDiscarded; + bool RDC = getLangOpts().GPURelocatableDeviceCode; + if (IsDeviceKernelCall && !(CallerHD && CallerDiscard) && !RDC) { + Diag(Loc, diag::err_cuda_device_kernel_launch_require_rdc); + return false; + } + if (DiagKind == SemaDiagnosticBuilder::K_Nop) { // For -fgpu-rdc, keep track of external kernels used by host functions. - if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode && + if (getLangOpts().CUDAIsDevice && RDC && Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() && (!Caller || (!Caller->getDescribedFunctionTemplate() && getASTContext().GetGVALinkageForFunction(Caller) == diff --git a/clang/test/SemaCUDA/call-device-fn-from-host.cu b/clang/test/SemaCUDA/call-device-fn-from-host.cu index 4d66fccd84d53..64394c7a4d958 100644 --- a/clang/test/SemaCUDA/call-device-fn-from-host.cu +++ b/clang/test/SemaCUDA/call-device-fn-from-host.cu @@ -3,6 +3,11 @@ // RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ // RUN: -verify=expected,omp -verify-ignore-unexpected=note -fopenmp +// RUN: %clang_cc1 %s --std=c++17 -triple x86_64-unknown-linux -emit-llvm -o - \ +// RUN: -verify -verify-ignore-unexpected=note +// RUN: %clang_cc1 %s --std=c++17 -triple x86_64-unknown-linux -emit-llvm -o - \ +// RUN: -verify=expected,omp -verify-ignore-unexpected=note -fopenmp + // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. @@ -97,3 +102,18 @@ void host_func(void) { kernel<<<1, 1>>>(); } __device__ void f(); template<void(*F)()> __global__ void t() { F(); } __host__ void g() { t<f><<<1,1>>>(); } + +#if __cplusplus >= 201703L +namespace template_if_constexpr { + template<bool B> + __host__ __device__ void fn() { + if constexpr (B) + device_fn(); + } + + void call() { + fn<false>(); + fn<true>(); // expected-error@-5 {{reference to __device__ function 'device_fn' in __host__ __device__ function}} + } +} +#endif diff --git a/clang/test/SemaCUDA/call-host-fn-from-device.cu b/clang/test/SemaCUDA/call-host-fn-from-device.cu index acdd291b66457..d172cd966c823 100644 --- a/clang/test/SemaCUDA/call-host-fn-from-device.cu +++ b/clang/test/SemaCUDA/call-host-fn-from-device.cu @@ -1,6 +1,9 @@ // RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \ // RUN: -emit-llvm -o /dev/null -verify -verify-ignore-unexpected=note +// RUN: %clang_cc1 %s --std=c++17 -triple nvptx-unknown-unknown -fcuda-is-device \ +// RUN: -emit-llvm -o /dev/null -verify -verify-ignore-unexpected=note + // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. @@ -138,3 +141,18 @@ __host__ __device__ void TmplStruct<int>::fn<int>() { host_fn(); } // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}} __device__ void double_specialization() { TmplStruct<int>().fn<int>(); } + +#if __cplusplus >= 201703L +namespace template_if_constexpr { + template<bool B> + __host__ __device__ void fn() { + if constexpr (B) + host_fn(); + } + + __device__ void call() { + fn<false>(); + fn<true>(); // expected-error@-5 {{reference to __host__ function 'host_fn' in __host__ __device__ function}} + } +} +#endif diff --git a/clang/test/SemaCUDA/device-kernel-call.cu b/clang/test/SemaCUDA/device-kernel-call.cu index 856cbd88404e6..7511cf148a077 100644 --- a/clang/test/SemaCUDA/device-kernel-call.cu +++ b/clang/test/SemaCUDA/device-kernel-call.cu @@ -13,3 +13,19 @@ __global__ void g1(void) { // nordc-error@-1 {{kernel launch from __device__ or __global__ function requires relocatable device code (i.e. requires -fgpu-rdc)}} // hip-error@-2 {{device-side kernel call/launch is not supported}} } + +namespace template_if_constexpr { + template<bool B> + __host__ __device__ void fn() { + if constexpr (B) + g2<<<1, 1>>>(42); + // hip-error@-1 {{device-side kernel call/launch is not supported}} + } + + void call() { + fn<false>(); + fn<true>(); + // nordc-error@-7 {{kernel launch from __device__ or __global__ function requires relocatable device code (i.e. requires -fgpu-rdc)}} + // nordc-note@-2 {{in instantiation of function template specialization 'template_if_constexpr::fn<true>' requested here}} + } +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
