Author: Weibo He
Date: 2026-05-08T15:18:43+08:00
New Revision: 76c69de591b388ffb9fbf57bae337cafde811008

URL: 
https://github.com/llvm/llvm-project/commit/76c69de591b388ffb9fbf57bae337cafde811008
DIFF: 
https://github.com/llvm/llvm-project/commit/76c69de591b388ffb9fbf57bae337cafde811008.diff

LOG: [CUDA/HIP] Do not check function calls in discarded statement (#194606)

Previously, calling a host-device mismatch function inside a discarded
`if constexpr` branch would trigger an error. This patch recognizes that
discarded statements are never instantiated and allows such code.

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/lib/Sema/SemaCUDA.cpp
    clang/test/SemaCUDA/call-device-fn-from-host.cu
    clang/test/SemaCUDA/call-host-fn-from-device.cu
    clang/test/SemaCUDA/device-kernel-call.cu

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c83a1bd0ab2e9..ac462e3bf4732 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -481,6 +481,9 @@ Improvements to Clang's diagnostics
 
 - Removed the body of lambdas from some diagnostic messages.
 
+- Fixed false positive host-device mismatch errors in discarded `if constexpr` 
branches for CUDA/HIP;
+  such calls are now correctly skipped.
+
 Improvements to Clang's time-trace
 ----------------------------------
 

diff  --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index c086f9a32ce4e..9e05de941f335 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -84,10 +84,6 @@ ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, 
SourceLocation LLLLoc,
     return ExprError(
         Diag(LLLLoc, diag::err_cuda_device_kernel_launch_not_supported));
 
-  if (IsDeviceKernelCall && !getLangOpts().GPURelocatableDeviceCode)
-    return ExprError(
-        Diag(LLLLoc, diag::err_cuda_device_kernel_launch_require_rdc));
-
   FunctionDecl *ConfigDecl = IsDeviceKernelCall
                                  ? getASTContext().getcudaLaunchDeviceDecl()
                                  : getASTContext().getcudaConfigureCallDecl();
@@ -990,7 +986,8 @@ bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl 
*Callee) {
   assert(Callee && "Callee may not be null.");
 
   const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
-  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
+  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated() ||
+      ExprEvalCtx.isDiscardedStatementContext())
     return true;
 
   // C++ deduction guides participate in overload resolution but are not
@@ -1026,9 +1023,20 @@ bool SemaCUDA::CheckCall(SourceLocation Loc, 
FunctionDecl *Callee) {
     }
   }();
 
+  bool IsDeviceKernelCall = Callee == 
getASTContext().getcudaLaunchDeviceDecl();
+  bool CallerHD = Caller && Caller->hasAttr<CUDAHostAttr>() &&
+                  Caller->hasAttr<CUDADeviceAttr>();
+  bool CallerDiscard = SemaRef.getEmissionStatus(Caller) ==
+                       Sema::FunctionEmissionStatus::TemplateDiscarded;
+  bool RDC = getLangOpts().GPURelocatableDeviceCode;
+  if (IsDeviceKernelCall && !(CallerHD && CallerDiscard) && !RDC) {
+    Diag(Loc, diag::err_cuda_device_kernel_launch_require_rdc);
+    return false;
+  }
+
   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
     // For -fgpu-rdc, keep track of external kernels used by host functions.
-    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
+    if (getLangOpts().CUDAIsDevice && RDC &&
         Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
         (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                      getASTContext().GetGVALinkageForFunction(Caller) ==

diff  --git a/clang/test/SemaCUDA/call-device-fn-from-host.cu 
b/clang/test/SemaCUDA/call-device-fn-from-host.cu
index 4d66fccd84d53..64394c7a4d958 100644
--- a/clang/test/SemaCUDA/call-device-fn-from-host.cu
+++ b/clang/test/SemaCUDA/call-device-fn-from-host.cu
@@ -3,6 +3,11 @@
 // RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - 
\
 // RUN:   -verify=expected,omp -verify-ignore-unexpected=note -fopenmp
 
+// RUN: %clang_cc1 %s --std=c++17 -triple x86_64-unknown-linux -emit-llvm -o - 
\
+// RUN:   -verify -verify-ignore-unexpected=note
+// RUN: %clang_cc1 %s --std=c++17 -triple x86_64-unknown-linux -emit-llvm -o - 
\
+// RUN:   -verify=expected,omp -verify-ignore-unexpected=note -fopenmp
+
 // Note: This test won't work with -fsyntax-only, because some of these errors
 // are emitted during codegen.
 
@@ -97,3 +102,18 @@ void host_func(void) { kernel<<<1, 1>>>(); }
 __device__ void f();
 template<void(*F)()> __global__ void t() { F(); }
 __host__ void g() { t<f><<<1,1>>>(); }
+
+#if __cplusplus >= 201703L
+namespace template_if_constexpr {
+  template<bool B>
+  __host__ __device__ void fn() {
+    if constexpr (B)
+      device_fn();
+  }
+
+  void call() {
+    fn<false>();
+    fn<true>(); // expected-error@-5 {{reference to __device__ function 
'device_fn' in __host__ __device__ function}}
+  }
+}
+#endif

diff  --git a/clang/test/SemaCUDA/call-host-fn-from-device.cu 
b/clang/test/SemaCUDA/call-host-fn-from-device.cu
index acdd291b66457..d172cd966c823 100644
--- a/clang/test/SemaCUDA/call-host-fn-from-device.cu
+++ b/clang/test/SemaCUDA/call-host-fn-from-device.cu
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown 
-fcuda-is-device \
 // RUN:   -emit-llvm -o /dev/null -verify -verify-ignore-unexpected=note
 
+// RUN: %clang_cc1 %s --std=c++17 -triple nvptx-unknown-unknown 
-fcuda-is-device \
+// RUN:   -emit-llvm -o /dev/null -verify -verify-ignore-unexpected=note
+
 // Note: This test won't work with -fsyntax-only, because some of these errors
 // are emitted during codegen.
 
@@ -138,3 +141,18 @@ __host__ __device__ void TmplStruct<int>::fn<int>() { 
host_fn(); }
 // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ 
__device__ function}}
 
 __device__ void double_specialization() { TmplStruct<int>().fn<int>(); }
+
+#if __cplusplus >= 201703L
+namespace template_if_constexpr {
+  template<bool B>
+  __host__ __device__ void fn() {
+    if constexpr (B)
+      host_fn();
+  }
+
+  __device__ void call() {
+    fn<false>();
+    fn<true>(); // expected-error@-5 {{reference to __host__ function 
'host_fn' in __host__ __device__ function}}
+  }
+}
+#endif

diff  --git a/clang/test/SemaCUDA/device-kernel-call.cu 
b/clang/test/SemaCUDA/device-kernel-call.cu
index 856cbd88404e6..7511cf148a077 100644
--- a/clang/test/SemaCUDA/device-kernel-call.cu
+++ b/clang/test/SemaCUDA/device-kernel-call.cu
@@ -13,3 +13,19 @@ __global__ void g1(void) {
   // nordc-error@-1 {{kernel launch from __device__ or __global__ function 
requires relocatable device code (i.e. requires -fgpu-rdc)}}
   // hip-error@-2 {{device-side kernel call/launch is not supported}}
 }
+
+namespace template_if_constexpr {
+  template<bool B>
+  __host__ __device__ void fn() {
+    if constexpr (B)
+      g2<<<1, 1>>>(42);
+    // hip-error@-1 {{device-side kernel call/launch is not supported}}
+  }
+
+  void call() {
+    fn<false>();
+    fn<true>();
+    // nordc-error@-7 {{kernel launch from __device__ or __global__ function 
requires relocatable device code (i.e. requires -fgpu-rdc)}}
+    // nordc-note@-2 {{in instantiation of function template specialization 
'template_if_constexpr::fn<true>' requested here}}
+  }
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to