https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/205828
>From 8efe485c1d4377584a2f5c7781546c0371413124 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <[email protected]> Date: Tue, 23 Jun 2026 11:42:56 -0400 Subject: [PATCH] [CUDA][HIP] Defer diagnostics for unused implicit H+D functions Implicit H+D attributes are usually added to host functions by speculative, optimistic heuristics. For example, constexpr functions are implicitly marked H+D on the assumption that they call only constexpr functions, and therefore work for both host and device. In practice, a constexpr function can still call a non-constexpr host function on a runtime path, which makes the body unusable for device code. PR #197214 fixed this for implicit H+D functions forced by explicit instantiation: defer device diagnostics until the end of the translation unit, then either report them for a real device caller or discard them and use the trap-body fallback if CodeGen still needs a device symbol. The same reasoning applies to any implicit H+D function. Device-side compilation is speculative until real device code reaches the function, so defer these diagnostics generally. This avoids rejecting host-only uses while preserving diagnostics for real device callers. --- clang/lib/CodeGen/CGCXX.cpp | 10 +-- clang/lib/Sema/Sema.cpp | 13 ++-- clang/lib/Sema/SemaBase.cpp | 8 +- clang/lib/Sema/SemaCUDA.cpp | 38 +++++----- .../implicit-hd-deferred-host-call-unused.cu | 74 +++++++++++++++++++ .../implicit-hd-deferred-overload-unused.cu | 59 +++++++++++++++ 6 files changed, 165 insertions(+), 37 deletions(-) create mode 100644 clang/test/SemaCUDA/implicit-hd-deferred-host-call-unused.cu create mode 100644 clang/test/SemaCUDA/implicit-hd-deferred-overload-unused.cu diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp index 8112783850bad..65274b1c57aba 100644 --- a/clang/lib/CodeGen/CGCXX.cpp +++ b/clang/lib/CodeGen/CGCXX.cpp @@ -237,13 +237,9 @@ void CodeGenModule::EmitDefinitionAsAlias(GlobalDecl AliasDecl, SetCommonAttributes(AliasDecl, Alias); } -// For an implicit __host__ __device__ destructor, this trap body is reachable -// only when a host-allocated object is destroyed on the device through the -// vtable. HIP documents that pattern as invalid: an object with virtual -// member functions constructed on the host cannot be destroyed on the device. -// Device-side construction either pulls the dtor in as an organic device -// caller (errors surface in Sema) or compiles cleanly (the real body is -// emitted, no trap). +// Invalid implicit H+D functions get a trap body when CodeGen still needs a +// device symbol, such as a vtable slot or explicit instantiation symbol. +// Organic device use surfaces the original Sema diagnostics instead. bool CodeGenModule::tryEmitCUDADeviceInvalidFunctionBody(GlobalDecl GD, llvm::Function *Fn) { if (!getLangOpts().CUDAIsDevice) diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 78fbc9e31842d..6b45711817c53 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2103,14 +2103,15 @@ void Sema::emitDeferredDiags() { ExternalSource->ReadDeclsToCheckForDeferredDiags( DeclsToCheckForDeferredDiags); - // For each implicit-H+D-explicit-inst function with deferred errors but no - // organic device caller, drop the diagnostics and mark for a trap body. - auto ClassifyImplicitHDExplicitInst = [&]() { + // For selected implicit-H+D functions with deferred device errors but no + // organic device caller, drop diagnostics and mark a trap body if CodeGen + // still needs a device symbol. + auto ClassifyImplicitHDDeviceDiags = [&]() { if (!LangOpts.CUDAIsDevice) return; for (auto &Pair : DeviceDeferredDiags) { const FunctionDecl *FD = Pair.first; - if (!SemaCUDA::isImplicitHDExplicitInstantiation(FD)) + if (!SemaCUDA::isImplicitHostDeviceFunction(FD)) continue; if (CUDA().DeviceKnownEmittedFns.count(FD)) continue; @@ -2129,14 +2130,14 @@ void Sema::emitDeferredDiags() { if ((DeviceDeferredDiags.empty() && !LangOpts.OpenMP) || DeclsToCheckForDeferredDiags.empty()) { - ClassifyImplicitHDExplicitInst(); + ClassifyImplicitHDDeviceDiags(); return; } DeferredDiagnosticsEmitter DDE(*this); for (auto *D : DeclsToCheckForDeferredDiags) DDE.checkRecordedDecl(D); - ClassifyImplicitHDExplicitInst(); + ClassifyImplicitHDDeviceDiags(); DDE.emitCollectedDiags(); } diff --git a/clang/lib/Sema/SemaBase.cpp b/clang/lib/Sema/SemaBase.cpp index 5524ff50fce85..92d114eaaf89f 100644 --- a/clang/lib/Sema/SemaBase.cpp +++ b/clang/lib/Sema/SemaBase.cpp @@ -65,12 +65,12 @@ Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, bool ShouldDefer = getLangOpts().CUDA && getLangOpts().GPUDeferDiag && DiagnosticIDs::isDeferrable(DiagID) && (SemaRef.DeferDiags || !IsError); - // Even without -fgpu-defer-diag, defer device-side errors inside an - // implicit-H+D explicit instantiation so end-of-TU classification can - // choose between surfacing them or emitting a trap body. + // Even without -fgpu-defer-diag, defer device-side errors inside selected + // implicit-H+D functions so end-of-TU classification can choose between + // surfacing them, discarding them, or emitting a trap body. if (!ShouldDefer && getLangOpts().CUDA && getLangOpts().CUDAIsDevice && DiagnosticIDs::isDeferrable(DiagID) && - SemaCUDA::isImplicitHDExplicitInstantiation( + SemaCUDA::isImplicitHostDeviceFunction( SemaRef.getCurFunctionDecl(/*AllowLambda=*/true))) ShouldDefer = true; auto SetIsLastErrorImmediate = [&](bool Flag) { diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index a2a088ab7c3ab..52850649c312e 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -926,7 +926,7 @@ SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc, if (SemaRef.IsLastErrorImmediate && getDiagnostics().getDiagnosticIDs()->isNote(DiagID)) return SemaDiagnosticBuilder::K_Immediate; - if (isImplicitHDExplicitInstantiation(CurFunContext)) + if (isImplicitHostDeviceFunction(CurFunContext)) return SemaDiagnosticBuilder::K_Deferred; return (SemaRef.getEmissionStatus(CurFunContext) == Sema::FunctionEmissionStatus::Emitted) @@ -995,25 +995,23 @@ bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) { // Otherwise, mark the call in our call graph so we can traverse it later. bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) == Sema::FunctionEmissionStatus::Emitted; - bool CallerIsImplicitHDExplicitInst = - isImplicitHDExplicitInstantiation(Caller); - SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee, - CallerKnownEmitted, - CallerIsImplicitHDExplicitInst] { - switch (IdentifyPreference(Caller, Callee)) { - case CFP_Never: - case CFP_WrongSide: - assert(Caller && "Never/wrongSide calls require a non-null caller"); - // If we know the caller will be emitted, we know this wrong-side call - // will be emitted, so it's an immediate error. Otherwise, defer the - // error until we know the caller is emitted. - return (CallerKnownEmitted && !CallerIsImplicitHDExplicitInst) - ? SemaDiagnosticBuilder::K_ImmediateWithCallStack - : SemaDiagnosticBuilder::K_Deferred; - default: - return SemaDiagnosticBuilder::K_Nop; - } - }(); + bool DeferImplicitHDDeviceDiag = isImplicitHostDeviceFunction(Caller); + SemaDiagnosticBuilder::Kind DiagKind = + [this, Caller, Callee, CallerKnownEmitted, DeferImplicitHDDeviceDiag] { + switch (IdentifyPreference(Caller, Callee)) { + case CFP_Never: + case CFP_WrongSide: + assert(Caller && "Never/wrongSide calls require a non-null caller"); + // If we know the caller will be emitted, we know this wrong-side call + // will be emitted, so it's an immediate error. Otherwise, defer the + // error until we know the caller is emitted. + return (CallerKnownEmitted && !DeferImplicitHDDeviceDiag) + ? SemaDiagnosticBuilder::K_ImmediateWithCallStack + : SemaDiagnosticBuilder::K_Deferred; + default: + return SemaDiagnosticBuilder::K_Nop; + } + }(); bool IsDeviceKernelCall = Callee == getASTContext().getcudaLaunchDeviceDecl(); bool CallerHD = Caller && Caller->hasAttr<CUDAHostAttr>() && diff --git a/clang/test/SemaCUDA/implicit-hd-deferred-host-call-unused.cu b/clang/test/SemaCUDA/implicit-hd-deferred-host-call-unused.cu new file mode 100644 index 0000000000000..c30b91b399545 --- /dev/null +++ b/clang/test/SemaCUDA/implicit-hd-deferred-host-call-unused.cu @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \ +// RUN: -foffload-implicit-host-device-templates -std=c++14 \ +// RUN: -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \ +// RUN: -foffload-implicit-host-device-templates -std=c++14 \ +// RUN: -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +__host__ constexpr int host_only_constexpr_unused() { return 1; } + +constexpr int constexpr_unused(int x) { + return x + host_only_constexpr_unused(); +} + +extern "C" int host_only_template_unused(); + +template <typename T> int template_unused(T x) { + return x + host_only_template_unused(); +} + +extern "C" int host_only_forced_unused(); + +#pragma clang force_cuda_host_device begin +int forced_unused(int x) { + return x + host_only_forced_unused(); +} +#pragma clang force_cuda_host_device end + +void host_context() { + (void)constexpr_unused(1); + (void)template_unused(1); + (void)forced_unused(1); +} + +__host__ constexpr int host_only_constexpr_used() { return 1; } +// expected-note@-1 {{'host_only_constexpr_used' declared here}} + +constexpr int constexpr_used(int x) { + return x + host_only_constexpr_used(); + // expected-error@-1 {{reference to __host__ function 'host_only_constexpr_used' in __host__ __device__ function}} +} + +extern "C" int host_only_template_used(); +// expected-note@-1 {{'host_only_template_used' declared here}} + +template <typename T> int template_used(T x) { + return x + host_only_template_used(); + // expected-error@-1 {{reference to __host__ function 'host_only_template_used' in __host__ __device__ function}} +} + +extern "C" int host_only_forced_used(); +// expected-note@-1 {{'host_only_forced_used' declared here}} + +#pragma clang force_cuda_host_device begin +int forced_used(int x) { + return x + host_only_forced_used(); + // expected-error@-1 {{reference to __host__ function 'host_only_forced_used' in __host__ __device__ function}} +} +#pragma clang force_cuda_host_device end + +__device__ int device_caller() { + return constexpr_used(1) + template_used(1) + forced_used(1); + // expected-note@-1 {{called by 'device_caller'}} + // expected-note@-2 {{called by 'device_caller'}} + // expected-note@-3 {{called by 'device_caller'}} +} + +__global__ void kernel(int *out) { + *out = device_caller(); + // expected-note@-1 {{called by 'kernel'}} + // expected-note@-2 {{called by 'kernel'}} + // expected-note@-3 {{called by 'kernel'}} +} diff --git a/clang/test/SemaCUDA/implicit-hd-deferred-overload-unused.cu b/clang/test/SemaCUDA/implicit-hd-deferred-overload-unused.cu new file mode 100644 index 0000000000000..6a18b66a6b35c --- /dev/null +++ b/clang/test/SemaCUDA/implicit-hd-deferred-overload-unused.cu @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \ +// RUN: -foffload-implicit-host-device-templates -std=c++14 \ +// RUN: -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \ +// RUN: -foffload-implicit-host-device-templates -std=c++14 \ +// RUN: -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +__host__ __device__ constexpr int pick_constexpr_unused(long); +__host__ __device__ constexpr int pick_constexpr_unused(unsigned long); + +constexpr int constexpr_unused(int x) { + return pick_constexpr_unused(x); +} + +__host__ __device__ constexpr int pick_constexpr_used(long); +// expected-note@-1 {{candidate function}} +__host__ __device__ constexpr int pick_constexpr_used(unsigned long); +// expected-note@-1 {{candidate function}} + +constexpr int constexpr_used(int x) { + return pick_constexpr_used(x); + // expected-error@-1 {{call to 'pick_constexpr_used' is ambiguous}} +} + +__host__ __device__ int pick_template_unused(long); +__host__ __device__ int pick_template_unused(unsigned long); + +template <typename T> int template_unused(T x) { + return pick_template_unused(x); +} + +void host_only() { + (void)constexpr_unused(1); + (void)template_unused(1); +} + +__host__ __device__ int pick_template_used(long); +// expected-note@-1 {{candidate function}} +__host__ __device__ int pick_template_used(unsigned long); +// expected-note@-1 {{candidate function}} + +template <typename T> int template_used(T x) { + return pick_template_used(x); + // expected-error@-1 {{call to 'pick_template_used' is ambiguous}} +} + +__device__ int device_caller() { + return constexpr_used(1) + template_used(1); + // expected-note@-1 {{called by 'device_caller'}} + // expected-note@-2 {{called by 'device_caller'}} +} + +__global__ void kernel(int *out) { + *out = device_caller(); + // expected-note@-1 {{called by 'kernel'}} + // expected-note@-2 {{called by 'kernel'}} +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
