Author: David Rivera Date: 2026-02-03T06:18:39-05:00 New Revision: a35b5940b44e4e3b9b1be98ca1b3e0b2743943ff
URL: https://github.com/llvm/llvm-project/commit/a35b5940b44e4e3b9b1be98ca1b3e0b2743943ff DIFF: https://github.com/llvm/llvm-project/commit/a35b5940b44e4e3b9b1be98ca1b3e0b2743943ff.diff LOG: [CIR][CUDA][HIP] Add NVPTX target info and CUDA/HIP global emission filtering (#177827) related: #175871 This patch adds foundational infra for device-side CUDA/HIP compilation by introducing NVPTX target info and implementing the global emission filtering logic. NVPTX Target Info to allows us to compile against that triple: - Add NVPTXABIInfo and NVPTXTargetCIRGenInfo classes - Wire up nvptx and nvptx64 triples in getTargetCIRGenInfo() - Add createNVPTXTargetCIRGenInfo() factory function CUDA/HIP Global Emission Filtering (most of this is boilerplate from the AST) This basically narrows down to: - Skip host-only functions (no `__device__` attribute) when `-fcuda-is-device` - Skip device-only functions (device without host) on host side - Always emit ` __global__` kernels and `__host__` `__device__` functions on both sides - Add `shouldEmitCUDAGlobalVar()` to handle variable emission (device/constant/shared variables) - Handle special cases: implicit host/device templates, lambda call operators Added: clang/test/CIR/CodeGenCUDA/filter-decl.cu clang/test/CIR/CodeGenCUDA/nvptx-basic.cu Modified: clang/lib/CIR/CodeGen/CIRGenModule.cpp clang/lib/CIR/CodeGen/CIRGenModule.h clang/lib/CIR/CodeGen/TargetInfo.cpp clang/lib/CIR/CodeGen/TargetInfo.h Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index fd99a47b8b445..508e941517c3d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -17,6 +17,7 @@ #include "CIRGenFunction.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/ASTLambda.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclOpenACC.h" #include "clang/AST/GlobalDecl.h" @@ -29,6 +30,7 @@ #include "clang/CIR/MissingFeatures.h" #include "CIRGenFunctionInfo.h" +#include "TargetInfo.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Location.h" #include "mlir/IR/MLIRContext.h" @@ -252,6 +254,10 @@ const TargetCIRGenInfo &CIRGenModule::getTargetCIRGenInfo() { return *theTargetCIRGenInfo; } } + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + theTargetCIRGenInfo = createNVPTXTargetCIRGenInfo(genTypes); + return *theTargetCIRGenInfo; } } @@ -368,6 +374,28 @@ void CIRGenModule::emitDeferred() { } } +template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *decl) { + if (!decl) + return false; + if (auto *attr = decl->getAttr<AttrT>()) + return attr->isImplicit(); + return decl->isImplicit(); +} + +// TODO(cir): This should be shared with OG Codegen. +bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const { + assert(langOpts.CUDA && "Should not be called by non-CUDA languages"); + // We need to emit host-side 'shadows' for all global + // device-side variables because the CUDA runtime needs their + // size and host-side address in order to provide access to + // their device-side incarnations. + return !langOpts.CUDAIsDevice || global->hasAttr<CUDADeviceAttr>() || + global->hasAttr<CUDAConstantAttr>() || + global->hasAttr<CUDASharedAttr>() || + global->getType()->isCUDADeviceBuiltinSurfaceType() || + global->getType()->isCUDADeviceBuiltinTextureType(); +} + void CIRGenModule::emitGlobal(clang::GlobalDecl gd) { if (const auto *cd = dyn_cast<clang::OpenACCConstructDecl>(gd.getDecl())) { emitGlobalOpenACCDecl(cd); @@ -382,6 +410,36 @@ void CIRGenModule::emitGlobal(clang::GlobalDecl gd) { const auto *global = cast<ValueDecl>(gd.getDecl()); + // If this is CUDA, be selective about which declarations we emit. + // Non-constexpr non-lambda implicit host device functions are not emitted + // unless they are used on device side. + if (langOpts.CUDA) { + assert((isa<FunctionDecl>(global) || isa<VarDecl>(global)) && + "Expected Variable or Function"); + if (const auto *varDecl = dyn_cast<VarDecl>(global)) { + if (!shouldEmitCUDAGlobalVar(varDecl)) + return; + // TODO(cir): This should be shared with OG Codegen. + } else if (langOpts.CUDAIsDevice) { + const auto *functionDecl = dyn_cast<FunctionDecl>(global); + if ((!global->hasAttr<CUDADeviceAttr>() || + (langOpts.OffloadImplicitHostDeviceTemplates && + hasImplicitAttr<CUDAHostAttr>(functionDecl) && + hasImplicitAttr<CUDADeviceAttr>(functionDecl) && + !functionDecl->isConstexpr() && + !isLambdaCallOperator(functionDecl) && + !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count( + functionDecl))) && + !global->hasAttr<CUDAGlobalAttr>() && + !(langOpts.HIPStdPar && isa<FunctionDecl>(global) && + !global->hasAttr<CUDAHostAttr>())) + return; + // Device-only functions are the only things we skip. + } else if (!global->hasAttr<CUDAHostAttr>() && + global->hasAttr<CUDADeviceAttr>()) + return; + } + if (const auto *fd = dyn_cast<FunctionDecl>(global)) { // Update deferred annotations with the latest declaration if the function // was already used or defined. diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 46ef0ad426ae1..88b66129a8348 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -567,6 +567,10 @@ class CIRGenModule : public CIRGenTypeCache { static void setInitializer(cir::GlobalOp &op, mlir::Attribute value); + // Whether a global variable should be emitted by CUDA/HIP host/device + // related attributes. + bool shouldEmitCUDAGlobalVar(const VarDecl *global) const; + void replaceUsesOfNonProtoTypeWithRealFunction(mlir::Operation *old, cir::FuncOp newFn); diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 377c532e492d9..dc29dc0204c19 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -56,6 +56,25 @@ class X8664TargetCIRGenInfo : public TargetCIRGenInfo { } // namespace +namespace { + +class NVPTXABIInfo : public ABIInfo { +public: + NVPTXABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {} +}; + +class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo { +public: + NVPTXTargetCIRGenInfo(CIRGenTypes &cgt) + : TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {} +}; +} // namespace + +std::unique_ptr<TargetCIRGenInfo> +clang::CIRGen::createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt) { + return std::make_unique<NVPTXTargetCIRGenInfo>(cgt); +} + std::unique_ptr<TargetCIRGenInfo> clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) { return std::make_unique<X8664TargetCIRGenInfo>(cgt); diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index 9535ba94fb08b..bab838692e215 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -124,6 +124,8 @@ class TargetCIRGenInfo { std::unique_ptr<TargetCIRGenInfo> createX8664TargetCIRGenInfo(CIRGenTypes &cgt); +std::unique_ptr<TargetCIRGenInfo> createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt); + } // namespace clang::CIRGen #endif // LLVM_CLANG_LIB_CIR_TARGETINFO_H diff --git a/clang/test/CIR/CodeGenCUDA/filter-decl.cu b/clang/test/CIR/CodeGenCUDA/filter-decl.cu new file mode 100644 index 0000000000000..a2a7e9ba9a1b0 --- /dev/null +++ b/clang/test/CIR/CodeGenCUDA/filter-decl.cu @@ -0,0 +1,55 @@ +// Based on clang/test/CodeGenCUDA/filter-decl.cu tailored for CIR current capabilities. +// Tests that host/device functions are emitted only on the appropriate side. + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \ +// RUN: -x cuda -emit-cir %s -o %t.host.cir +// RUN: FileCheck --input-file=%t.host.cir %s --check-prefix=CIR-HOST + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -emit-cir %s -o %t.device.cir +// RUN: FileCheck --input-file=%t.device.cir %s --check-prefix=CIR-DEVICE + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \ +// RUN: -x cuda -emit-llvm %s -o %t.host.ll +// RUN: FileCheck --input-file=%t.host.ll %s --check-prefix=OGCG-HOST + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.device.ll +// RUN: FileCheck --input-file=%t.device.ll %s --check-prefix=OGCG-DEVICE + +#include "Inputs/cuda.h" + +// Implicit host function (no attribute) — host only +// CIR-HOST: cir.func {{.*}} @_Z20implicithostonlyfuncv() +// CIR-DEVICE-NOT: @_Z20implicithostonlyfuncv +// OGCG-HOST: define{{.*}} void @_Z20implicithostonlyfuncv() +// OGCG-DEVICE-NOT: @_Z20implicithostonlyfuncv +void implicithostonlyfunc(void) {} + +// Explicit __host__ function — host only +// CIR-HOST: cir.func {{.*}} @_Z20explicithostonlyfuncv() +// CIR-DEVICE-NOT: @_Z20explicithostonlyfuncv +// OGCG-HOST: define{{.*}} void @_Z20explicithostonlyfuncv() +// OGCG-DEVICE-NOT: @_Z20explicithostonlyfuncv +__host__ void explicithostonlyfunc(void) {} + +// __device__ function — device only +// CIR-HOST-NOT: @_Z14deviceonlyfuncv +// CIR-DEVICE: cir.func {{.*}} @_Z14deviceonlyfuncv() +// OGCG-HOST-NOT: @_Z14deviceonlyfuncv +// OGCG-DEVICE: define{{.*}} void @_Z14deviceonlyfuncv() +__device__ void deviceonlyfunc(void) {} + +// __host__ __device__ function — both sides +// CIR-HOST: cir.func {{.*}} @_Z14hostdevicefuncv() +// CIR-DEVICE: cir.func {{.*}} @_Z14hostdevicefuncv() +// OGCG-HOST: define{{.*}} void @_Z14hostdevicefuncv() +// OGCG-DEVICE: define{{.*}} void @_Z14hostdevicefuncv() +__host__ __device__ void hostdevicefunc(void) {} + +// __global__ kernel — both sides (stub on host, kernel on device) +// CIR-HOST: cir.func {{.*}} @_Z25__device_stub__globalfuncv() +// CIR-DEVICE: cir.func {{.*}} @_Z10globalfuncv() +// OGCG-HOST: define{{.*}} void @_Z25__device_stub__globalfuncv() +// OGCG-DEVICE: define{{.*}} void @_Z10globalfuncv() +__global__ void globalfunc(void) {} diff --git a/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu b/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu new file mode 100644 index 0000000000000..99f0164a18506 --- /dev/null +++ b/clang/test/CIR/CodeGenCUDA/nvptx-basic.cu @@ -0,0 +1,30 @@ +// Based on clang/test/CodeGenCUDA/ptx-kernels.cu tailored for CIR current capabilities. +// Tests basic device-side compilation with NVPTX target. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "Inputs/cuda.h" + +// CHECK: cir.func {{.*}} @device_function() +extern "C" +__device__ void device_function() {} + +// CHECK: cir.func {{.*}} @global_function() +// CHECK: cir.call @device_function() +extern "C" +__global__ void global_function() { + device_function(); +} + +// Template kernel with explicit instantiation +template <typename T> __global__ void templated_kernel(T param) {} +template __global__ void templated_kernel<int>(int); +// CHECK: cir.func {{.*}} @_Z16templated_kernelIiEvT_ + +// Anonymous namespace kernel +namespace { +__global__ void anonymous_ns_kernel() {} +// CHECK: cir.func {{.*}} @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
