yaxunl updated this revision to Diff 282952. yaxunl retitled this revision from "[CUDA][HIP] Support accessing static device variable in host code" to "[CUDA][HIP] Support accessing static device variable in host code for -fno-gpu-rdc". yaxunl edited the summary of this revision. yaxunl added a comment.
revised for -fno-gpu-rdc case by Michael's comments. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D80858/new/ https://reviews.llvm.org/D80858 Files: clang/include/clang/AST/ASTContext.h clang/lib/AST/ASTContext.cpp clang/lib/Sema/SemaExpr.cpp clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -0,0 +1,86 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST %s + +#include "Inputs/cuda.h" + +// Test function scope static device variable, which should not be externalized. +// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1 + +// Check a static device variable referenced by host function is externalized. +// DEV-DAG: @_ZL1x = addrspace(1) externally_initialized global i32 0 +// HOST-DAG: @_ZL1x = internal global i32 undef +// HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" + +static __device__ int x; + +// Check a static device variables referenced only by device functions and kernels +// is not externalized. +// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0 +static __device__ int x2; + +// Check a static device variable referenced by host device function is externalized. +// DEV-DAG: @_ZL2x3 = addrspace(1) externally_initialized global i32 0 +static __device__ int x3; + +// Check a static device variable referenced in file scope is externalized. +// DEV-DAG: @_ZL2x4 = addrspace(1) externally_initialized global i32 0 +static __device__ int x4; +int& x4_ref = x4; + +// Check a static constant variable referenced by host is externalized. +// DEV-DAG: @_ZL1y = addrspace(4) externally_initialized global i32 0 +// HOST-DAG: @_ZL1y = internal global i32 undef +// HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" + +static __constant__ int y; + +// Test static host variable, which should not be externalized nor registered. +// HOST-DAG: @_ZL1z = internal global i32 0 +// DEV-NOT: @_ZL1z +static int z; + +// Test static device variable in inline function, which should not be +// externalized nor registered. +// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat + +inline __device__ void devfun(const int ** b) { + const static int p = 2; + b[0] = &p; + b[1] = &x2; +} + +__global__ void kernel(int *a, const int **b) { + const static int w = 1; + a[0] = x; + a[1] = y; + a[2] = x2; + a[3] = x3; + a[4] = x4; + b[0] = &w; + devfun(b); +} + +__host__ __device__ void hdf(int *a) { + a[0] = x3; +} + +int* getDeviceSymbol(int *x); + +void foo(int *a) { + getDeviceSymbol(&x); + getDeviceSymbol(&y); + z = 123; +} + +// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] +// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -17873,6 +17873,25 @@ if (Var->isInvalidDecl()) return; + // Record a CUDA/HIP static device/constant variable if it is referenced + // by host code. This is done conservatively, when the variable is referenced + // in any of the following contexts: + // - a non-function context + // - a host function + // - a host device function + // This also requires the reference of the static device/constant variable by + // host code to be visible in the device compilation for the compiler to be + // able to externalize the static device/constant variable. + if ((Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>()) && + Var->isFileVarDecl() && Var->getStorageClass() == SC_Static) { + auto *CurContext = SemaRef.CurContext; + if (!CurContext || !isa<FunctionDecl>(CurContext) || + cast<FunctionDecl>(CurContext)->hasAttr<CUDAHostAttr>() || + (!cast<FunctionDecl>(CurContext)->hasAttr<CUDADeviceAttr>() && + !cast<FunctionDecl>(CurContext)->hasAttr<CUDAGlobalAttr>())) + SemaRef.getASTContext().CUDAStaticDeviceVarReferencedByHost.insert(Var); + } + auto *MSI = Var->getMemberSpecializationInfo(); TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind() : Var->getTemplateSpecializationKind(); Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -10273,12 +10273,17 @@ } else if (D->hasAttr<DLLExportAttr>()) { if (L == GVA_DiscardableODR) return GVA_StrongODR; - } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice && - D->hasAttr<CUDAGlobalAttr>()) { + } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) { // Device-side functions with __global__ attribute must always be // visible externally so they can be launched from host. - if (L == GVA_DiscardableODR || L == GVA_Internal) + if (D->hasAttr<CUDAGlobalAttr>() && + (L == GVA_DiscardableODR || L == GVA_Internal)) return GVA_StrongODR; + // Single source offloading languages like CUDA/HIP need to be able to + // access static device variables from host code of the same compilation + // unit. This is done by externalizing the static variable. + if (Context.shouldExternalizeStaticVar(D)) + return GVA_StrongExternal; } return L; } @@ -11164,3 +11169,11 @@ return DB << Section.Decl; return DB << "a prior #pragma section"; } + +bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const { + return !getLangOpts().GPURelocatableDeviceCode && + (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) && + isa<VarDecl>(D) && cast<VarDecl>(D)->isFileVarDecl() && + cast<VarDecl>(D)->getStorageClass() == SC_Static && + CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D)); +} Index: clang/include/clang/AST/ASTContext.h =================================================================== --- clang/include/clang/AST/ASTContext.h +++ clang/include/clang/AST/ASTContext.h @@ -43,6 +43,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/MapVector.h" @@ -999,6 +1000,9 @@ // Implicitly-declared type 'struct _GUID'. mutable TagDecl *MSGuidTagDecl = nullptr; + /// Keep track of CUDA/HIP static device variables referenced by host code. + llvm::DenseSet<const VarDecl *> CUDAStaticDeviceVarReferencedByHost; + ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents, SelectorTable &sels, Builtin::Context &builtins); ASTContext(const ASTContext &) = delete; @@ -3018,6 +3022,9 @@ /// Return a new OMPTraitInfo object owned by this context. OMPTraitInfo &getNewOMPTraitInfo(); + /// Whether a C++ static variable should be externalized. + bool shouldExternalizeStaticVar(const Decl *D) const; + private: /// All OMPTraitInfo objects live in this collection, one per /// `pragma omp [begin] declare variant` directive.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits