yaxunl created this revision.
yaxunl added a reviewer: tra.
yaxunl requested review of this revision.

Currently non-ODR-use of static device variable by host code
causes static device variable to be emitted and registered,
which should not.

This patch fixes that.


https://reviews.llvm.org/D102237

Files:
  clang/lib/Sema/SemaExpr.cpp
  clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
  clang/test/CodeGenCUDA/static-device-var-rdc.cu

Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu
===================================================================
--- clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -2,19 +2,19 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=DEV,INT-DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=HOST,INT-HOST %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
 // RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
 // RUN: cat %t.host | FileCheck -check-prefixes=HOST,EXT-HOST %s
 
 // Check host and device compilations use the same postfixes for static
@@ -64,6 +64,11 @@
 // DEV-NOT: @_ZL1z
 static int z;
 
+// Test non-ODR-use of static device variable is not emitted or registered.
+// DEV-NOT: @_ZL1u
+// HOST-NOT: @_ZL1u
+static __device__ int u;
+
 // Test static device variable in inline function, which should not be
 // externalized nor registered.
 // DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat
@@ -77,6 +82,7 @@
   const static int w = 1;
   a[0] = x;
   a[1] = y;
+  a[2] = sizeof(u);
   b[0] = &w;
   b[1] = &x2;
   devfun(b);
@@ -88,6 +94,7 @@
   getDeviceSymbol(&x);
   getDeviceSymbol(&y);
   z = 123;
+  decltype(u) tmp;
 }
 
 // HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
@@ -95,3 +102,4 @@
 // HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2
 // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
 // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZL1u
Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
===================================================================
--- clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
@@ -72,6 +72,12 @@
 
 static __device__ int w;
 
+// Test non-ODR-use of static device var should not be emitted or registered.
+// DEV-NOT: @_ZL1u
+// HOST-NOT: @_ZL1u
+
+static __device__ int u;
+
 inline __device__ void devfun(const int ** b) {
   const static int p = 2;
   b[0] = &p;
@@ -88,6 +94,7 @@
   a[3] = x3;
   a[4] = x4;
   a[5] = x5;
+  a[6] = sizeof(u);
   b[0] = &w;
   b[1] = &z2;
   b[2] = &local_static_constant;
@@ -108,10 +115,12 @@
   getDeviceSymbol(&w);
   z = 123;
   a[0] = &z2;
+  decltype(u) tmp;
 }
 
 // HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
 // HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
 // HOST: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]]
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZL1u
 // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
 // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -17126,10 +17126,7 @@
     CaptureType, DeclRefType,
     FunctionScopeIndexToStopAt);
 
-  // Diagnose ODR-use of host global variables in device functions. Reference
-  // of device global variables in host functions is allowed through shadow
-  // variables therefore it is not diagnosed.
-  if (SemaRef.LangOpts.CUDA && SemaRef.LangOpts.CUDAIsDevice) {
+  if (SemaRef.LangOpts.CUDA) {
     auto *FD = dyn_cast_or_null<FunctionDecl>(SemaRef.CurContext);
     auto Target = SemaRef.IdentifyCUDATarget(FD);
     auto IsEmittedOnDeviceSide = [](VarDecl *Var) {
@@ -17145,9 +17142,28 @@
       }
       return false;
     };
-    if (Var && Var->hasGlobalStorage() && !IsEmittedOnDeviceSide(Var)) {
-      SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
-          << /*host*/ 2 << /*variable*/ 1 << Var << Target;
+    if (Var && Var->hasGlobalStorage()) {
+      if (SemaRef.LangOpts.CUDAIsDevice && !IsEmittedOnDeviceSide(Var)) {
+        // Diagnose ODR-use of host global variables in device functions.
+        // Reference of device global variables in host functions is allowed
+        // through shadow variables therefore it is not diagnosed.
+        SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
+            << /*host*/ 2 << /*variable*/ 1 << Var << Target;
+      } else if (Target == Sema::CFT_Host || Target == Sema::CFT_HostDevice) {
+        // Record a CUDA/HIP static device/constant variable if it is referenced
+        // by host code. This is done conservatively, when the variable is
+        // referenced in any of the following contexts:
+        //   - a non-function context
+        //   - a host function
+        //   - a host device function
+        // This also requires the reference of the static device/constant
+        // variable by host code to be visible in the device compilation for the
+        // compiler to be able to externalize the static device/constant
+        // variable.
+        if (SemaRef.getASTContext().mayExternalizeStaticVar(Var))
+          SemaRef.getASTContext().CUDAStaticDeviceVarReferencedByHost.insert(
+              Var);
+      }
     }
   }
 
@@ -18313,24 +18329,6 @@
   if (Var->isInvalidDecl())
     return;
 
-  // Record a CUDA/HIP static device/constant variable if it is referenced
-  // by host code. This is done conservatively, when the variable is referenced
-  // in any of the following contexts:
-  //   - a non-function context
-  //   - a host function
-  //   - a host device function
-  // This also requires the reference of the static device/constant variable by
-  // host code to be visible in the device compilation for the compiler to be
-  // able to externalize the static device/constant variable.
-  if (SemaRef.getASTContext().mayExternalizeStaticVar(Var)) {
-    auto *CurContext = SemaRef.CurContext;
-    if (!CurContext || !isa<FunctionDecl>(CurContext) ||
-        cast<FunctionDecl>(CurContext)->hasAttr<CUDAHostAttr>() ||
-        (!cast<FunctionDecl>(CurContext)->hasAttr<CUDADeviceAttr>() &&
-         !cast<FunctionDecl>(CurContext)->hasAttr<CUDAGlobalAttr>()))
-      SemaRef.getASTContext().CUDAStaticDeviceVarReferencedByHost.insert(Var);
-  }
-
   auto *MSI = Var->getMemberSpecializationInfo();
   TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind()
                                        : Var->getTemplateSpecializationKind();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to