yaxunl updated this revision to Diff 229137.
yaxunl added a comment.

Attempt to prefix the kernel stub name on the fly.

If we do not want to create two Decl's during parsing, and do not want to 
change the mangler, it seems the least invasive way to get the prefixed kernel 
name is to change it on the fly then change it back.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68578/new/

https://reviews.llvm.org/D68578

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CGCUDARuntime.h
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/kernel-stub-name.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===================================================================
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -36,4 +36,4 @@
   }(p);
 }
 // HOST: @__hip_register_globals
-// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
Index: clang/test/CodeGenCUDA/kernel-stub-name.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -6,15 +6,44 @@
 
 #include "Inputs/cuda.h"
 
+extern "C" __global__ void ckernel() {}
+
+namespace ns {
+__global__ void nskernel() {}
+} // namespace ns
+
 template<class T>
 __global__ void kernelfunc() {}
 
+// Device side kernel names
+
+// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
+// CHECK: @[[NSKERN:[0-9]*]] = {{.*}} c"_ZN2ns8nskernelEv\00"
+// CHECK: @[[TKERN:[0-9]*]] = {{.*}} c"_Z10kernelfuncIiEvv\00"
+
+// Non-template kernel stub functions
+
+// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]]
+
 // CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
-// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]()
-void hostfunc(void) { kernelfunc<int><<<1, 1>>>(); }
+// CHECK: call void @[[CSTUB]]()
+// CHECK: call void @[[NSSTUB]]()
+// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]()
+void hostfunc(void) {
+  ckernel<<<1, 1>>>();
+  ns::nskernel<<<1, 1>>>();
+  kernelfunc<int><<<1, 1>>>();
+}
+
+// Template kernel stub functions
 
-// CHECK: define{{.*}}@[[STUB]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[STUB]]
+// CHECK: define{{.*}}@[[TSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]]
 
 // CHECK-LABEL: define{{.*}}@__hip_register_globals
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[STUB]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]]
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1090,13 +1090,25 @@
 
   // Keep the first result in the case of a mangling collision.
   const auto *ND = cast<NamedDecl>(GD.getDecl());
-  std::string MangledName = getMangledNameImpl(*this, GD, ND);
+  std::string MangledName;
 
   // Adjust kernel stub mangling as we may need to be able to differentiate
   // them from the kernel itself (e.g., for HIP).
-  if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
-    if (!getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>())
-      MangledName = getCUDARuntime().getDeviceStubName(MangledName);
+  if (ND && ND->hasAttr<CUDAGlobalAttr>() && !getLangOpts().CUDAIsDevice &&
+      getLangOpts().HIP) {
+    auto *FD = const_cast<NamedDecl *>((ND));
+    if (auto *TD = cast<FunctionDecl>(FD)->getPrimaryTemplate())
+      FD = TD->getTemplatedDecl();
+    auto OldDeclName = FD->getDeclName();
+    auto NewNameStr = std::string("__device_stub__") + OldDeclName.getAsString();
+    auto *NewId = &Context.Idents.get(NewNameStr);
+    auto NewDeclName = DeclarationName(NewId);
+    FD->setDeclName(NewDeclName);
+    MangledName = getMangledNameImpl(*this, GD, ND);
+    FD->setDeclName(OldDeclName);
+  } else {
+    MangledName = getMangledNameImpl(*this, GD, ND);
+  }
 
   auto Result = Manglings.insert(std::make_pair(MangledName, GD));
   return MangledDeclNames[CanonicalGD] = Result.first->first();
Index: clang/lib/CodeGen/CGCUDARuntime.h
===================================================================
--- clang/lib/CodeGen/CGCUDARuntime.h
+++ clang/lib/CodeGen/CGCUDARuntime.h
@@ -25,6 +25,7 @@
 namespace clang {
 
 class CUDAKernelCallExpr;
+class FunctionDecl;
 class VarDecl;
 
 namespace CodeGen {
@@ -65,9 +66,6 @@
   /// Returns a module cleanup function or nullptr if it's not needed.
   /// Must be called after ModuleCtorFunction
   virtual llvm::Function *makeModuleDtorFunction() = 0;
-
-  /// Construct and return the stub name of a kernel.
-  virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0;
 };
 
 /// Creates an instance of a CUDA runtime class.
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -132,8 +132,6 @@
   llvm::Function *makeModuleCtorFunction() override;
   /// Creates module destructor function
   llvm::Function *makeModuleDtorFunction() override;
-  /// Construct and return the stub name of a kernel.
-  std::string getDeviceStubName(llvm::StringRef Name) const override;
 };
 
 }
@@ -219,21 +217,6 @@
 
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
                                      FunctionArgList &Args) {
-  // Ensure either we have different ABIs between host and device compilations,
-  // says host compilation following MSVC ABI but device compilation follows
-  // Itanium C++ ABI or, if they follow the same ABI, kernel names after
-  // mangling should be the same after name stubbing. The later checking is
-  // very important as the device kernel name being mangled in host-compilation
-  // is used to resolve the device binaries to be executed. Inconsistent naming
-  // result in undefined behavior. Even though we cannot check that naming
-  // directly between host- and device-compilations, the host- and
-  // device-mangling in host compilation could help catching certain ones.
-  assert((CGF.CGM.getContext().getAuxTargetInfo() &&
-          (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() !=
-           CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
-         getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==
-             CGF.CurFn->getName());
-
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
                          CudaFeature::CUDA_USES_NEW_LAUNCH) ||
@@ -797,12 +780,6 @@
   return ModuleDtorFunc;
 }
 
-std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const {
-  if (!CGM.getLangOpts().HIP)
-    return Name;
-  return (Name + ".stub").str();
-}
-
 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
   return new CGNVCUDARuntime(CGM);
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to