yaxunl created this revision.
yaxunl added reviewers: tra, rjmccall.

HIP provide host API to allow C/C++ programs to
launch kernel. A C/C++ program can declare a HIP
kernel as an external function and pass it to
the kernel launching API. When linked with object
files built from HIP programs. These external functions
will resolve to symbols with the same name in HIP
programs so that kernels with the same name can be
found and launched.

This requires clang to emit symbols with the same
name as kernels in object files and use them to
identify kernels, instead of using device stub
functions to identify kernels, since device stub
function has different names than kernels.

This patch lets clang emits a void* type global
variable for each kernel in host IR, which is
called kernel handle. The kernel handle has the
same mangled name as kernel by host ABI. It is
passed to __hipRegisterFunction and kernel launching
functions for identifying kernels.


https://reviews.llvm.org/D77743

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/test/CodeGenCUDA/Inputs/cuda.h
  clang/test/CodeGenCUDA/cxx-call-kernel.cpp
  clang/test/CodeGenCUDA/kernel-stub-name.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===================================================================
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -36,4 +36,4 @@
   }(p);
 }
 // HOST: @__hip_register_globals
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
Index: clang/test/CodeGenCUDA/kernel-stub-name.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -6,6 +6,12 @@
 
 #include "Inputs/cuda.h"
 
+// Kernel handles
+
+// CHECK: @[[HCKERN:ckernel]] = constant i8* null
+// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant i8* null
+// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant i8* null
+
 extern "C" __global__ void ckernel() {}
 
 namespace ns {
@@ -26,9 +32,9 @@
 // Non-template kernel stub functions
 
 // CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
 // CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
 
 // CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
 // CHECK: call void @[[CSTUB]]()
@@ -45,11 +51,11 @@
 // Template kernel stub functions
 
 // CHECK: define{{.*}}@[[TSTUB]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
 
 // CHECK: declare{{.*}}@[[DSTUB]]
 
 // CHECK-LABEL: define{{.*}}@__hip_register_globals
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]]
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]]
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
Index: clang/test/CodeGenCUDA/cxx-call-kernel.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/cxx-call-kernel.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -x hip -emit-llvm-bc %s -o %t.hip.bc
+// RUN: %clang_cc1 -mlink-builtin-bitcode %t.hip.bc -DHIP_PLATFORM -emit-llvm \
+// RUN:   %s -o - | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: @_Z2g1i = internal constant i8* null
+#if __HIP__
+__global__ void g1(int x) {}
+#else
+extern void g1(int x);
+
+// CHECK: call i32 @hipLaunchKernel{{.*}}@_Z2g1i
+void test() {
+  hipLaunchKernel((void*)g1, 1, 1, nullptr, 0, 0);
+}
+
+// CHECK: __hipRegisterFunction{{.*}}@_Z2g1i
+#endif
Index: clang/test/CodeGenCUDA/Inputs/cuda.h
===================================================================
--- clang/test/CodeGenCUDA/Inputs/cuda.h
+++ clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -2,19 +2,28 @@
 
 #include <stddef.h>
 
+#if __HIP__ || __CUDA__
 #define __constant__ __attribute__((constant))
 #define __device__ __attribute__((device))
 #define __global__ __attribute__((global))
 #define __host__ __attribute__((host))
 #define __shared__ __attribute__((shared))
 #define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#else
+#define __constant__
+#define __device__
+#define __global__
+#define __host__
+#define __shared__
+#define __launch_bounds__(...)
+#endif
 
 struct dim3 {
   unsigned x, y, z;
   __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
 };
 
-#ifdef __HIP__
+#if __HIP__ || HIP_PLATFORM
 typedef struct hipStream *hipStream_t;
 typedef enum hipError {} hipError_t;
 int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -41,12 +41,16 @@
   llvm::LLVMContext &Context;
   /// Convenience reference to the current module
   llvm::Module &TheModule;
-  /// Keeps track of kernel launch stubs emitted in this module
+  /// Keeps track of kernel launch stubs and handles emitted in this module
   struct KernelInfo {
-    llvm::Function *Kernel;
+    llvm::Function *Kernel; // stub function to help launch kernel
     const Decl *D;
   };
   llvm::SmallVector<KernelInfo, 16> EmittedKernels;
+  // Map a device stub function to a symbol for identifying kernel in host code.
+  // For CUDA, the symbol for identifying the kernel is the same as the device
+  // stub function. For HIP, they are different.
+  llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles;
   struct VarInfo {
     llvm::GlobalVariable *Var;
     const VarDecl *D;
@@ -240,6 +244,18 @@
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
                                      FunctionArgList &Args) {
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
+  llvm::GlobalValue *KernelHandle = CGF.CurFn;
+  if (CGF.getLangOpts().HIP) {
+    auto Linkage = CGF.CurFn->getLinkage();
+    auto *Var = new llvm::GlobalVariable(
+        TheModule, VoidPtrTy, /*isConstant=*/true, Linkage,
+        /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrTy),
+        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl),
+                                      KernelReferenceKind::Kernel)));
+    Var->setAlignment(CGM.getPointerAlign().getAsAlign());
+    KernelHandle = Var;
+  }
+  KernelHandles[CGF.CurFn] = KernelHandle;
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
                          CudaFeature::CUDA_USES_NEW_LAUNCH) ||
       CGF.getLangOpts().HIPUseNewLaunchAPI)
@@ -318,7 +334,8 @@
                                ShmemSize.getPointer(), Stream.getPointer()});
 
   // Emit the call to cudaLaunch
-  llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy);
+  llvm::Value *Kernel =
+      CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy);
   CallArgList LaunchKernelArgs;
   LaunchKernelArgs.add(RValue::get(Kernel),
                        cudaLaunchKernelFD->getParamDecl(0)->getType());
@@ -375,7 +392,8 @@
 
   // Emit the call to cudaLaunch
   llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
-  llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
+  llvm::Value *Arg =
+      CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy);
   CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
   CGF.EmitBranch(EndBlock);
 
@@ -428,7 +446,7 @@
     llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
     llvm::Value *Args[] = {
         &GpuBinaryHandlePtr,
-        Builder.CreateBitCast(I.Kernel, VoidPtrTy),
+        Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy),
         KernelName,
         KernelName,
         llvm::ConstantInt::get(IntTy, -1),
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to