llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-codegen

<details>
<summary>Changes</summary>

CUDA device code needs to be registered with the runtime before kernels can be 
launched, and this registration is done through a global constructor. User code 
in the Clang interpreter is also executed through `global_ctors`. This patch 
places the registration constructor first in the list, so a kernel can be 
launched in the same iteration in which it is defined.

This makes it possible to `#include` a large body of code that both defines 
device functions and launches kernels in clang-repl.
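
As a rough sketch of that use case (the file name and its contents are illustrative, not part of this patch), an included file can now both define and launch a kernel within the single PTU that the include produces:

```cuda
// device_work.cuh (hypothetical): included from a clang-repl --cuda session,
// so everything below lands in one partial translation unit (PTU).
__device__ inline void set_answer(int *value) { *value = 42; }
__global__ void answer_kernel(int *value) { set_answer(value); }

// Top-level statements are allowed in clang-repl. With the registration
// constructor ordered first, this launch finds the kernel already registered.
int *result_ptr = nullptr;
cudaMalloc((void **)&result_ptr, sizeof(int));
answer_kernel<<<1, 1>>>(result_ptr);
```

A single clang-repl iteration such as `#include "device_work.cuh"` would then compile, register, and launch in one step; previously the launch in the same PTU would run before the registration constructor.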

---
Full diff: https://github.com/llvm/llvm-project/pull/66658.diff


2 Files Affected:

- (modified) clang/lib/CodeGen/CodeGenModule.cpp (+1-1) 
- (added) clang/test/Interpreter/CUDA/launch-same-ptu.cu (+21) 


``````````diff
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 8b0c9340775cbe9..783865409c778f5 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -794,7 +794,7 @@ void CodeGenModule::Release() {
       AddGlobalCtor(ObjCInitFunction);
   if (Context.getLangOpts().CUDA && CUDARuntime) {
     if (llvm::Function *CudaCtorFunction = CUDARuntime->finalizeModule())
-      AddGlobalCtor(CudaCtorFunction);
+      AddGlobalCtor(CudaCtorFunction, 0);
   }
   if (OpenMPRuntime) {
     if (llvm::Function *OpenMPRequiresDirectiveRegFun =
diff --git a/clang/test/Interpreter/CUDA/launch-same-ptu.cu b/clang/test/Interpreter/CUDA/launch-same-ptu.cu
new file mode 100644
index 000000000000000..93e203a47212fbf
--- /dev/null
+++ b/clang/test/Interpreter/CUDA/launch-same-ptu.cu
@@ -0,0 +1,21 @@
+// Tests __device__ function calls
+// RUN: cat %s | clang-repl --cuda | FileCheck %s
+
+extern "C" int printf(const char*, ...);
+
+int var;
+int* devptr = nullptr;
+printf("cudaMalloc: %d\n", cudaMalloc((void **) &devptr, sizeof(int)));
+// CHECK: cudaMalloc: 0
+
+__device__ inline void test_device(int* value) { *value = 42; } __global__ void test_kernel(int* value) { test_device(value); } test_kernel<<<1,1>>>(devptr);
+printf("CUDA Error: %d\n", cudaGetLastError());
+// CHECK-NEXT: CUDA Error: 0
+
+printf("cudaMemcpy: %d\n", cudaMemcpy(&var, devptr, sizeof(int), 
cudaMemcpyDeviceToHost));
+// CHECK-NEXT: cudaMemcpy: 0
+
+printf("Value: %d\n", var);
+// CHECK-NEXT: Value: 42
+
+%quit

``````````

</details>


https://github.com/llvm/llvm-project/pull/66658