llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen <details> <summary>Changes</summary> CUDA device code needs to be registered to the runtime before kernels can be launched. This is done through a global constructor. User code in Clang interpreter, is also executed through `global_ctors`. This patch ensures kernels can be launched in the same iteration it is defined in by making the registration first in the list. This allows `#include`-ing a large portion of code that defines device functions and also launches kernels in clang-repl. --- Full diff: https://github.com/llvm/llvm-project/pull/66658.diff 2 Files Affected: - (modified) clang/lib/CodeGen/CodeGenModule.cpp (+1-1) - (added) clang/test/Interpreter/CUDA/launch-same-ptu.cu (+21) ``````````diff diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 8b0c9340775cbe9..783865409c778f5 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -794,7 +794,7 @@ void CodeGenModule::Release() { AddGlobalCtor(ObjCInitFunction); if (Context.getLangOpts().CUDA && CUDARuntime) { if (llvm::Function *CudaCtorFunction = CUDARuntime->finalizeModule()) - AddGlobalCtor(CudaCtorFunction); + AddGlobalCtor(CudaCtorFunction, 0); } if (OpenMPRuntime) { if (llvm::Function *OpenMPRequiresDirectiveRegFun = diff --git a/clang/test/Interpreter/CUDA/launch-same-ptu.cu b/clang/test/Interpreter/CUDA/launch-same-ptu.cu new file mode 100644 index 000000000000000..93e203a47212fbf --- /dev/null +++ b/clang/test/Interpreter/CUDA/launch-same-ptu.cu @@ -0,0 +1,21 @@ +// Tests __device__ function calls +// RUN: cat %s | clang-repl --cuda | FileCheck %s + +extern "C" int printf(const char*, ...); + +int var; +int* devptr = nullptr; +printf("cudaMalloc: %d\n", cudaMalloc((void **) &devptr, sizeof(int))); +// CHECK: cudaMalloc: 0 + +__device__ inline void test_device(int* value) { *value = 42; } __global__ void test_kernel(int* value) { test_device(value); } test_kernel<<<1,1>>>(devptr); +printf("CUDA Error: %d\n", cudaGetLastError()); +// CHECK-NEXT: CUDA Error: 0 + +printf("cudaMemcpy: %d\n", cudaMemcpy(&var, devptr, sizeof(int), cudaMemcpyDeviceToHost)); +// CHECK-NEXT: cudaMemcpy: 0 + +printf("Value: %d\n", var); +// CHECK-NEXT: Value: 42 + +%quit `````````` </details> https://github.com/llvm/llvm-project/pull/66658 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits