anutosh491 wrote:
This works as expected now!
```
anutosh491@vv-nuc:/build/anutosh491/llvm-project/build2/bin$ ./clang-repl --cuda
clang-repl> extern "C" int printf(const char*, ...);
clang-repl> template <typename T> __device__ inline T sum(T a, T b) { return a
+ b; }
clang-repl> __global__ void test_kernel(int* value) { *value = sum(40, 2); }
clang-repl> int var;
clang-repl> int* devptr = nullptr;
clang-repl> printf("cudaMalloc: %d\n", cudaMalloc((void **) &devptr,
sizeof(int)));
cudaMalloc: 0
clang-repl> test_kernel<<<1,1>>>(devptr);
clang-repl> printf("CUDA Error: %d\n", cudaGetLastError());
CUDA Error: 0
clang-repl> printf("cudaMemcpy: %d\n", cudaMemcpy(&var, devptr, sizeof(int),
cudaMemcpyDeviceToHost));
cudaMemcpy: 0
clang-repl> printf("Value: %d\n", var);
Value: 42
clang-repl>
```
https://github.com/llvm/llvm-project/pull/137458
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits