llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clangir

Author: David Rivera (RiverDave)

<details>
<summary>Changes</summary>

Note that this is the first of a series of incremental patches; the stub body is therefore left empty for now, since filling it in requires a portion of the actual CUDA runtime support, which is deferred to a later PR.

---
Full diff: https://github.com/llvm/llvm-project/pull/177790.diff

4 Files Affected:

- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.cpp (+3-1)
- (modified) clang/lib/CIR/CodeGen/CIRGenModule.cpp (+8-2)
- (added) clang/test/CIR/CodeGen/CUDA/kernel-stub-name.cu (+22)
- (added) clang/test/CIR/CodeGen/inputs/cuda.h (+74)

``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index f2d73720a9c2b..4c212b06019ea 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -748,7 +748,9 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
     emitConstructorBody(args);
   } else if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
              funcDecl->hasAttr<CUDAGlobalAttr>()) {
-    getCIRGenModule().errorNYI(bodyRange, "CUDA kernel");
+    // TODO(cir): Emit device stub body with kernel launch runtime calls
+    // (emitDeviceStub). For now, emit an empty stub.
+    assert(!cir::MissingFeatures::cudaSupport());
   } else if (isa<CXXMethodDecl>(funcDecl) &&
              cast<CXXMethodDecl>(funcDecl)->isLambdaStaticInvoker()) {
     // The lambda static invoker function is special, because it forwards or
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 61d84f197e6ec..b535eab913a5d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -1772,9 +1772,15 @@ static std::string getMangledNameImpl(CIRGenModule &cgm, GlobalDecl gd,
       cgm.errorNYI(nd->getSourceRange(), "getMangledName: X86RegCall");
     } else if (fd && fd->hasAttr<CUDAGlobalAttr>() &&
                gd.getKernelReferenceKind() == KernelReferenceKind::Stub) {
-      cgm.errorNYI(nd->getSourceRange(), "getMangledName: CUDA device stub");
+      out << "__device_stub__" << ii->getName();
+    } else if (fd &&
+               DeviceKernelAttr::isOpenCLSpelling(
+                   fd->getAttr<DeviceKernelAttr>()) &&
+               gd.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+      cgm.errorNYI(nd->getSourceRange(), "getMangledName: OpenCL Stub");
+    } else {
+      out << ii->getName();
     }
-    out << ii->getName();
   }
 
   // Check if the module name hash should be appended for internal linkage
diff --git a/clang/test/CIR/CodeGen/CUDA/kernel-stub-name.cu b/clang/test/CIR/CodeGen/CUDA/kernel-stub-name.cu
new file mode 100644
index 0000000000000..6d5efb69827e3
--- /dev/null
+++ b/clang/test/CIR/CodeGen/CUDA/kernel-stub-name.cu
@@ -0,0 +1,22 @@
+// Based on clang/test/CodeGenCUDA/kernel-stub-name.cu.
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s \
+// RUN:   -x cuda -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include "../inputs/cuda.h"
+
+// CHECK: cir.func {{.*}} @__device_stub__ckernel()
+// CHECK-NEXT: cir.return
+// CHECK-NEXT: }
+extern "C" __global__ void ckernel() {}
+
+// CHECK: cir.func {{.*}} @_ZN2ns23__device_stub__nskernelEv()
+namespace ns {
+__global__ void nskernel() {}
+} // namespace ns
+
+// CHECK: cir.func {{.*}} @_Z25__device_stub__kernelfuncIiEvv()
+template <class T>
+__global__ void kernelfunc() {}
+template __global__ void kernelfunc<int>();
diff --git a/clang/test/CIR/CodeGen/inputs/cuda.h b/clang/test/CIR/CodeGen/inputs/cuda.h
new file mode 100644
index 0000000000000..204bf2972088d
--- /dev/null
+++ b/clang/test/CIR/CodeGen/inputs/cuda.h
@@ -0,0 +1,74 @@
+/* Minimal declarations for CUDA support. Testing purposes only. */
+/* From test/CodeGenCUDA/Inputs/cuda.h. */
+#include <stddef.h>
+
+#if __HIP__ || __CUDA__
+#define __constant__ __attribute__((constant))
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __host__ __attribute__((host))
+#define __shared__ __attribute__((shared))
+#if __HIP__
+#define __managed__ __attribute__((managed))
+#endif
+#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#define __grid_constant__ __attribute__((grid_constant))
+#else
+#define __constant__
+#define __device__
+#define __global__
+#define __host__
+#define __shared__
+#define __managed__
+#define __launch_bounds__(...)
+#define __grid_constant__
+#endif
+
+struct dim3 {
+  unsigned x, y, z;
+  __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
+};
+
+#if __HIP__ || HIP_PLATFORM
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
+int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
+                     hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+                                                 size_t sharedSize = 0,
+                                                 hipStream_t stream = 0);
+#ifndef __HIP_API_PER_THREAD_DEFAULT_STREAM__
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+                                      dim3 blockDim, void **args,
+                                      size_t sharedMem,
+                                      hipStream_t stream);
+#else
+extern "C" hipError_t hipLaunchKernel_spt(const void *func, dim3 gridDim,
+                                          dim3 blockDim, void **args,
+                                          size_t sharedMem,
+                                          hipStream_t stream);
+#endif // __HIP_API_PER_THREAD_DEFAULT_STREAM__
+#elif __OFFLOAD_VIA_LLVM__
+extern "C" unsigned __llvmPushCallConfiguration(dim3 gridDim, dim3 blockDim,
+                                                size_t sharedMem = 0, void *stream = 0);
+extern "C" unsigned llvmLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+                                     void **args, size_t sharedMem = 0, void *stream = 0);
+#else
+typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+                                 size_t sharedSize = 0,
+                                 cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+                                           size_t sharedSize = 0,
+                                           cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+                                        dim3 blockDim, void **args,
+                                        size_t sharedMem, cudaStream_t stream);
+extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim,
+                                             dim3 blockDim, void **args,
+                                             size_t sharedMem, cudaStream_t stream);
+
+#endif
+
+extern "C" __device__ int printf(const char*, ...);
``````````

</details>

https://github.com/llvm/llvm-project/pull/177790
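A note for readers skimming the mangling change: the `__device_stub__` prefix is spliced into the identifier before Itanium mangling runs, so the length prefix and any enclosing nested-name are computed over the full stub name. A short illustration, derived from the CHECK lines in the new kernel-stub-name.cu test (assuming the test's cuda.h macros are in scope):

```cuda
// Mirrors the kernels in kernel-stub-name.cu; shown only to spell out
// how the stub prefix interacts with Itanium name mangling.
template <class T>
__global__ void kernelfunc();
// Host stub for kernelfunc<int> mangles to _Z25__device_stub__kernelfuncIiEvv:
// the length 25 covers all of "__device_stub__kernelfunc" (15 + 10 chars).

namespace ns {
__global__ void nskernel();
} // namespace ns
// Host stub mangles to _ZN2ns23__device_stub__nskernelEv: the prefix lands
// inside the nested-name after "2ns", so the namespace component is untouched.
```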
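As context for the deferred emitDeviceStub work: in the classic LLVM codegen path (CGCUDANV.cpp), the host-side stub body pops the `<<<...>>>` launch configuration and forwards the kernel arguments to the runtime launcher. Below is a minimal C++ sketch of roughly equivalent source; the `__cudaPopCallConfiguration` declaration and the hypothetical `ckernel(int)` stub are assumptions for illustration, not taken from this patch or the test header.

```cpp
#include <stddef.h>

// Minimal stand-ins matching the declarations in the test's cuda.h.
struct dim3 { unsigned x, y, z; };
typedef struct cudaStream *cudaStream_t;
typedef enum cudaError {} cudaError_t;

// Assumed runtime entry points; the exact signatures here are illustrative.
extern "C" cudaError_t __cudaPopCallConfiguration(dim3 *gridDim, dim3 *blockDim,
                                                  size_t *sharedMem, void *stream);
extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
                                        dim3 blockDim, void **args,
                                        size_t sharedMem, cudaStream_t stream);

// Roughly what a future __device_stub__ckernel(int) body would lower to:
// recover the launch configuration pushed by the <<<...>>> expression,
// then hand the argument pointers to the runtime launcher.
extern "C" void __device_stub__ckernel(int x) {
  void *args[] = {&x};
  dim3 gridDim, blockDim;
  size_t sharedMem;
  cudaStream_t stream;
  __cudaPopCallConfiguration(&gridDim, &blockDim, &sharedMem, &stream);
  cudaLaunchKernel((const void *)__device_stub__ckernel, gridDim, blockDim,
                   args, sharedMem, stream);
}
```

This patch intentionally emits none of that yet: the stub body stays empty, and the `assert(!cir::MissingFeatures::cudaSupport())` marker records the gap until emitDeviceStub lands in a follow-up.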
