Author: David Rivera Date: 2026-02-19T14:49:59-05:00 New Revision: 297965495b8b338c2becc0b3f354a476d90aba20
URL: https://github.com/llvm/llvm-project/commit/297965495b8b338c2becc0b3f354a476d90aba20 DIFF: https://github.com/llvm/llvm-project/commit/297965495b8b338c2becc0b3f354a476d90aba20.diff LOG: [CIR][HIP] Add Stub body emission test coverage and Fix kernelHandle storage (#179823) Related: https://github.com/llvm/llvm-project/issues/179278, https://github.com/llvm/llvm-project/issues/175871 Besides adding extra test coverage for hip: Given that HIP represents kernel handles as a global Op. In my previous patch there was a slight miss-use of the CIR api. since `getOrCreateCIRGlobal` expects a VarDecl for location info but `gd.getDecl()` (in this scope) is a FunctionDecl, trying to cast it to the proper type would provoke that value to be null. Causing a crash when generating the proper mlir location from the AST. here: https://github.com/llvm/llvm-project/blob/fab5b1858d02ffed88b76d33f7c691ee4e0c82fb/clang/lib/CIR/CodeGen/CIRGenModule.cpp#L731 Fixed it by using `createGlobalOp` which allows to use the location from the proper funcOp itself. Added: Modified: clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp clang/test/CIR/CodeGenCUDA/kernel-call.cu Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp index ad5da0d11ff02..451c28c3cccc1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp @@ -121,9 +121,6 @@ void CIRGenNVCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf, if (cgm.getLangOpts().OffloadViaLLVM) cgm.errorNYI("CIRGenNVCUDARuntime: Offload via LLVM"); - if (cgm.getLangOpts().HIP) - cgm.errorNYI("CIRGenNVCUDARuntime: HIP Support"); - CIRGenBuilderTy &builder = cgm.getBuilder(); mlir::Location loc = fn.getLoc(); @@ -213,7 +210,8 @@ void CIRGenNVCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf, cir::PointerType kernelTy = cir::PointerType::get(globalOp.getSymType()); mlir::Value kernelVal = cir::GetGlobalOp::create(builder, loc, kernelTy, globalOp.getSymName()); - return kernelVal; + mlir::Value func = builder.createBitcast(kernelVal, cgm.voidPtrTy); + return func; } if (cir::FuncOp funcOp = llvm::dyn_cast_or_null<cir::FuncOp>( kernelHandles[fn.getSymName()])) { @@ -325,10 +323,9 @@ mlir::Operation *CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn, CIRGenBuilderTy &builder = cgm.getBuilder(); StringRef globalName = cgm.getMangledName( gd.getWithKernelReferenceKind(KernelReferenceKind::Kernel)); - const VarDecl *varDecl = llvm::dyn_cast_or_null<VarDecl>(gd.getDecl()); - cir::GlobalOp globalOp = - cgm.getOrCreateCIRGlobal(globalName, fn.getFunctionType().getReturnType(), - LangAS::Default, varDecl, NotForDefinition); + cir::GlobalOp globalOp = CIRGenModule::createGlobalOp( + cgm, fn.getLoc(), globalName, fn.getFunctionType(), + /*isConstant=*/true); globalOp->setAttr("alignment", builder.getI64IntegerAttr( cgm.getPointerAlign().getQuantity())); diff --git a/clang/test/CIR/CodeGenCUDA/kernel-call.cu b/clang/test/CIR/CodeGenCUDA/kernel-call.cu index ccc33461567bf..05c48625335a6 100644 --- a/clang/test/CIR/CodeGenCUDA/kernel-call.cu +++ b/clang/test/CIR/CodeGenCUDA/kernel-call.cu @@ -1,16 +1,19 @@ // Based on clang/test/CodeGenCUDA/kernel-call.cu. -// Tests device stub body emission for CUDA kernels. +// Tests device stub body emission for CUDA and HIP kernels. // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \ // RUN: -emit-cir %s -x cuda -o %t.cir // RUN: FileCheck --input-file=%t.cir %s --check-prefix=CUDA-NEW +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fhip-new-launch-api \ +// RUN: -x hip -emit-cir %s -o %t.hip.cir +// RUN: FileCheck --input-file=%t.hip.cir %s --check-prefix=HIP-NEW + #include "Inputs/cuda.h" // TODO: Test CUDA legacy (< 9.0) when legacy stub body is implemented -// TODO: Test HIP when HIP stub body support is complete // Check that the stub function is generated with the correct name // CUDA-NEW-LABEL: cir.func {{.*}} @_Z21__device_stub__kernelif @@ -48,5 +51,11 @@ // Check cudaLaunchKernel is called with all 6 arguments: // func ptr, gridDim, blockDim, args, sharedMem, stream // CUDA-NEW: cir.call @cudaLaunchKernel({{.*}}) : (!cir.ptr<!void>, !rec_dim3, !rec_dim3, !cir.ptr<!cir.ptr<!void>>, !u64i, !cir.ptr<!rec_cudaStream>) -> (!u32i {llvm.noundef}) - +// +// HIP-NEW: cir.global constant external @_Z6kernelif = #cir.global_view<@_Z21__device_stub__kernelif> : !cir.func<(!s32i, !cir.float)> +// HIP-NEW-LABEL: cir.func {{.*}} @_Z21__device_stub__kernelif +// HIP-NEW: cir.alloca !cir.ptr<!rec_hipStream>, {{.*}} ["stream"] +// HIP-NEW: cir.call @__hipPopCallConfiguration({{.*}}) : (!cir.ptr<!rec_dim3>, !cir.ptr<!rec_dim3>, !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!rec_hipStream>>) -> !s32i +// HIP-NEW: cir.get_global @_Z6kernelif : !cir.ptr<!cir.func<(!s32i, !cir.float)>> +// HIP-NEW: cir.call @hipLaunchKernel({{.*}}) : (!cir.ptr<!void>, !rec_dim3, !rec_dim3, !cir.ptr<!cir.ptr<!void>>, !u64i, !cir.ptr<!rec_hipStream>) -> (!u32i {llvm.noundef}) __global__ void kernel(int x, float y) {} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
