https://github.com/AbdallahRashed updated https://github.com/llvm/llvm-project/pull/195539
>From 2ca78905b86a977caee91d6c2f06a75812ba9da5 Mon Sep 17 00:00:00 2001 From: AbdallahRashed <[email protected]> Date: Mon, 4 May 2026 10:25:00 +0200 Subject: [PATCH] [CIR][CUDA] Support PseudoObjectExpr and CUDA builtin variables Implement emitPseudoObjectRValue and fix VisitPseudoObjectExpr in the scalar emitter to call it instead of errorNYI. Also remove the errorNYI guard for unique OpaqueValueExprs in the PseudoObjectExpr emission loop, matching the behavior of classic CodeGen. This unblocks CUDA builtin variable access (threadIdx, blockIdx, blockDim, gridDim) which goes through PseudoObjectExpr in the AST. Combined with the NVPTX builtin infrastructure already upstream (PR #195214), these variables now lower to cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.*" operations via the generic intrinsic path. Partially addresses llvm/llvm-project#179278. --- clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 3 +- clang/lib/CIR/CodeGen/CIRGenFunction.cpp | 14 ++-- clang/lib/CIR/CodeGen/CIRGenFunction.h | 2 + .../test/CIR/CodeGenCUDA/cuda-builtin-vars.cu | 72 +++++++++++++++++++ 4 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 clang/test/CIR/CodeGenCUDA/cuda-builtin-vars.cu diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 92b7156f3a3a8..231039ec5da29 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -272,8 +272,7 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { convertType(e->getType()), e->getPackLength()); } mlir::Value VisitPseudoObjectExpr(PseudoObjectExpr *e) { - cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: pseudo object"); - return {}; + return cgf.emitPseudoObjectRValue(e).getValue(); } mlir::Value VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *e) { cgf.cgm.errorNYI(e->getSourceRange(), diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 32b4881a93095..18c01b42b1016 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -1075,10 +1075,6 @@ emitPseudoObjectExpr(CIRGenFunction &cgf, const PseudoObjectExpr *e, // Skip unique OVEs. if (ov->isUnique()) { - // FIXME: This doesn't really affect anything, but I cannot find a test - // for this, so leave an ErrorNYI here until we can find one. - cgf.cgm.errorNYI(e->getSourceRange(), - "emitPseudoObjectExpr skipped for uniqueness"); assert(ov != resultExpr && "A unique OVE cannot be used as the result expression"); continue; @@ -1114,9 +1110,7 @@ emitPseudoObjectExpr(CIRGenFunction &cgf, const PseudoObjectExpr *e, if (forLValue) result = cgf.emitLValue(semantic); else - cgf.cgm.errorNYI( - e->getSourceRange(), - "emitPseudoObjectExpr as an RValue, when semantic is result"); + result = cgf.emitAnyExpr(semantic, slot); } else { // FIXME: best I can tell, this is only reachable as an r-value, so this // isn't properly tested. @@ -1130,6 +1124,12 @@ emitPseudoObjectExpr(CIRGenFunction &cgf, const PseudoObjectExpr *e, return result; } +RValue CIRGenFunction::emitPseudoObjectRValue(const PseudoObjectExpr *e, + AggValueSlot slot) { + return std::get<RValue>( + emitPseudoObjectExpr(*this, e, /*forLValue=*/false, slot)); +} + LValue CIRGenFunction::emitPseudoObjectLValue(const PseudoObjectExpr *e) { return std::get<LValue>(emitPseudoObjectExpr(*this, e, /*forLValue=*/true, AggValueSlot::ignored())); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index d58c0556aada6..db698c67ac52e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1563,6 +1563,8 @@ class CIRGenFunction : public CIRGenTypeCache { AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d, mlir::OpBuilder::InsertPoint ip = {}); + RValue emitPseudoObjectRValue(const PseudoObjectExpr *e, + AggValueSlot slot = AggValueSlot::ignored()); LValue emitPseudoObjectLValue(const PseudoObjectExpr *E); /// Emit code and set up symbol table for a variable declaration with auto, diff --git a/clang/test/CIR/CodeGenCUDA/cuda-builtin-vars.cu b/clang/test/CIR/CodeGenCUDA/cuda-builtin-vars.cu new file mode 100644 index 0000000000000..67863decf980e --- /dev/null +++ b/clang/test/CIR/CodeGenCUDA/cuda-builtin-vars.cu @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s + +#include "__clang_cuda_builtin_vars.h" + +__attribute__((global)) +void kernel(int *out) { + int i = 0; + out[i++] = threadIdx.x; + out[i++] = threadIdx.y; + out[i++] = threadIdx.z; + + out[i++] = blockIdx.x; + out[i++] = blockIdx.y; + out[i++] = blockIdx.z; + + out[i++] = blockDim.x; + out[i++] = blockDim.y; + out[i++] = blockDim.z; + + out[i++] = gridDim.x; + out[i++] = gridDim.y; + out[i++] = gridDim.z; +} + +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.tid.x" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.tid.y" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.tid.z" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.ctaid.x" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.ctaid.y" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.ctaid.z" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.ntid.x" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.ntid.y" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.ntid.z" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.nctaid.x" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.nctaid.y" +// CIR-DAG: cir.call_llvm_intrinsic "nvvm.read.ptx.sreg.nctaid.z" + +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.y() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.z() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() +// LLVM-DAG: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() + +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.y() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.z() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.y() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.z() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() +// OGCG-DAG: call noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
