llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: David Rivera (RiverDave) <details> <summary>Changes</summary> This patch aims to improve parity with OG codegen on targets with non-flat alloca address space. I observed this after getting some crashes while compiling PolybenchGpu for HIP (amdgpu). This work had previously been merged in the incubator, most notably: https://github.com/llvm/clangir/pull/2090, https://github.com/llvm/clangir/pull/2088. CIR currently returns the raw `cir.alloca` address from temporary/local alloca creation. On AMDGPU, stack allocas live in private addrspace(5), but ordinary C/C++/HIP auto variables are still used through the language-visible generic/flat address space. OG CodeGen handles this by creating the alloca in the target stack address space and immediately casting it to the language-visible address space when those differ. For example: ```llvm %fmt = alloca [6 x i8], align 1, addrspace(5) %tmp = alloca ptr, align 8, addrspace(5) %fmt.ascast = addrspacecast ptr addrspace(5) %fmt to ptr %tmp.ascast = addrspacecast ptr addrspace(5) %tmp to ptr %arraydecay = getelementptr inbounds [6 x i8], ptr %fmt.ascast, i64 0, i64 0 store ptr %arraydecay, ptr %tmp.ascast, align 8 ``` I've also added a helper to recover the underlying alloca through casts for callers that need to annotate the original cir.alloca, and preserves source pointer address spaces when creating pointer bitcasts. --- Full diff: https://github.com/llvm/llvm-project/pull/196868.diff 9 Files Affected: - (modified) clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h (+3-1) - (modified) clang/lib/CIR/CodeGen/Address.h (+14) - (modified) clang/lib/CIR/CodeGen/CIRGenDecl.cpp (+5-4) - (modified) clang/lib/CIR/CodeGen/CIRGenExpr.cpp (+45-20) - (modified) clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp (+4-1) - (modified) clang/lib/CIR/CodeGen/CIRGenFunction.cpp (+7-6) - (modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+11) - (modified) clang/test/CIR/CodeGen/amdgpu-call-addrspace-cast.cpp (+2-2) - (added) clang/test/CIR/CodeGen/amdgpu-stack-alloca-array-decay.cpp (+42) ``````````diff diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 646fc7eb3c226..82a81ab4db596 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -544,7 +544,9 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { mlir::Value createPtrBitcast(mlir::Value src, mlir::Type newPointeeTy) { assert(mlir::isa<cir::PointerType>(src.getType()) && "expected ptr src"); - return createBitcast(src, getPointerTo(newPointeeTy)); + cir::PointerType srcPtrTy = mlir::cast<cir::PointerType>(src.getType()); + return createBitcast(src, + getPointerTo(newPointeeTy, srcPtrTy.getAddrSpace())); } mlir::Value createPtrIsNull(mlir::Value ptr) { diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h index b459ec948c2c4..22fe1d83f7869 100644 --- a/clang/lib/CIR/CodeGen/Address.h +++ b/clang/lib/CIR/CodeGen/Address.h @@ -18,6 +18,7 @@ #include "mlir/IR/Value.h" #include "clang/AST/CharUnits.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/PointerIntPair.h" @@ -146,6 +147,19 @@ class Address { return mlir::dyn_cast_or_null<OpTy>(getDefiningOp()); } + /// Return the underlying alloca for this address, if any. + /// + /// Addresses may refer to an alloca through a cast, for example when a target + /// stack address space is cast to the language-visible address space. Peel + /// those cast ops so callers that need to annotate the original alloca can + /// still find it. + cir::AllocaOp getUnderlyingAllocaOp() const { + mlir::Value ptr = getPointer(); + while (auto castOp = ptr.getDefiningOp<cir::CastOp>()) + ptr = castOp.getSrc(); + return ptr.getDefiningOp<cir::AllocaOp>(); + } + /// Whether the pointer is known not to be null. bool isKnownNonNull() const { assert(isValid() && "Invalid address"); diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index 2bf90e99eb7d3..94e2bec69b6c4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -22,6 +22,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/Basic/Cuda.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/MissingFeatures.h" using namespace clang; @@ -203,7 +204,7 @@ static void emitStoresForConstant(CIRGenModule &cgm, const VarDecl &d, // The address is usually and alloca, but there is at least one case where // emitAutoVarInit is called from the OpenACC codegen with an address that // is not an alloca. - auto allocaOp = addr.getDefiningOp<cir::AllocaOp>(); + cir::AllocaOp allocaOp = addr.getUnderlyingAllocaOp(); if (allocaOp) allocaOp.setInitAttr(mlir::UnitAttr::get(&cgm.getMLIRContext())); @@ -303,9 +304,9 @@ void CIRGenFunction::emitAutoVarInit( if (!emission.wasEmittedAsOffloadClause()) { // In case lv has uses it means we indeed initialized something // out of it while trying to build the expression, mark it as such. - mlir::Value val = lv.getAddress().getPointer(); - assert(val && "Should have an address"); - auto allocaOp = val.getDefiningOp<cir::AllocaOp>(); + Address addr = lv.getAddress(); + assert(addr.isValid() && "Should have an address"); + cir::AllocaOp allocaOp = addr.getUnderlyingAllocaOp(); assert(allocaOp && "Address should come straight out of the alloca"); if (!allocaOp.use_empty()) diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index cb53430438219..f6e1245d8c298 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -491,7 +491,7 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, // Update the alloca with more info on initialization. assert(addr.getPointer() && "expected pointer to exist"); - auto srcAlloca = addr.getDefiningOp<cir::AllocaOp>(); + cir::AllocaOp srcAlloca = addr.getUnderlyingAllocaOp(); if (currVarDecl && srcAlloca) { const VarDecl *vd = currVarDecl; assert(vd && "VarDecl expected"); @@ -1878,7 +1878,7 @@ static Address createReferenceTemporary(CIRGenFunction &cgf, if (const ValueDecl *extDecl = m->getExtendingDecl()) { auto extDeclAddrIter = cgf.localDeclMap.find(extDecl); if (extDeclAddrIter != cgf.localDeclMap.end()) - extDeclAlloca = extDeclAddrIter->second.getDefiningOp<cir::AllocaOp>(); + extDeclAlloca = extDeclAddrIter->second.getUnderlyingAllocaOp(); } mlir::OpBuilder::InsertPoint ip; if (extDeclAlloca) { @@ -2720,8 +2720,9 @@ Address CIRGenFunction::createMemTemp(QualType ty, CharUnits align, mlir::Location loc, const Twine &name, Address *alloca, mlir::OpBuilder::InsertPoint ip) { - Address result = createTempAlloca(convertTypeForMem(ty), align, loc, name, - /*ArraySize=*/nullptr, alloca, ip); + Address result = + createTempAlloca(convertTypeForMem(ty), /*destAddrSpace=*/{}, align, loc, + name, /*arraySize=*/nullptr, alloca, ip); if (ty->isConstantMatrixType()) { assert(!cir::MissingFeatures::matrixType()); cgm.errorNYI(loc, "temporary matrix value"); @@ -2741,33 +2742,57 @@ Address CIRGenFunction::createTempAllocaWithoutCast( return Address(alloca, ty, align); } -/// This creates a alloca and inserts it into the entry block. The alloca is -/// casted to default address space if necessary. -// TODO(cir): Implement address space casting to match classic codegen's -// CreateTempAlloca behavior with DestLangAS parameter Address CIRGenFunction::createTempAlloca(mlir::Type ty, CharUnits align, mlir::Location loc, const Twine &name, mlir::Value arraySize, Address *allocaAddr, mlir::OpBuilder::InsertPoint ip) { - Address alloca = - createTempAllocaWithoutCast(ty, align, loc, name, arraySize, ip); - if (allocaAddr) - *allocaAddr = alloca; - mlir::Value v = alloca.getPointer(); + return createTempAlloca(ty, /*destAddrSpace=*/{}, align, loc, name, arraySize, + allocaAddr, ip); +} + +Address CIRGenFunction::maybeCastStackAddressSpace( + Address alloca, mlir::ptr::MemorySpaceAttrInterface destAddrSpace, + mlir::Value arraySize) { + if (!destAddrSpace) + destAddrSpace = cir::toCIRAddressSpaceAttr( + getMLIRContext(), cgm.getLangTempAllocaAddressSpace()); + + mlir::ptr::MemorySpaceAttrInterface srcAddrSpace = getCIRAllocaAddressSpace(); // Alloca always returns a pointer in alloca address space, which may // be different from the type defined by the language. For example, // in C++ the auto variables are in the default address space. Therefore // cast alloca to the default address space when necessary. + if (srcAddrSpace == destAddrSpace) + return alloca; - cir::PointerType dstTy; - if (getCIRAllocaAddressSpace()) - dstTy = builder.getPointerTo(ty, getCIRAllocaAddressSpace()); - else - dstTy = builder.getPointerTo(ty, clang::LangAS::Default); - v = performAddrSpaceCast(v, dstTy); + mlir::OpBuilder::InsertionGuard guard(builder); + if (!arraySize) { + mlir::Block *entryBlock = getCurFunctionEntryBlock(); + builder.restoreInsertionPoint(builder.getBestAllocaInsertPoint(entryBlock)); + } else if (cir::AllocaOp allocaOp = alloca.getUnderlyingAllocaOp()) { + builder.setInsertionPointAfter(allocaOp); + } + + mlir::Type destPtrTy = + builder.getPointerTo(alloca.getElementType(), destAddrSpace); + mlir::Value casted = performAddrSpaceCast(alloca.getPointer(), destPtrTy); + return Address(casted, alloca.getElementType(), alloca.getAlignment(), + /*isKnownNonNull=*/true); +} - return Address(v, ty, align); +/// This creates a alloca and inserts it into the entry block. The alloca is +/// casted to the requested language address space if necessary. +Address CIRGenFunction::createTempAlloca( + mlir::Type ty, mlir::ptr::MemorySpaceAttrInterface destAddrSpace, + CharUnits align, mlir::Location loc, const Twine &name, + mlir::Value arraySize, Address *allocaAddr, + mlir::OpBuilder::InsertPoint ip) { + Address alloca = + createTempAllocaWithoutCast(ty, align, loc, name, arraySize, ip); + if (allocaAddr) + *allocaAddr = alloca; + return maybeCastStackAddressSpace(alloca, destAddrSpace, arraySize); } /// This creates an alloca and inserts it into the entry block if \p ArraySize diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index c7b11aa3f0009..5d2a9f57cdc00 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -19,6 +19,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/Basic/OperatorKinds.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/Sequence.h" #include "llvm/Support/TrailingObjects.h" @@ -963,7 +964,9 @@ static void enterNewDeleteCleanup(CIRGenFunction &cgf, const CXXNewExpr *e, typedef mlir::Value ValueTy; typedef mlir::Value RValueTy; static RValue get(CIRGenFunction &cgf, ValueTy v) { - auto alloca = v.getDefiningOp<cir::AllocaOp>(); + while (auto castOp = v.getDefiningOp<cir::CastOp>()) + v = castOp.getSrc(); + cir::AllocaOp alloca = v.getDefiningOp<cir::AllocaOp>(); return RValue::get(cgf.getBuilder().createAlignedLoad( alloca.getLoc(), alloca.getAllocaType(), alloca, llvm::MaybeAlign(alloca.getAlignment()))); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 3e38f678a7474..e81649d805d04 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/GlobalDecl.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/IR/FPEnv.h" @@ -218,10 +219,9 @@ bool CIRGenFunction::constantFoldsToSimpleInteger(const Expr *cond, void CIRGenFunction::emitAndUpdateRetAlloca(QualType type, mlir::Location loc, CharUnits alignment) { if (!type->isVoidType()) { - mlir::Value addr = emitAlloca("__retval", convertType(type), loc, alignment, - /*insertIntoFnEntryBlock=*/false); - fnRetAlloca = addr; - returnValue = Address(addr, alignment); + Address allocaAddr = Address::invalid(); + returnValue = createMemTemp(type, alignment, loc, "__retval", &allocaAddr); + fnRetAlloca = allocaAddr.getPointer(); } } @@ -231,7 +231,8 @@ void CIRGenFunction::declare(mlir::Value addrVal, const Decl *var, QualType ty, assert(isa<NamedDecl>(var) && "Needs a named decl"); assert(!symbolTable.count(var) && "not supposed to be available just yet"); - auto allocaOp = addrVal.getDefiningOp<cir::AllocaOp>(); + Address addr(addrVal, convertTypeForMem(ty), alignment); + cir::AllocaOp allocaOp = addr.getUnderlyingAllocaOp(); assert(allocaOp && "expected cir::AllocaOp"); if (isParam) @@ -239,7 +240,7 @@ void CIRGenFunction::declare(mlir::Value addrVal, const Decl *var, QualType ty, if (ty->isReferenceType() || ty.isConstQualified()) allocaOp.setConstantAttr(mlir::UnitAttr::get(&getMLIRContext())); - symbolTable.insert(var, allocaOp); + symbolTable.insert(var, addrVal); } void CIRGenFunction::LexicalScope::cleanup() { diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index de91b2f903018..cb5516da97f9a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -2283,11 +2283,22 @@ class CIRGenFunction : public CIRGenTypeCache { mlir::Value arraySize = nullptr, Address *alloca = nullptr, mlir::OpBuilder::InsertPoint ip = {}); + Address createTempAlloca(mlir::Type ty, + mlir::ptr::MemorySpaceAttrInterface destAddrSpace, + CharUnits align, mlir::Location loc, + const Twine &name = "tmp", + mlir::Value arraySize = nullptr, + Address *alloca = nullptr, + mlir::OpBuilder::InsertPoint ip = {}); Address createTempAllocaWithoutCast(mlir::Type ty, CharUnits align, mlir::Location loc, const Twine &name = "tmp", mlir::Value arraySize = nullptr, mlir::OpBuilder::InsertPoint ip = {}); + Address + maybeCastStackAddressSpace(Address alloca, + mlir::ptr::MemorySpaceAttrInterface destAddrSpace, + mlir::Value arraySize); Address createDefaultAlignTempAlloca(mlir::Type ty, mlir::Location loc, const Twine &name); diff --git a/clang/test/CIR/CodeGen/amdgpu-call-addrspace-cast.cpp b/clang/test/CIR/CodeGen/amdgpu-call-addrspace-cast.cpp index 6b98ea10fceac..3217addeca800 100644 --- a/clang/test/CIR/CodeGen/amdgpu-call-addrspace-cast.cpp +++ b/clang/test/CIR/CodeGen/amdgpu-call-addrspace-cast.cpp @@ -30,12 +30,12 @@ void call_with_global_ptr() { // CIR-LABEL: cir.func{{.*}} @_Z19call_with_local_ptrv() // CIR: %[[ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i, target_address_space(5)> // CIR: %[[CAST:.*]] = cir.cast address_space %[[ALLOCA]] : !cir.ptr<!s32i, target_address_space(5)> -> !cir.ptr<!s32i> -// CIR-NEXT: cir.call @_Z9takes_ptrPi(%[[CAST]]) +// CIR: cir.call @_Z9takes_ptrPi(%[[CAST]]) // LLVM-LABEL: define{{.*}} void @_Z19call_with_local_ptrv() // LLVM: %[[ALLOCA:.*]] = alloca i32, i64 1, align 4, addrspace(5) // LLVM: %[[CAST:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA]] to ptr -// LLVM-NEXT: call void @_Z9takes_ptrPi(ptr noundef %[[CAST]]) +// LLVM: call void @_Z9takes_ptrPi(ptr noundef %[[CAST]]) // OGCG-LABEL: define{{.*}} void @_Z19call_with_local_ptrv() // OGCG: %[[ALLOCA:.*]] = alloca i32, align 4, addrspace(5) diff --git a/clang/test/CIR/CodeGen/amdgpu-stack-alloca-array-decay.cpp b/clang/test/CIR/CodeGen/amdgpu-stack-alloca-array-decay.cpp new file mode 100644 index 0000000000000..725a78016e50e --- /dev/null +++ b/clang/test/CIR/CodeGen/amdgpu-stack-alloca-array-decay.cpp @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG + +void foo() { + const char fmt[] = "hello"; + const char *tmp = fmt; + while (*tmp++) + ; +} + +// CIR-LABEL: cir.func{{.*}} @_Z3foov() +// CIR: %[[FMT_RAW:.*]] = cir.alloca !cir.array<!s8i x 6>, !cir.ptr<!cir.array<!s8i x 6>, target_address_space(5)>, ["fmt" +// CIR-NEXT: %[[TMP_RAW:.*]] = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>, target_address_space(5)>, ["tmp" +// CIR-DAG: %[[FMT:.*]] = cir.cast address_space %[[FMT_RAW]] : !cir.ptr<!cir.array<!s8i x 6>, target_address_space(5)> -> !cir.ptr<!cir.array<!s8i x 6>> +// CIR-DAG: %[[TMP:.*]] = cir.cast address_space %[[TMP_RAW]] : !cir.ptr<!cir.ptr<!s8i>, target_address_space(5)> -> !cir.ptr<!cir.ptr<!s8i>> +// CIR: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[FMT]] : !cir.ptr<!cir.array<!s8i x 6>> -> !cir.ptr<!s8i> +// CIR: cir.store{{.*}} %[[DECAY]], %[[TMP]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>> + +// LLVM-LABEL: define{{.*}} void @_Z3foov() +// LLVM: %[[FMT:.*]] = alloca [6 x i8], i64 1, align 1, addrspace(5) +// LLVM-NEXT: %[[TMP:.*]] = alloca ptr, i64 1, align 8, addrspace(5) +// LLVM-DAG: %[[FMT_CAST:.*]] = addrspacecast ptr addrspace(5) %[[FMT]] to ptr +// LLVM-DAG: %[[TMP_CAST:.*]] = addrspacecast ptr addrspace(5) %[[TMP]] to ptr +// LLVM: %[[DECAY:.*]] = getelementptr {{.*}} ptr %[[FMT_CAST]], +// LLVM: store ptr %[[DECAY]], ptr %[[TMP_CAST]], align 8 + +// OGCG-LABEL: define{{.*}} void @_Z3foov() +// OGCG: %[[FMT:.*]] = alloca [6 x i8], align 1, addrspace(5) +// OGCG-NEXT: %[[TMP:.*]] = alloca ptr, align 8, addrspace(5) +// OGCG-NEXT: %[[FMT_CAST:.*]] = addrspacecast ptr addrspace(5) %[[FMT]] to ptr +// OGCG-NEXT: %[[TMP_CAST:.*]] = addrspacecast ptr addrspace(5) %[[TMP]] to ptr +// OGCG: %[[DECAY:.*]] = getelementptr inbounds [6 x i8], ptr %[[FMT_CAST]], i64 0, i64 0 +// OGCG: store ptr %[[DECAY]], ptr %[[TMP_CAST]], align 8 `````````` </details> https://github.com/llvm/llvm-project/pull/196868 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
