https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/179084
>From 4ddcd54aa497d50abb73b0bf21bf6ecabe0926f5 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Sat, 31 Jan 2026 20:32:24 -0500 Subject: [PATCH 01/14] [CIR] Address Space support for GlobalOps --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 3 +++ clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 2 ++ 2 files changed, 5 insertions(+) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 322deae312738..24ad58fdb567c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -947,6 +947,9 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, return entry; } + mlir::ptr::MemorySpaceAttrInterface declCIRAS = + cir::toCIRAddressSpaceAttr(getMLIRContext(), getGlobalVarAddressSpace(d)); + mlir::Location loc = getLoc(d->getSourceRange()); // Calculate constant storage flag before creating the global. This was moved diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index eb322d135a804..53166c3acaf7c 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1768,6 +1768,8 @@ void cir::GlobalOp::build( if (isConstant) odsState.addAttribute(getConstantAttrName(odsState.name), odsBuilder.getUnitAttr()); + if (addrSpace) + odsState.addAttribute(getAddrSpaceAttrName(odsState.name), addrSpace); addrSpace = normalizeDefaultAddressSpace(addrSpace); if (addrSpace) >From 9fcbf82c6e76d349bd6ea88bec9c4fd714b65a2e Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Thu, 26 Feb 2026 17:21:27 -0500 Subject: [PATCH 02/14] Global AS lowering For CUDA and CIRGen tests for target AS --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 3 --- clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 24ad58fdb567c..322deae312738 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -947,9 +947,6 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, return entry; } - mlir::ptr::MemorySpaceAttrInterface declCIRAS = - cir::toCIRAddressSpaceAttr(getMLIRContext(), getGlobalVarAddressSpace(d)); - mlir::Location loc = getLoc(d->getSourceRange()); // Calculate constant storage flag before creating the global. This was moved diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 53166c3acaf7c..a07497f939921 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1768,6 +1768,8 @@ void cir::GlobalOp::build( if (isConstant) odsState.addAttribute(getConstantAttrName(odsState.name), odsBuilder.getUnitAttr()); + + addrSpace = normalizeDefaultAddressSpace(addrSpace); if (addrSpace) odsState.addAttribute(getAddrSpaceAttrName(odsState.name), addrSpace); >From d42e95e2fa2538491d3ca8e5cf5280b6cff87a74 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Thu, 26 Feb 2026 17:31:26 -0500 Subject: [PATCH 03/14] fix fmt --- clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 5cc121fbecc8e..0dfb30e07d80d 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -2662,7 +2662,11 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType); // FIXME: These default values are placeholders until the the equivalent +<<<<<<< HEAD // attributes are available on cir.global ops. +======= + // attributes are available on cir.global ops. +>>>>>>> b6368aa62569 (fix fmt) const bool isConst = op.getConstant(); unsigned addrSpace = 0; if (auto targetAS = mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>( >From 535be164a1173b5dda31aad1f2fdcf3de58d0bbc Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Thu, 26 Feb 2026 17:37:11 -0500 Subject: [PATCH 04/14] more fmt yo --- clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 0dfb30e07d80d..5cc121fbecc8e 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -2662,11 +2662,7 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType); // FIXME: These default values are placeholders until the the equivalent -<<<<<<< HEAD // attributes are available on cir.global ops. -======= - // attributes are available on cir.global ops. ->>>>>>> b6368aa62569 (fix fmt) const bool isConst = op.getConstant(); unsigned addrSpace = 0; if (auto targetAS = mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>( >From fdf1b4c3826ee977b3b4cc2cb61aa3d3fa6b9516 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 01:18:16 -0400 Subject: [PATCH 05/14] [CIR][AMDGPU] Lower Language specific address spaces and implement AMDGPU target --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 7 +- clang/lib/CIR/CodeGen/TargetInfo.cpp | 46 ++++ .../CIR/Dialect/Transforms/TargetLowering.cpp | 253 +++++++++++++++++- .../Transforms/TargetLowering/CMakeLists.txt | 1 + .../Transforms/TargetLowering/LowerModule.cpp | 11 +- .../TargetLowering/TargetLoweringInfo.h | 10 + .../TargetLowering/Targets/AMDGPU.cpp | 47 ++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 5 +- .../CIR/CodeGen/amdgpu-address-spaces.cpp | 51 ++++ .../CIR/Lowering/global-address-space.cir | 57 +++- 10 files changed, 470 insertions(+), 18 deletions(-) create mode 100644 clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp create mode 100644 clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 322deae312738..7dfd78fa2e5aa 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -908,6 +908,9 @@ cir::GlobalOp CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, LangAS langAS, const VarDecl *d, ForDefinition_t isForDefinition) { + + mlir::ptr::MemorySpaceAttrInterface cirAS = + cir::toCIRAddressSpaceAttr(getMLIRContext(), langAS); // Lookup the entry, lazily creating it if necessary. cir::GlobalOp entry; if (mlir::Operation *v = getGlobalValue(mangledName)) { @@ -918,13 +921,13 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, } if (entry) { + mlir::ptr::MemorySpaceAttrInterface entryCIRAS = entry.getAddrSpaceAttr(); assert(!cir::MissingFeatures::opGlobalWeakRef()); assert(!cir::MissingFeatures::setDLLStorageClass()); assert(!cir::MissingFeatures::openMP()); - if (entry.getSymType() == ty && - (cir::isMatchingAddressSpace(entry.getAddrSpaceAttr(), langAS))) + if (entry.getSymType() == ty && entryCIRAS == cirAS) return entry; // If there are two attempts to define the same mangled name, issue an diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index a5cc74b18a8a0..d05b7a9373303 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -86,7 +86,48 @@ class X8664TargetCIRGenInfo : public TargetCIRGenInfo { X8664TargetCIRGenInfo(CIRGenTypes &cgt) : TargetCIRGenInfo(std::make_unique<X8664ABIInfo>(cgt)) {} }; +class AMDGPUABIInfo : public ABIInfo { +public: + AMDGPUABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {} +}; + +class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { +public: + AMDGPUTargetCIRGenInfo(CIRGenTypes &cgt) + : TargetCIRGenInfo(std::make_unique<AMDGPUABIInfo>(cgt)) {} + + clang::LangAS + getGlobalVarAddressSpace(CIRGenModule &cgm, + const clang::VarDecl *decl) const override { + using clang::LangAS; + assert(!cgm.getLangOpts().OpenCL && + !(cgm.getLangOpts().CUDA && cgm.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + LangAS defaultGlobalAS = LangAS::opencl_global; + if (!decl) + return defaultGlobalAS; + + LangAS addrSpace = decl->getType().getAddressSpace(); + if (addrSpace != LangAS::Default) + return addrSpace; + + // Only promote to address space 4 if VarDecl has constant initialization. + if (decl->getType().isConstantStorage(cgm.getASTContext(), false, false) && + decl->hasConstantInitialization()) { + if (auto constAS = cgm.getTarget().getConstantAddressSpace()) + return *constAS; + } + + return defaultGlobalAS; + } + mlir::ptr::MemorySpaceAttrInterface + getCIRAllocaAddressSpace() const override { + return cir::LangAddressSpaceAttr::get( + &getABIInfo().cgt.getMLIRContext(), + cir::LangAddressSpace::OffloadPrivate); + } +}; } // namespace namespace { @@ -113,6 +154,11 @@ clang::CIRGen::createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt) { return std::make_unique<NVPTXTargetCIRGenInfo>(cgt); } +std::unique_ptr<TargetCIRGenInfo> +clang::CIRGen::createAMDGPUTargetCIRGenInfo(CIRGenTypes &cgt) { + return std::make_unique<AMDGPUTargetCIRGenInfo>(cgt); +} + std::unique_ptr<TargetCIRGenInfo> clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) { return std::make_unique<X8664TargetCIRGenInfo>(cgt); diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp index c3ed588cf06dc..5249107376e67 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp @@ -11,10 +11,15 @@ //===----------------------------------------------------------------------===// #include "TargetLowering/LowerModule.h" +#include "TargetLowering/TargetLoweringInfo.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/Support/LLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/Dialect/Passes.h" -#include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace cir; @@ -32,6 +37,157 @@ struct TargetLoweringPass void runOnOperation() override; }; +/// A generic target lowering pattern that matches any CIR op whose operand or +/// result types need address space conversion. Clones the op with converted +/// types. +class CIRGenericTargetLoweringPattern : public mlir::ConversionPattern { +public: + CIRGenericTargetLoweringPattern(mlir::MLIRContext *context, + const mlir::TypeConverter &typeConverter) + : mlir::ConversionPattern(typeConverter, MatchAnyOpTypeTag(), + /*benefit=*/1, context) {} + + mlir::LogicalResult + matchAndRewrite(mlir::Operation *op, llvm::ArrayRef<mlir::Value> operands, + mlir::ConversionPatternRewriter &rewriter) const override { + // Do not match on operations that have dedicated lowering patterns. + if (llvm::isa<cir::FuncOp, cir::GlobalOp>(op)) + return mlir::failure(); + + const mlir::TypeConverter *typeConverter = getTypeConverter(); + assert(typeConverter && + "CIRGenericTargetLoweringPattern requires a type converter"); + bool operandsAndResultsLegal = typeConverter->isLegal(op); + bool regionsLegal = + std::all_of(op->getRegions().begin(), op->getRegions().end(), + [typeConverter](mlir::Region ®ion) { + return typeConverter->isLegal(®ion); + }); + if (operandsAndResultsLegal && regionsLegal) + return mlir::failure(); + + assert(op->getNumRegions() == 0 && + "CIRGenericTargetLoweringPattern cannot " + "deal with operations with regions"); + + mlir::OperationState loweredOpState(op->getLoc(), op->getName()); + loweredOpState.addOperands(operands); + loweredOpState.addAttributes(op->getAttrs()); + loweredOpState.addSuccessors(op->getSuccessors()); + + llvm::SmallVector<mlir::Type> loweredResultTypes; + loweredResultTypes.reserve(op->getNumResults()); + for (mlir::Type result : op->getResultTypes()) + loweredResultTypes.push_back(typeConverter->convertType(result)); + loweredOpState.addTypes(loweredResultTypes); + + for (mlir::Region ®ion : op->getRegions()) { + mlir::Region *loweredRegion = loweredOpState.addRegion(); + rewriter.inlineRegionBefore(region, *loweredRegion, loweredRegion->end()); + if (mlir::failed( + rewriter.convertRegionTypes(loweredRegion, *getTypeConverter()))) + return mlir::failure(); + } + + mlir::Operation *loweredOp = rewriter.create(loweredOpState); + rewriter.replaceOp(op, loweredOp); + return mlir::success(); + } +}; + +/// Pattern to lower GlobalOp address space attributes. GlobalOp carries +/// addr_space as a standalone attribute (not inside a type), so the +/// TypeConverter won't reach it automatically. +class CIRGlobalOpTargetLowering + : public mlir::OpConversionPattern<cir::GlobalOp> { + const cir::TargetLoweringInfo &targetInfo; + +public: + CIRGlobalOpTargetLowering(mlir::MLIRContext *context, + const mlir::TypeConverter &typeConverter, + const cir::TargetLoweringInfo &targetInfo) + : mlir::OpConversionPattern<cir::GlobalOp>(typeConverter, context, + /*benefit=*/1), + targetInfo(targetInfo) {} + + mlir::LogicalResult + matchAndRewrite(cir::GlobalOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Type loweredSymTy = getTypeConverter()->convertType(op.getSymType()); + if (!loweredSymTy) + return mlir::failure(); + + // Convert the addr_space attribute. + mlir::ptr::MemorySpaceAttrInterface addrSpace = op.getAddrSpaceAttr(); + if (auto langAS = mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>( + addrSpace)) { + unsigned targetAS = + targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); + addrSpace = targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(op.getContext(), + targetAS); + } + + // Only rewrite if something actually changed. + if (loweredSymTy == op.getSymType() && addrSpace == op.getAddrSpaceAttr()) + return mlir::failure(); + + auto newOp = mlir::cast<cir::GlobalOp>(rewriter.clone(*op.getOperation())); + newOp.setSymType(loweredSymTy); + newOp.setAddrSpaceAttr(addrSpace); + rewriter.replaceOp(op, newOp); + return mlir::success(); + } +}; + +/// Pattern to lower FuncOp types that contain address spaces. +class CIRFuncOpTargetLowering + : public mlir::OpConversionPattern<cir::FuncOp> { +public: + using mlir::OpConversionPattern<cir::FuncOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::FuncOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + cir::FuncType opFuncType = op.getFunctionType(); + mlir::TypeConverter::SignatureConversion signatureConversion( + opFuncType.getNumInputs()); + + for (const auto &[i, argType] : llvm::enumerate(opFuncType.getInputs())) { + mlir::Type loweredArgType = getTypeConverter()->convertType(argType); + if (!loweredArgType) + return mlir::failure(); + signatureConversion.addInputs(i, loweredArgType); + } + + mlir::Type loweredReturnType = + getTypeConverter()->convertType(opFuncType.getReturnType()); + if (!loweredReturnType) + return mlir::failure(); + + auto loweredFuncType = cir::FuncType::get( + signatureConversion.getConvertedTypes(), loweredReturnType, + /*isVarArg=*/opFuncType.getVarArg()); + + // Nothing changed, skip. + if (loweredFuncType == opFuncType) + return mlir::failure(); + + cir::FuncOp loweredFuncOp = rewriter.cloneWithoutRegions(op); + loweredFuncOp.setFunctionType(loweredFuncType); + rewriter.inlineRegionBefore(op.getBody(), loweredFuncOp.getBody(), + loweredFuncOp.end()); + if (mlir::failed(rewriter.convertRegionTypes( + &loweredFuncOp.getBody(), *getTypeConverter(), + &signatureConversion))) + return mlir::failure(); + + rewriter.eraseOp(op); + return mlir::success(); + } +}; + } // namespace static void convertSyncScopeIfPresent(mlir::Operation *op, @@ -47,6 +203,82 @@ static void convertSyncScopeIfPresent(mlir::Operation *op, } } +/// Prepare the type converter for the target lowering pass. +/// Converts LangAddressSpaceAttr → TargetAddressSpaceAttr inside pointer types. +static void +prepareTargetLoweringTypeConverter(mlir::TypeConverter &converter, + const cir::TargetLoweringInfo &targetInfo) { + converter.addConversion([](mlir::Type type) { return type; }); + + converter.addConversion( + [&converter, &targetInfo](cir::PointerType type) -> mlir::Type { + mlir::Type pointee = converter.convertType(type.getPointee()); + if (!pointee) + return {}; + auto addrSpace = type.getAddrSpace(); + if (auto langAS = + mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>( + addrSpace)) { + unsigned targetAS = + targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); + addrSpace = + targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(type.getContext(), + targetAS); + } + return cir::PointerType::get(type.getContext(), pointee, addrSpace); + }); + + converter.addConversion([&converter](cir::ArrayType type) -> mlir::Type { + mlir::Type loweredElementType = + converter.convertType(type.getElementType()); + if (!loweredElementType) + return {}; + return cir::ArrayType::get(loweredElementType, type.getSize()); + }); + + converter.addConversion([&converter](cir::FuncType type) -> mlir::Type { + llvm::SmallVector<mlir::Type> loweredInputTypes; + loweredInputTypes.reserve(type.getNumInputs()); + if (mlir::failed( + converter.convertTypes(type.getInputs(), loweredInputTypes))) + return {}; + + mlir::Type loweredReturnType = converter.convertType(type.getReturnType()); + if (!loweredReturnType) + return {}; + + return cir::FuncType::get(loweredInputTypes, loweredReturnType, + /*isVarArg=*/type.getVarArg()); + }); +} + +static void populateTargetLoweringConversionTarget( + mlir::ConversionTarget &target, const mlir::TypeConverter &tc) { + target.addLegalOp<mlir::ModuleOp>(); + + target.addDynamicallyLegalDialect<cir::CIRDialect>( + [&tc](mlir::Operation *op) { + if (!tc.isLegal(op)) + return false; + return std::all_of(op->getRegions().begin(), op->getRegions().end(), + [&tc](mlir::Region ®ion) { + return tc.isLegal(®ion); + }); + }); + + target.addDynamicallyLegalOp<cir::FuncOp>( + [&tc](cir::FuncOp op) { return tc.isLegal(op.getFunctionType()); }); + + target.addDynamicallyLegalOp<cir::GlobalOp>([&tc](cir::GlobalOp op) { + if (!tc.isLegal(op.getSymType())) + return false; + return !mlir::isa_and_present<cir::LangAddressSpaceAttr>( + op.getAddrSpaceAttr()); + }); +} + void TargetLoweringPass::runOnOperation() { auto mod = mlir::cast<mlir::ModuleOp>(getOperation()); std::unique_ptr<cir::LowerModule> lowerModule = cir::createLowerModule(mod); @@ -57,11 +289,30 @@ void TargetLoweringPass::runOnOperation() { return; } + const auto &targetInfo = lowerModule->getTargetLoweringInfo(); + mod->walk([&](mlir::Operation *op) { if (mlir::isa<cir::LoadOp, cir::StoreOp, cir::AtomicXchgOp, cir::AtomicCmpXchgOp, cir::AtomicFetchOp>(op)) convertSyncScopeIfPresent(op, *lowerModule); }); + + // Address space conversion: LangAddressSpaceAttr → TargetAddressSpaceAttr. + mlir::TypeConverter typeConverter; + prepareTargetLoweringTypeConverter(typeConverter, targetInfo); + + mlir::RewritePatternSet patterns(mod.getContext()); + patterns.add<CIRGlobalOpTargetLowering>(mod.getContext(), typeConverter, + targetInfo); + patterns.add<CIRFuncOpTargetLowering>(typeConverter, mod.getContext()); + patterns.add<CIRGenericTargetLoweringPattern>(mod.getContext(), + typeConverter); + + mlir::ConversionTarget target(*mod.getContext()); + populateTargetLoweringConversionTarget(target, typeConverter); + + if (failed(mlir::applyPartialConversion(mod, target, std::move(patterns)))) + signalPassFailure(); } std::unique_ptr<Pass> mlir::createTargetLoweringPass() { diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt index 92148127424e9..07e3a67f97859 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt @@ -3,6 +3,7 @@ add_clang_library(MLIRCIRTargetLowering LowerModule.cpp LowerItaniumCXXABI.cpp TargetLoweringInfo.cpp + Targets/AMDGPU.cpp DEPENDS clangBasic diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp index f2398e3105578..26e63b3b676ae 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp @@ -45,8 +45,15 @@ static std::unique_ptr<CIRCXXABI> createCXXABI(LowerModule &lm) { static std::unique_ptr<TargetLoweringInfo> createTargetLoweringInfo(LowerModule &lm) { - assert(!cir::MissingFeatures::targetLoweringInfo()); - return std::make_unique<TargetLoweringInfo>(); + const llvm::Triple &triple = lm.getTarget().getTriple(); + + switch (triple.getArch()) { + case llvm::Triple::amdgcn: + return createAMDGPUTargetLoweringInfo(); + default: + assert(!cir::MissingFeatures::targetLoweringInfo()); + return std::make_unique<TargetLoweringInfo>(); + } } LowerModule::LowerModule(clang::LangOptions langOpts, diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h index 760c3b0b7cc5e..a307bcb373dec 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h @@ -15,6 +15,8 @@ #define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETLOWERINGINFO_H #include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include <memory> +#include <string> namespace cir { @@ -24,8 +26,16 @@ class TargetLoweringInfo { virtual cir::SyncScopeKind convertSyncScope(cir::SyncScopeKind syncScope) const; + + virtual unsigned + getTargetAddrSpaceFromCIRAddrSpace(cir::LangAddressSpace addrSpace) const { + return 0; + }; }; +// Target-specific factory functions. +std::unique_ptr<TargetLoweringInfo> createAMDGPUTargetLoweringInfo(); + } // namespace cir #endif diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp new file mode 100644 index 0000000000000..058c1200531e5 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp @@ -0,0 +1,47 @@ +//===- AMDGPU.cpp - Emit CIR for AMDGPU -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../TargetLoweringInfo.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "llvm/Support/ErrorHandling.h" + +namespace cir { + +namespace { + +class AMDGPUTargetLoweringInfo : public TargetLoweringInfo { +public: + // Address space mapping from: + // https://llvm.org/docs/AMDGPUUsage.html#address-spaces + unsigned getTargetAddrSpaceFromCIRAddrSpace( + cir::LangAddressSpace addrSpace) const override { + switch (addrSpace) { + case cir::LangAddressSpace::Default: + return 0; + case cir::LangAddressSpace::OffloadPrivate: + return 5; + case cir::LangAddressSpace::OffloadLocal: + return 3; + case cir::LangAddressSpace::OffloadGlobal: + return 1; + case cir::LangAddressSpace::OffloadConstant: + return 4; + case cir::LangAddressSpace::OffloadGeneric: + return 0; + } + llvm_unreachable("Unknown CIR address space for AMDGPU target"); + } +}; + +} // namespace + +std::unique_ptr<TargetLoweringInfo> createAMDGPUTargetLoweringInfo() { + return std::make_unique<AMDGPUTargetLoweringInfo>(); +} + +} // namespace cir diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 5cc121fbecc8e..73b8415ef589c 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -3368,10 +3368,7 @@ static void prepareTypeConverter(mlir::LLVMTypeConverter &converter, mlir::ptr::MemorySpaceAttrInterface addrSpaceAttr = type.getAddrSpace(); unsigned numericAS = 0; - if (auto langAsAttr = - mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>(addrSpaceAttr)) - llvm_unreachable("lowering LangAddressSpaceAttr NYI"); - else if (auto targetAsAttr = + if (auto targetAsAttr = mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>( addrSpaceAttr)) numericAS = targetAsAttr.getValue(); diff --git a/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp new file mode 100644 index 0000000000000..35ceed46189dc --- /dev/null +++ b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// Test address space handling for AMDGPU target in C++ mode (non-OpenCL/HIP). +// This exercises getGlobalVarAddressSpace. + +// Test default address space for globals without explicit AS. +// For AMDGPU in non-OpenCL/HIP mode, globals default to AS 1 (global). +int globalVar = 123; + +// CIR-DAG: cir.global external lang_address_space(offload_global) @globalVar = #cir.int<123> : !s32i +// LLVM-DAG: @globalVar = addrspace(1) global i32 123, align 4 +// OGCG-DAG: @globalVar = addrspace(1) global i32 123, align 4 + +// Test non-const global array goes to global AS. +int globalArray[4] = {1, 2, 3, 4}; + +// CIR-DAG: cir.global external lang_address_space(offload_global) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> +// LLVM-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 +// OGCG-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 + +// Test static global goes to global AS. +static int staticGlobal = 555; + +// CIR-DAG: cir.global "private" internal{{.*}}lang_address_space(offload_global) @_ZL12staticGlobal = #cir.int<555> : !s32i +// LLVM-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 +// OGCG-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 + +// Test constant initialization promotion to AS 4 (constant). +// Use extern to force emission since const globals are otherwise optimized away. +extern const int constGlobal = 456; + +// CIR-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i +// LLVM-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 +// OGCG-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 + +// Test extern const array goes to constant AS. +extern const int constArray[3] = {10, 20, 30}; + +// CIR-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> +// LLVM-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 +// OGCG-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 + +// Use the static variable to ensure it's emitted. +int getStaticGlobal() { return staticGlobal; } diff --git a/clang/test/CIR/Lowering/global-address-space.cir b/clang/test/CIR/Lowering/global-address-space.cir index c9f25e1126098..7161d6852acb2 100644 --- a/clang/test/CIR/Lowering/global-address-space.cir +++ b/clang/test/CIR/Lowering/global-address-space.cir @@ -3,12 +3,13 @@ !s32i = !cir.int<s, 32> -module { - cir.global external target_address_space(1) @global_as1 = #cir.int<42> : !s32i - // CHECK: llvm.mlir.global external @global_as1(42 : i32) {addr_space = 1 : i32} : i32 +module attributes { cir.triple = "amdgcn-amd-amdhsa" } { + // Target address space lowering (passthrough) + cir.global external target_address_space(1) @global_target_as1 = #cir.int<42> : !s32i + // CHECK: llvm.mlir.global external @global_target_as1(42 : i32) {addr_space = 1 : i32} : i32 - cir.global external target_address_space(3) @global_as3 = #cir.int<100> : !s32i - // CHECK: llvm.mlir.global external @global_as3(100 : i32) {addr_space = 3 : i32} : i32 + cir.global external target_address_space(3) @global_target_as3 = #cir.int<100> : !s32i + // CHECK: llvm.mlir.global external @global_target_as3(100 : i32) {addr_space = 3 : i32} : i32 cir.global external @global_default = #cir.int<0> : !s32i // CHECK: llvm.mlir.global external @global_default(0 : i32) {addr_space = 0 : i32} : i32 @@ -16,20 +17,20 @@ module { // Test cir.get_global with address space produces correct llvm.mlir.addressof type // CHECK-LABEL: llvm.func @test_get_global_as1 cir.func @test_get_global_as1() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_as1 : !llvm.ptr<1> + // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_target_as1 : !llvm.ptr<1> // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<1> -> i32 // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_as1 : !cir.ptr<!s32i, target_address_space(1)> + %0 = cir.get_global @global_target_as1 : !cir.ptr<!s32i, target_address_space(1)> %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(1)>, !s32i cir.return %1 : !s32i } // CHECK-LABEL: llvm.func @test_get_global_as3 cir.func @test_get_global_as3() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_as3 : !llvm.ptr<3> + // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_target_as3 : !llvm.ptr<3> // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<3> -> i32 // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_as3 : !cir.ptr<!s32i, target_address_space(3)> + %0 = cir.get_global @global_target_as3 : !cir.ptr<!s32i, target_address_space(3)> %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(3)>, !s32i cir.return %1 : !s32i } @@ -43,4 +44,42 @@ module { %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i cir.return %1 : !s32i } + + // Language address space lowering (AMDGPU mapping) + // See: https://llvm.org/docs/AMDGPUUsage.html#address-spaces + // OffloadGlobal -> 1 + cir.global external lang_address_space(offload_global) @global_lang_global = #cir.int<1> : !s32i + // CHECK: llvm.mlir.global external @global_lang_global(1 : i32) {addr_space = 1 : i32} : i32 + + // OffloadLocal -> 3 + cir.global "private" internal lang_address_space(offload_local) @global_lang_local : !s32i + // CHECK: llvm.mlir.global internal @global_lang_local() {addr_space = 3 : i32} : i32 + + // OffloadConstant -> 4 + cir.global external lang_address_space(offload_constant) @global_lang_constant = #cir.int<2> : !s32i + // CHECK: llvm.mlir.global external @global_lang_constant(2 : i32) {addr_space = 4 : i32} : i32 + + // OffloadPrivate -> 5 + cir.global "private" internal lang_address_space(offload_private) @global_lang_private : !s32i + // CHECK: llvm.mlir.global internal @global_lang_private() {addr_space = 5 : i32} : i32 + + // OffloadGeneric -> 0 + cir.global external lang_address_space(offload_generic) @global_lang_generic = #cir.int<3> : !s32i + // CHECK: llvm.mlir.global external @global_lang_generic(3 : i32) {addr_space = 0 : i32} : i32 + + // Pointer type lowering with lang_address_space + // CHECK: llvm.func @test_ptr_lang_as(%arg0: !llvm.ptr<1>) + cir.func @test_ptr_lang_as(%arg0: !cir.ptr<!s32i, lang_address_space(offload_global)>) { + // The alloca stores a pointer to address space 1, but the alloca itself is on the stack (default AS) + // CHECK: llvm.alloca {{.*}} x !llvm.ptr<1> {{.*}} : (i64) -> !llvm.ptr + %0 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_global)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_global)>>, ["arg", init] {alignment = 8 : i64} + cir.return + } + + // CHECK: llvm.func @test_ptr_target_as(%arg0: !llvm.ptr<5>) + cir.func @test_ptr_target_as(%arg0: !cir.ptr<!s32i, target_address_space(5)>) { + // CHECK: llvm.alloca {{.*}} x !llvm.ptr<5> {{.*}} : (i64) -> !llvm.ptr + %0 = cir.alloca !cir.ptr<!s32i, target_address_space(5)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(5)>>, ["arg", init] {alignment = 8 : i64} + cir.return + } } >From f0a10bd4d9946d9a1696820a6e6afefa639eba74 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 04:36:14 -0400 Subject: [PATCH 06/14] handle formatting --- .../CIR/Dialect/Transforms/TargetLowering.cpp | 80 +++++++++---------- 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp index 5249107376e67..0c1fcbe8f3ee5 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp @@ -66,9 +66,8 @@ class CIRGenericTargetLoweringPattern : public mlir::ConversionPattern { if (operandsAndResultsLegal && regionsLegal) return mlir::failure(); - assert(op->getNumRegions() == 0 && - "CIRGenericTargetLoweringPattern cannot " - "deal with operations with regions"); + assert(op->getNumRegions() == 0 && "CIRGenericTargetLoweringPattern cannot " + "deal with operations with regions"); mlir::OperationState loweredOpState(op->getLoc(), op->getName()); loweredOpState.addOperands(operands); @@ -104,10 +103,10 @@ class CIRGlobalOpTargetLowering public: CIRGlobalOpTargetLowering(mlir::MLIRContext *context, - const mlir::TypeConverter &typeConverter, - const cir::TargetLoweringInfo &targetInfo) + const mlir::TypeConverter &typeConverter, + const cir::TargetLoweringInfo &targetInfo) : mlir::OpConversionPattern<cir::GlobalOp>(typeConverter, context, - /*benefit=*/1), + /*benefit=*/1), targetInfo(targetInfo) {} mlir::LogicalResult @@ -119,14 +118,14 @@ class CIRGlobalOpTargetLowering // Convert the addr_space attribute. mlir::ptr::MemorySpaceAttrInterface addrSpace = op.getAddrSpaceAttr(); - if (auto langAS = mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>( - addrSpace)) { + if (auto langAS = + mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>(addrSpace)) { unsigned targetAS = targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); - addrSpace = targetAS == 0 - ? nullptr - : cir::TargetAddressSpaceAttr::get(op.getContext(), - targetAS); + addrSpace = + targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(op.getContext(), targetAS); } // Only rewrite if something actually changed. @@ -142,8 +141,7 @@ class CIRGlobalOpTargetLowering }; /// Pattern to lower FuncOp types that contain address spaces. -class CIRFuncOpTargetLowering - : public mlir::OpConversionPattern<cir::FuncOp> { +class CIRFuncOpTargetLowering : public mlir::OpConversionPattern<cir::FuncOp> { public: using mlir::OpConversionPattern<cir::FuncOp>::OpConversionPattern; @@ -178,9 +176,9 @@ class CIRFuncOpTargetLowering loweredFuncOp.setFunctionType(loweredFuncType); rewriter.inlineRegionBefore(op.getBody(), loweredFuncOp.getBody(), loweredFuncOp.end()); - if (mlir::failed(rewriter.convertRegionTypes( - &loweredFuncOp.getBody(), *getTypeConverter(), - &signatureConversion))) + if (mlir::failed(rewriter.convertRegionTypes(&loweredFuncOp.getBody(), + *getTypeConverter(), + &signatureConversion))) return mlir::failure(); rewriter.eraseOp(op); @@ -210,25 +208,23 @@ prepareTargetLoweringTypeConverter(mlir::TypeConverter &converter, const cir::TargetLoweringInfo &targetInfo) { converter.addConversion([](mlir::Type type) { return type; }); - converter.addConversion( - [&converter, &targetInfo](cir::PointerType type) -> mlir::Type { - mlir::Type pointee = converter.convertType(type.getPointee()); - if (!pointee) - return {}; - auto addrSpace = type.getAddrSpace(); - if (auto langAS = - mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>( - addrSpace)) { - unsigned targetAS = - targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); - addrSpace = - targetAS == 0 - ? nullptr - : cir::TargetAddressSpaceAttr::get(type.getContext(), - targetAS); - } - return cir::PointerType::get(type.getContext(), pointee, addrSpace); - }); + converter.addConversion([&converter, + &targetInfo](cir::PointerType type) -> mlir::Type { + mlir::Type pointee = converter.convertType(type.getPointee()); + if (!pointee) + return {}; + auto addrSpace = type.getAddrSpace(); + if (auto langAS = + mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>(addrSpace)) { + unsigned targetAS = + targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); + addrSpace = + targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(type.getContext(), targetAS); + } + return cir::PointerType::get(type.getContext(), pointee, addrSpace); + }); converter.addConversion([&converter](cir::ArrayType type) -> mlir::Type { mlir::Type loweredElementType = @@ -254,18 +250,18 @@ prepareTargetLoweringTypeConverter(mlir::TypeConverter &converter, }); } -static void populateTargetLoweringConversionTarget( - mlir::ConversionTarget &target, const mlir::TypeConverter &tc) { +static void +populateTargetLoweringConversionTarget(mlir::ConversionTarget &target, + const mlir::TypeConverter &tc) { target.addLegalOp<mlir::ModuleOp>(); target.addDynamicallyLegalDialect<cir::CIRDialect>( [&tc](mlir::Operation *op) { if (!tc.isLegal(op)) return false; - return std::all_of(op->getRegions().begin(), op->getRegions().end(), - [&tc](mlir::Region ®ion) { - return tc.isLegal(®ion); - }); + return std::all_of( + op->getRegions().begin(), op->getRegions().end(), + [&tc](mlir::Region ®ion) { return tc.isLegal(®ion); }); }); target.addDynamicallyLegalOp<cir::FuncOp>( >From 035e2e4351f27ab5fda42061fae6d92b9be4b7f2 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 04:39:26 -0400 Subject: [PATCH 07/14] fix tests to represent pre-target lowering state of AS --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 5 +- clang/lib/CIR/CodeGen/TargetInfo.cpp | 45 ++++------ .../CIR/CodeGen/amdgpu-address-spaces.cpp | 20 +++-- clang/test/CIR/CodeGenCUDA/address-spaces.cu | 26 +++++- .../CIR/Lowering/global-address-space.cir | 85 ------------------- 5 files changed, 55 insertions(+), 126 deletions(-) delete mode 100644 clang/test/CIR/Lowering/global-address-space.cir diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 7dfd78fa2e5aa..e0681eb760249 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -909,8 +909,6 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, LangAS langAS, const VarDecl *d, ForDefinition_t isForDefinition) { - mlir::ptr::MemorySpaceAttrInterface cirAS = - cir::toCIRAddressSpaceAttr(getMLIRContext(), langAS); // Lookup the entry, lazily creating it if necessary. cir::GlobalOp entry; if (mlir::Operation *v = getGlobalValue(mangledName)) { @@ -927,7 +925,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, assert(!cir::MissingFeatures::setDLLStorageClass()); assert(!cir::MissingFeatures::openMP()); - if (entry.getSymType() == ty && entryCIRAS == cirAS) + if (entry.getSymType() == ty && + cir::isMatchingAddressSpace(entryCIRAS, langAS)) return entry; // If there are two attempts to define the same mangled name, issue an diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index d05b7a9373303..3af2bf135c6a2 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -70,31 +70,6 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { } } } -}; - -} // namespace - -namespace { - -class X8664ABIInfo : public ABIInfo { -public: - X8664ABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {} -}; - -class X8664TargetCIRGenInfo : public TargetCIRGenInfo { -public: - X8664TargetCIRGenInfo(CIRGenTypes &cgt) - : TargetCIRGenInfo(std::make_unique<X8664ABIInfo>(cgt)) {} -}; -class AMDGPUABIInfo : public ABIInfo { -public: - AMDGPUABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {} -}; - -class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { -public: - AMDGPUTargetCIRGenInfo(CIRGenTypes &cgt) - : TargetCIRGenInfo(std::make_unique<AMDGPUABIInfo>(cgt)) {} clang::LangAS getGlobalVarAddressSpace(CIRGenModule &cgm, @@ -128,6 +103,21 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { cir::LangAddressSpace::OffloadPrivate); } }; + +} // namespace + +namespace { + +class X8664ABIInfo : public ABIInfo { +public: + X8664ABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {} +}; + +class X8664TargetCIRGenInfo : public TargetCIRGenInfo { +public: + X8664TargetCIRGenInfo(CIRGenTypes &cgt) + : TargetCIRGenInfo(std::make_unique<X8664ABIInfo>(cgt)) {} +}; } // namespace namespace { @@ -154,11 +144,6 @@ clang::CIRGen::createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt) { return std::make_unique<NVPTXTargetCIRGenInfo>(cgt); } -std::unique_ptr<TargetCIRGenInfo> -clang::CIRGen::createAMDGPUTargetCIRGenInfo(CIRGenTypes &cgt) { - return std::make_unique<AMDGPUTargetCIRGenInfo>(cgt); -} - std::unique_ptr<TargetCIRGenInfo> clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) { return std::make_unique<X8664TargetCIRGenInfo>(cgt); diff --git a/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp index 35ceed46189dc..bee81138471c5 100644 --- a/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp +++ b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp @@ -1,4 +1,7 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir %s -o %t.cir +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir \ +// RUN: -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> %t-pre.cir +// RUN: FileCheck --check-prefix=CIR-PRE --input-file=%t-pre.cir %s + // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-llvm %s -o %t.ll @@ -14,21 +17,24 @@ // For AMDGPU in non-OpenCL/HIP mode, globals default to AS 1 (global). int globalVar = 123; -// CIR-DAG: cir.global external lang_address_space(offload_global) @globalVar = #cir.int<123> : !s32i +// CIR-PRE-DAG: cir.global external lang_address_space(offload_global) @globalVar = #cir.int<123> : !s32i +// CIR-DAG: cir.global external target_address_space(1) @globalVar = #cir.int<123> : !s32i // LLVM-DAG: @globalVar = addrspace(1) global i32 123, align 4 // OGCG-DAG: @globalVar = addrspace(1) global i32 123, align 4 // Test non-const global array goes to global AS. int globalArray[4] = {1, 2, 3, 4}; -// CIR-DAG: cir.global external lang_address_space(offload_global) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> +// CIR-PRE-DAG: cir.global external lang_address_space(offload_global) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> +// CIR-DAG: cir.global external target_address_space(1) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> // LLVM-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 // OGCG-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 // Test static global goes to global AS. static int staticGlobal = 555; -// CIR-DAG: cir.global "private" internal{{.*}}lang_address_space(offload_global) @_ZL12staticGlobal = #cir.int<555> : !s32i +// CIR-PRE-DAG: cir.global "private" internal{{.*}}lang_address_space(offload_global) @_ZL12staticGlobal = #cir.int<555> : !s32i +// CIR-DAG: cir.global "private" internal{{.*}}target_address_space(1) @_ZL12staticGlobal = #cir.int<555> : !s32i // LLVM-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 // OGCG-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 @@ -36,14 +42,16 @@ static int staticGlobal = 555; // Use extern to force emission since const globals are otherwise optimized away. extern const int constGlobal = 456; -// CIR-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i +// CIR-PRE-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i +// CIR-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i // LLVM-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 // OGCG-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 // Test extern const array goes to constant AS. extern const int constArray[3] = {10, 20, 30}; -// CIR-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> +// CIR-PRE-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> +// CIR-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> // LLVM-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 // OGCG-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu index 1ed52378b99ac..e42626bf17c56 100644 --- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu +++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu @@ -5,6 +5,11 @@ // RUN: -I%S/Inputs/ %s -o %t.cir // RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -fclangir -emit-cir \ +// RUN: -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> %t-pre.cir +// RUN: FileCheck --check-prefix=CIR-PRE --input-file=%t-pre.cir %s + // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ // RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ // RUN: -I%S/Inputs/ %s -o %t.ll @@ -19,17 +24,34 @@ // LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4 __device__ int a; -// CIR-DEVICE: cir.global external lang_address_space(offload_global) @[[DEV:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} +// CIR-PRE: cir.global external lang_address_space(offload_global) @a = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} // LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4 // OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4 __constant__ int c; -// CIR-DEVICE: cir.global constant external lang_address_space(offload_constant) @[[CONST:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} +// CIR-PRE: cir.global constant external lang_address_space(offload_constant) @c = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} // LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, align 4 // OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized constant i32 0, align 4 +__shared__ int k; +// CIR-PRE: cir.global external lang_address_space(offload_local) @k = #cir.poison : !s32i + +__shared__ float b; +// CIR-PRE: cir.global external lang_address_space(offload_local) @b = #cir.poison : !cir.float + // OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4 +__device__ void foo() { + // CIR-PRE: cir.get_global @a : !cir.ptr<!s32i, lang_address_space(offload_global)> + a++; + + // CIR-PRE: cir.get_global @c : !cir.ptr<!s32i, lang_address_space(offload_constant)> + c++; + + // CIR-PRE: cir.get_global @k : !cir.ptr<!s32i, lang_address_space(offload_local)> + k++; +} + __global__ void fn() { int i = 0; __shared__ int j; diff --git a/clang/test/CIR/Lowering/global-address-space.cir b/clang/test/CIR/Lowering/global-address-space.cir deleted file mode 100644 index 7161d6852acb2..0000000000000 --- a/clang/test/CIR/Lowering/global-address-space.cir +++ /dev/null @@ -1,85 +0,0 @@ -// RUN: cir-opt %s -cir-to-llvm -o %t.mlir -// RUN: FileCheck --input-file=%t.mlir %s - -!s32i = !cir.int<s, 32> - -module attributes { cir.triple = "amdgcn-amd-amdhsa" } { - // Target address space lowering (passthrough) - cir.global external target_address_space(1) @global_target_as1 = #cir.int<42> : !s32i - // CHECK: llvm.mlir.global external @global_target_as1(42 : i32) {addr_space = 1 : i32} : i32 - - cir.global external target_address_space(3) @global_target_as3 = #cir.int<100> : !s32i - // CHECK: llvm.mlir.global external @global_target_as3(100 : i32) {addr_space = 3 : i32} : i32 - - cir.global external @global_default = #cir.int<0> : !s32i - // CHECK: llvm.mlir.global external @global_default(0 : i32) {addr_space = 0 : i32} : i32 - - // Test cir.get_global with address space produces correct llvm.mlir.addressof type - // CHECK-LABEL: llvm.func @test_get_global_as1 - cir.func @test_get_global_as1() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_target_as1 : !llvm.ptr<1> - // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<1> -> i32 - // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_target_as1 : !cir.ptr<!s32i, target_address_space(1)> - %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(1)>, !s32i - cir.return %1 : !s32i - } - - // CHECK-LABEL: llvm.func @test_get_global_as3 - cir.func @test_get_global_as3() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_target_as3 : !llvm.ptr<3> - // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<3> -> i32 - // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_target_as3 : !cir.ptr<!s32i, target_address_space(3)> - %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(3)>, !s32i - cir.return %1 : !s32i - } - - // CHECK-LABEL: llvm.func @test_get_global_default - cir.func @test_get_global_default() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_default : !llvm.ptr - // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr -> i32 - // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_default : !cir.ptr<!s32i> - %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i - cir.return %1 : !s32i - } - - // Language address space lowering (AMDGPU mapping) - // See: https://llvm.org/docs/AMDGPUUsage.html#address-spaces - // OffloadGlobal -> 1 - cir.global external lang_address_space(offload_global) @global_lang_global = #cir.int<1> : !s32i - // CHECK: llvm.mlir.global external @global_lang_global(1 : i32) {addr_space = 1 : i32} : i32 - - // OffloadLocal -> 3 - cir.global "private" internal lang_address_space(offload_local) @global_lang_local : !s32i - // CHECK: llvm.mlir.global internal @global_lang_local() {addr_space = 3 : i32} : i32 - - // OffloadConstant -> 4 - cir.global external lang_address_space(offload_constant) @global_lang_constant = #cir.int<2> : !s32i - // CHECK: llvm.mlir.global external @global_lang_constant(2 : i32) {addr_space = 4 : i32} : i32 - - // OffloadPrivate -> 5 - cir.global "private" internal lang_address_space(offload_private) @global_lang_private : !s32i - // CHECK: llvm.mlir.global internal @global_lang_private() {addr_space = 5 : i32} : i32 - - // OffloadGeneric -> 0 - cir.global external lang_address_space(offload_generic) @global_lang_generic = #cir.int<3> : !s32i - // CHECK: llvm.mlir.global external @global_lang_generic(3 : i32) {addr_space = 0 : i32} : i32 - - // Pointer type lowering with lang_address_space - // CHECK: llvm.func @test_ptr_lang_as(%arg0: !llvm.ptr<1>) - cir.func @test_ptr_lang_as(%arg0: !cir.ptr<!s32i, lang_address_space(offload_global)>) { - // The alloca stores a pointer to address space 1, but the alloca itself is on the stack (default AS) - // CHECK: llvm.alloca {{.*}} x !llvm.ptr<1> {{.*}} : (i64) -> !llvm.ptr - %0 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_global)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_global)>>, ["arg", init] {alignment = 8 : i64} - cir.return - } - - // CHECK: llvm.func @test_ptr_target_as(%arg0: !llvm.ptr<5>) - cir.func @test_ptr_target_as(%arg0: !cir.ptr<!s32i, target_address_space(5)>) { - // CHECK: llvm.alloca {{.*}} x !llvm.ptr<5> {{.*}} : (i64) -> !llvm.ptr - %0 = cir.alloca !cir.ptr<!s32i, target_address_space(5)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(5)>>, ["arg", init] {alignment = 8 : i64} - cir.return - } -} >From ee6b9adc50de796df9e09b9d1eba9d51613de1a2 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 04:49:06 -0400 Subject: [PATCH 08/14] Use AMDGPU enums to map CIR AS --- .../Transforms/TargetLowering/Targets/AMDGPU.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp index 058c1200531e5..186b2af31bd0c 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp @@ -8,6 +8,7 @@ #include "../TargetLoweringInfo.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/ErrorHandling.h" namespace cir { @@ -22,17 +23,17 @@ class AMDGPUTargetLoweringInfo : public TargetLoweringInfo { cir::LangAddressSpace addrSpace) const override { switch (addrSpace) { case cir::LangAddressSpace::Default: - return 0; + return llvm::AMDGPUAS::FLAT_ADDRESS; case cir::LangAddressSpace::OffloadPrivate: - return 5; + return llvm::AMDGPUAS::PRIVATE_ADDRESS; case cir::LangAddressSpace::OffloadLocal: - return 3; + return llvm::AMDGPUAS::LOCAL_ADDRESS; case cir::LangAddressSpace::OffloadGlobal: - return 1; + return llvm::AMDGPUAS::GLOBAL_ADDRESS; case cir::LangAddressSpace::OffloadConstant: - return 4; + return llvm::AMDGPUAS::CONSTANT_ADDRESS; case cir::LangAddressSpace::OffloadGeneric: - return 0; + return llvm::AMDGPUAS::FLAT_ADDRESS; } llvm_unreachable("Unknown CIR address space for AMDGPU target"); } >From ae50e89594932b781d2fb39895e24ff546ee7a0d Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 05:07:12 -0400 Subject: [PATCH 09/14] more fmt --- clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 73b8415ef589c..b622fa1ef3205 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -3369,8 +3369,8 @@ static void prepareTypeConverter(mlir::LLVMTypeConverter &converter, unsigned numericAS = 0; if (auto targetAsAttr = - mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>( - addrSpaceAttr)) + mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>( + addrSpaceAttr)) numericAS = targetAsAttr.getValue(); return mlir::LLVM::LLVMPointerType::get(type.getContext(), numericAS); }); >From a50308c131560444c96ce5519b6b5d18480b835a Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 15:05:43 -0400 Subject: [PATCH 10/14] proper amdgpu constant AS encoding --- clang/lib/CIR/CodeGen/TargetInfo.cpp | 7 +++---- clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 3af2bf135c6a2..f674299168960 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -3,6 +3,7 @@ #include "CIRGenFunction.h" #include "CIRGenModule.h" #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" +#include "clang/Basic/AddressSpaces.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" @@ -88,10 +89,8 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { // Only promote to address space 4 if VarDecl has constant initialization. if (decl->getType().isConstantStorage(cgm.getASTContext(), false, false) && - decl->hasConstantInitialization()) { - if (auto constAS = cgm.getTarget().getConstantAddressSpace()) - return *constAS; - } + decl->hasConstantInitialization()) + return LangAS::opencl_constant; return defaultGlobalAS; } diff --git a/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp index bee81138471c5..185dea3a8b265 100644 --- a/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp +++ b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp @@ -42,7 +42,7 @@ static int staticGlobal = 555; // Use extern to force emission since const globals are otherwise optimized away. extern const int constGlobal = 456; -// CIR-PRE-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i +// CIR-PRE-DAG: cir.global constant external lang_address_space(offload_constant) @constGlobal = #cir.int<456> : !s32i // CIR-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i // LLVM-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 // OGCG-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 @@ -50,7 +50,7 @@ extern const int constGlobal = 456; // Test extern const array goes to constant AS. extern const int constArray[3] = {10, 20, 30}; -// CIR-PRE-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> +// CIR-PRE-DAG: cir.global constant external lang_address_space(offload_constant) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> // CIR-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> // LLVM-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 // OGCG-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 >From 9725dc27909aec94cb3cf83c18d76e0584d86628 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 16:44:51 -0400 Subject: [PATCH 11/14] Coverage for AS target lowering and fix generic lowering conversion pattern on alloca types. --- .../CIR/Dialect/Transforms/TargetLowering.cpp | 15 ++++- .../CIR/CodeGen/amdgpu-target-lowering-as.cpp | 66 +++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 clang/test/CIR/CodeGen/amdgpu-target-lowering-as.cpp diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp index 0c1fcbe8f3ee5..aa782aedfda1d 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp @@ -71,7 +71,20 @@ class CIRGenericTargetLoweringPattern : public mlir::ConversionPattern { mlir::OperationState loweredOpState(op->getLoc(), op->getName()); loweredOpState.addOperands(operands); - loweredOpState.addAttributes(op->getAttrs()); + + // Copy attributes, converting any TypeAttr through the type converter so + // that address-space-bearing types (e.g. AllocaOp's allocaType) stay in + // sync with the converted result types. + for (mlir::NamedAttribute attr : op->getAttrs()) { + if (auto typeAttr = mlir::dyn_cast<mlir::TypeAttr>(attr.getValue())) { + mlir::Type converted = typeConverter->convertType(typeAttr.getValue()); + loweredOpState.addAttribute(attr.getName(), + mlir::TypeAttr::get(converted)); + } else { + loweredOpState.addAttribute(attr.getName(), attr.getValue()); + } + } + loweredOpState.addSuccessors(op->getSuccessors()); llvm::SmallVector<mlir::Type> loweredResultTypes; diff --git a/clang/test/CIR/CodeGen/amdgpu-target-lowering-as.cpp b/clang/test/CIR/CodeGen/amdgpu-target-lowering-as.cpp new file mode 100644 index 0000000000000..848fa54286a3e --- /dev/null +++ b/clang/test/CIR/CodeGen/amdgpu-target-lowering-as.cpp @@ -0,0 +1,66 @@ +// Tests for LangAddressSpaceAttr -> TargetAddressSpaceAttr conversion in the +// cir-target-lowering pass for the AMDGPU target. Exercises: +// - CIRGlobalOpTargetLowering (GlobalOp addr_space attribute) +// - CIRFuncOpTargetLowering (FuncOp pointer argument types) +// - CIRGenericTargetLoweringPattern (get_global result pointer types) +// +// AMDGPU address space mapping: +// offload_global -> 1 (GLOBAL_ADDRESS) +// offload_local -> 3 (LOCAL_ADDRESS) +// offload_constant -> 4 (CONSTANT_ADDRESS) +// offload_private -> 5 (PRIVATE_ADDRESS) + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir \ +// RUN: -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> %t.pre.cir +// RUN: FileCheck --check-prefix=PRE --input-file=%t.pre.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=POST --input-file=%t.cir %s + +// ---- GlobalOp: lang AS emitted before target lowering, target AS after ---- + +// Note: opencl_local/opencl_private globals require loader_uninitialized which +// is NYI in CIR. Those AS are exercised via pointer parameter types below. + +int [[clang::opencl_global]] g_global = 0; +int [[clang::opencl_constant]] g_constant = 0; + +// PRE-DAG: cir.global {{.*}} lang_address_space(offload_global) @g_global +// PRE-DAG: cir.global {{.*}} lang_address_space(offload_constant) @g_constant + +// POST-DAG: cir.global {{.*}} target_address_space(1) @g_global +// POST-DAG: cir.global {{.*}} target_address_space(4) @g_constant + +// ---- FuncOp: pointer args exercise all four lang AS -> target AS ---- + +void func_ptr_args(int [[clang::opencl_global]] *global_ptr, + int [[clang::opencl_local]] *local_ptr, + int [[clang::opencl_constant]] *const_ptr, + int [[clang::opencl_private]] *private_ptr) {} + +// PRE: cir.func {{.*}} @_Z13func_ptr_args +// PRE-SAME: !cir.ptr<!s32i, lang_address_space(offload_global)> +// PRE-SAME: !cir.ptr<!s32i, lang_address_space(offload_local)> +// PRE-SAME: !cir.ptr<!s32i, lang_address_space(offload_constant)> +// PRE-SAME: !cir.ptr<!s32i, lang_address_space(offload_private)> + +// POST: cir.func {{.*}} @_Z13func_ptr_args +// POST-SAME: !cir.ptr<!s32i, target_address_space(1)> +// POST-SAME: !cir.ptr<!s32i, target_address_space(3)> +// POST-SAME: !cir.ptr<!s32i, target_address_space(4)> +// POST-SAME: !cir.ptr<!s32i, target_address_space(5)> + +// ---- get_global: result pointer type with lang AS -> target AS ---- + +void get_globals() { + (void)g_global; + (void)g_constant; +} + +// PRE: cir.func {{.*}} @_Z11get_globalsv +// PRE: cir.get_global @g_global : !cir.ptr<!s32i, lang_address_space(offload_global)> +// PRE: cir.get_global @g_constant : !cir.ptr<!s32i, lang_address_space(offload_constant)> + +// POST: cir.func {{.*}} @_Z11get_globalsv +// POST: cir.get_global @g_global : !cir.ptr<!s32i, target_address_space(1)> +// POST: cir.get_global @g_constant : !cir.ptr<!s32i, target_address_space(4)> >From a0abf04a5e7bbe1c5a58a32e4f02920d9986983b Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 17:13:35 -0400 Subject: [PATCH 12/14] add ogcg cuda checks and todo on nptx lowering --- clang/test/CIR/CodeGenCUDA/address-spaces.cu | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu index e42626bf17c56..a47a2867e7111 100644 --- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu +++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu @@ -10,35 +10,40 @@ // RUN: -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> %t-pre.cir // RUN: FileCheck --check-prefix=CIR-PRE --input-file=%t-pre.cir %s +// TODO: Add CIR (post target lowering) and LLVM checks once NVPTX TargetLoweringInfo +// is implemented. + // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ // RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ // RUN: -I%S/Inputs/ %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \ // RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ // RUN: -I%S/Inputs/ %s -o %t.ll // RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s +// Verifies CIR emits correct address spaces for CUDA globals. + // CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j = #cir.undef : !s32i {alignment = 4 : i64} // LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4 __device__ int a; // CIR-PRE: cir.global external lang_address_space(offload_global) @a = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} // LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4 +// OGCG-DAG: @a = addrspace(1) externally_initialized global i32 0, align 4 // OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4 __constant__ int c; // CIR-PRE: cir.global constant external lang_address_space(offload_constant) @c = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} // LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, align 4 +// OGCG-DAG: @c = addrspace(4) externally_initialized constant i32 0, align 4 // OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized constant i32 0, align 4 -__shared__ int k; -// CIR-PRE: cir.global external lang_address_space(offload_local) @k = #cir.poison : !s32i - -__shared__ float b; -// CIR-PRE: cir.global external lang_address_space(offload_local) @b = #cir.poison : !cir.float - // OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4 __device__ void foo() { @@ -47,9 +52,6 @@ __device__ void foo() { // CIR-PRE: cir.get_global @c : !cir.ptr<!s32i, lang_address_space(offload_constant)> c++; - - // CIR-PRE: cir.get_global @k : !cir.ptr<!s32i, lang_address_space(offload_local)> - k++; } __global__ void fn() { >From 4a919d3a61006d8ab432524ffc58f508a6a84f3c Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 13 Mar 2026 17:54:36 -0400 Subject: [PATCH 13/14] Add table-based CIR -> Target AS mapping --- .../TargetLowering/Targets/AMDGPU.cpp | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp index 186b2af31bd0c..aa396335dc1cb 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp @@ -9,33 +9,32 @@ #include "../TargetLoweringInfo.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" #include "llvm/Support/AMDGPUAddrSpace.h" -#include "llvm/Support/ErrorHandling.h" namespace cir { namespace { +// Address space mapping from: +// https://llvm.org/docs/AMDGPUUsage.html#address-spaces +// +// Indexed by cir::LangAddressSpace enum values. +constexpr unsigned AMDGPUAddrSpaceMap[] = { + llvm::AMDGPUAS::FLAT_ADDRESS, // Default + llvm::AMDGPUAS::PRIVATE_ADDRESS, // OffloadPrivate + llvm::AMDGPUAS::LOCAL_ADDRESS, // OffloadLocal + llvm::AMDGPUAS::GLOBAL_ADDRESS, // OffloadGlobal + llvm::AMDGPUAS::CONSTANT_ADDRESS, // OffloadConstant + llvm::AMDGPUAS::FLAT_ADDRESS, // OffloadGeneric +}; + class AMDGPUTargetLoweringInfo : public TargetLoweringInfo { public: - // Address space mapping from: - // https://llvm.org/docs/AMDGPUUsage.html#address-spaces unsigned getTargetAddrSpaceFromCIRAddrSpace( cir::LangAddressSpace addrSpace) const override { - switch (addrSpace) { - case cir::LangAddressSpace::Default: - return llvm::AMDGPUAS::FLAT_ADDRESS; - case cir::LangAddressSpace::OffloadPrivate: - return llvm::AMDGPUAS::PRIVATE_ADDRESS; - case cir::LangAddressSpace::OffloadLocal: - return llvm::AMDGPUAS::LOCAL_ADDRESS; - case cir::LangAddressSpace::OffloadGlobal: - return llvm::AMDGPUAS::GLOBAL_ADDRESS; - case cir::LangAddressSpace::OffloadConstant: - return llvm::AMDGPUAS::CONSTANT_ADDRESS; - case cir::LangAddressSpace::OffloadGeneric: - return llvm::AMDGPUAS::FLAT_ADDRESS; - } - llvm_unreachable("Unknown CIR address space for AMDGPU target"); + auto idx = static_cast<unsigned>(addrSpace); + assert(idx < std::size(AMDGPUAddrSpaceMap) && + "Unknown CIR address space for AMDGPU target"); + return AMDGPUAddrSpaceMap[idx]; } }; >From 5e533531ea02ca23e42f29f724522635733ac5c6 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 20 Mar 2026 18:42:41 -0400 Subject: [PATCH 14/14] fix code dup rebase bug --- clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index a07497f939921..eb322d135a804 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1773,10 +1773,6 @@ void cir::GlobalOp::build( if (addrSpace) odsState.addAttribute(getAddrSpaceAttrName(odsState.name), addrSpace); - addrSpace = normalizeDefaultAddressSpace(addrSpace); - if (addrSpace) - odsState.addAttribute(getAddrSpaceAttrName(odsState.name), addrSpace); - cir::GlobalLinkageKindAttr linkageAttr = cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage); odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
