Author: David Rivera Date: 2026-04-02T03:00:14-04:00 New Revision: e3cbd9984a78422c3799629eb6b5f7f7818c1a11
URL: https://github.com/llvm/llvm-project/commit/e3cbd9984a78422c3799629eb6b5f7f7818c1a11 DIFF: https://github.com/llvm/llvm-project/commit/e3cbd9984a78422c3799629eb6b5f7f7818c1a11.diff LOG: [CIR][AMDGPU] Lower Language specific address spaces and implement AMDGPU target (#179084) Added: clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp clang/test/CIR/CodeGen/amdgpu-target-lowering-as.cpp Modified: clang/lib/CIR/CodeGen/CIRGenModule.cpp clang/lib/CIR/CodeGen/TargetInfo.cpp clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp clang/test/CIR/CodeGenCUDA/address-spaces.cu Removed: clang/test/CIR/Lowering/global-address-space.cir ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 322deae312738..e0681eb760249 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -908,6 +908,7 @@ cir::GlobalOp CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, LangAS langAS, const VarDecl *d, ForDefinition_t isForDefinition) { + // Lookup the entry, lazily creating it if necessary. cir::GlobalOp entry; if (mlir::Operation *v = getGlobalValue(mangledName)) { @@ -918,13 +919,14 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, } if (entry) { + mlir::ptr::MemorySpaceAttrInterface entryCIRAS = entry.getAddrSpaceAttr(); assert(!cir::MissingFeatures::opGlobalWeakRef()); assert(!cir::MissingFeatures::setDLLStorageClass()); assert(!cir::MissingFeatures::openMP()); if (entry.getSymType() == ty && - (cir::isMatchingAddressSpace(entry.getAddrSpaceAttr(), langAS))) + cir::isMatchingAddressSpace(entryCIRAS, langAS)) return entry; // If there are two attempts to define the same mangled name, issue an diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index a5cc74b18a8a0..f674299168960 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -3,6 +3,7 @@ #include "CIRGenFunction.h" #include "CIRGenModule.h" #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" +#include "clang/Basic/AddressSpaces.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" @@ -70,6 +71,36 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { } } } + + clang::LangAS + getGlobalVarAddressSpace(CIRGenModule &cgm, + const clang::VarDecl *decl) const override { + using clang::LangAS; + assert(!cgm.getLangOpts().OpenCL && + !(cgm.getLangOpts().CUDA && cgm.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + LangAS defaultGlobalAS = LangAS::opencl_global; + if (!decl) + return defaultGlobalAS; + + LangAS addrSpace = decl->getType().getAddressSpace(); + if (addrSpace != LangAS::Default) + return addrSpace; + + // Only promote to address space 4 if VarDecl has constant initialization. + if (decl->getType().isConstantStorage(cgm.getASTContext(), false, false) && + decl->hasConstantInitialization()) + return LangAS::opencl_constant; + + return defaultGlobalAS; + } + + mlir::ptr::MemorySpaceAttrInterface + getCIRAllocaAddressSpace() const override { + return cir::LangAddressSpaceAttr::get( + &getABIInfo().cgt.getMLIRContext(), + cir::LangAddressSpace::OffloadPrivate); + } }; } // namespace @@ -86,7 +117,6 @@ class X8664TargetCIRGenInfo : public TargetCIRGenInfo { X8664TargetCIRGenInfo(CIRGenTypes &cgt) : TargetCIRGenInfo(std::make_unique<X8664ABIInfo>(cgt)) {} }; - } // namespace namespace { diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp index c3ed588cf06dc..aa782aedfda1d 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering.cpp @@ -11,10 +11,15 @@ //===----------------------------------------------------------------------===// #include "TargetLowering/LowerModule.h" +#include "TargetLowering/TargetLoweringInfo.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/Support/LLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/Dialect/Passes.h" -#include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace cir; @@ -32,6 +37,168 @@ struct TargetLoweringPass void runOnOperation() override; }; +/// A generic target lowering pattern that matches any CIR op whose operand or +/// result types need address space conversion. Clones the op with converted +/// types. +class CIRGenericTargetLoweringPattern : public mlir::ConversionPattern { +public: + CIRGenericTargetLoweringPattern(mlir::MLIRContext *context, + const mlir::TypeConverter &typeConverter) + : mlir::ConversionPattern(typeConverter, MatchAnyOpTypeTag(), + /*benefit=*/1, context) {} + + mlir::LogicalResult + matchAndRewrite(mlir::Operation *op, llvm::ArrayRef<mlir::Value> operands, + mlir::ConversionPatternRewriter &rewriter) const override { + // Do not match on operations that have dedicated lowering patterns. + if (llvm::isa<cir::FuncOp, cir::GlobalOp>(op)) + return mlir::failure(); + + const mlir::TypeConverter *typeConverter = getTypeConverter(); + assert(typeConverter && + "CIRGenericTargetLoweringPattern requires a type converter"); + bool operandsAndResultsLegal = typeConverter->isLegal(op); + bool regionsLegal = + std::all_of(op->getRegions().begin(), op->getRegions().end(), + [typeConverter](mlir::Region ®ion) { + return typeConverter->isLegal(®ion); + }); + if (operandsAndResultsLegal && regionsLegal) + return mlir::failure(); + + assert(op->getNumRegions() == 0 && "CIRGenericTargetLoweringPattern cannot " + "deal with operations with regions"); + + mlir::OperationState loweredOpState(op->getLoc(), op->getName()); + loweredOpState.addOperands(operands); + + // Copy attributes, converting any TypeAttr through the type converter so + // that address-space-bearing types (e.g. AllocaOp's allocaType) stay in + // sync with the converted result types. + for (mlir::NamedAttribute attr : op->getAttrs()) { + if (auto typeAttr = mlir::dyn_cast<mlir::TypeAttr>(attr.getValue())) { + mlir::Type converted = typeConverter->convertType(typeAttr.getValue()); + loweredOpState.addAttribute(attr.getName(), + mlir::TypeAttr::get(converted)); + } else { + loweredOpState.addAttribute(attr.getName(), attr.getValue()); + } + } + + loweredOpState.addSuccessors(op->getSuccessors()); + + llvm::SmallVector<mlir::Type> loweredResultTypes; + loweredResultTypes.reserve(op->getNumResults()); + for (mlir::Type result : op->getResultTypes()) + loweredResultTypes.push_back(typeConverter->convertType(result)); + loweredOpState.addTypes(loweredResultTypes); + + for (mlir::Region ®ion : op->getRegions()) { + mlir::Region *loweredRegion = loweredOpState.addRegion(); + rewriter.inlineRegionBefore(region, *loweredRegion, loweredRegion->end()); + if (mlir::failed( + rewriter.convertRegionTypes(loweredRegion, *getTypeConverter()))) + return mlir::failure(); + } + + mlir::Operation *loweredOp = rewriter.create(loweredOpState); + rewriter.replaceOp(op, loweredOp); + return mlir::success(); + } +}; + +/// Pattern to lower GlobalOp address space attributes. GlobalOp carries +/// addr_space as a standalone attribute (not inside a type), so the +/// TypeConverter won't reach it automatically. +class CIRGlobalOpTargetLowering + : public mlir::OpConversionPattern<cir::GlobalOp> { + const cir::TargetLoweringInfo &targetInfo; + +public: + CIRGlobalOpTargetLowering(mlir::MLIRContext *context, + const mlir::TypeConverter &typeConverter, + const cir::TargetLoweringInfo &targetInfo) + : mlir::OpConversionPattern<cir::GlobalOp>(typeConverter, context, + /*benefit=*/1), + targetInfo(targetInfo) {} + + mlir::LogicalResult + matchAndRewrite(cir::GlobalOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Type loweredSymTy = getTypeConverter()->convertType(op.getSymType()); + if (!loweredSymTy) + return mlir::failure(); + + // Convert the addr_space attribute. + mlir::ptr::MemorySpaceAttrInterface addrSpace = op.getAddrSpaceAttr(); + if (auto langAS = + mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>(addrSpace)) { + unsigned targetAS = + targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); + addrSpace = + targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(op.getContext(), targetAS); + } + + // Only rewrite if something actually changed. + if (loweredSymTy == op.getSymType() && addrSpace == op.getAddrSpaceAttr()) + return mlir::failure(); + + auto newOp = mlir::cast<cir::GlobalOp>(rewriter.clone(*op.getOperation())); + newOp.setSymType(loweredSymTy); + newOp.setAddrSpaceAttr(addrSpace); + rewriter.replaceOp(op, newOp); + return mlir::success(); + } +}; + +/// Pattern to lower FuncOp types that contain address spaces. +class CIRFuncOpTargetLowering : public mlir::OpConversionPattern<cir::FuncOp> { +public: + using mlir::OpConversionPattern<cir::FuncOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::FuncOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + cir::FuncType opFuncType = op.getFunctionType(); + mlir::TypeConverter::SignatureConversion signatureConversion( + opFuncType.getNumInputs()); + + for (const auto &[i, argType] : llvm::enumerate(opFuncType.getInputs())) { + mlir::Type loweredArgType = getTypeConverter()->convertType(argType); + if (!loweredArgType) + return mlir::failure(); + signatureConversion.addInputs(i, loweredArgType); + } + + mlir::Type loweredReturnType = + getTypeConverter()->convertType(opFuncType.getReturnType()); + if (!loweredReturnType) + return mlir::failure(); + + auto loweredFuncType = cir::FuncType::get( + signatureConversion.getConvertedTypes(), loweredReturnType, + /*isVarArg=*/opFuncType.getVarArg()); + + // Nothing changed, skip. + if (loweredFuncType == opFuncType) + return mlir::failure(); + + cir::FuncOp loweredFuncOp = rewriter.cloneWithoutRegions(op); + loweredFuncOp.setFunctionType(loweredFuncType); + rewriter.inlineRegionBefore(op.getBody(), loweredFuncOp.getBody(), + loweredFuncOp.end()); + if (mlir::failed(rewriter.convertRegionTypes(&loweredFuncOp.getBody(), + *getTypeConverter(), + &signatureConversion))) + return mlir::failure(); + + rewriter.eraseOp(op); + return mlir::success(); + } +}; + } // namespace static void convertSyncScopeIfPresent(mlir::Operation *op, @@ -47,6 +214,80 @@ static void convertSyncScopeIfPresent(mlir::Operation *op, } } +/// Prepare the type converter for the target lowering pass. +/// Converts LangAddressSpaceAttr → TargetAddressSpaceAttr inside pointer types. +static void +prepareTargetLoweringTypeConverter(mlir::TypeConverter &converter, + const cir::TargetLoweringInfo &targetInfo) { + converter.addConversion([](mlir::Type type) { return type; }); + + converter.addConversion([&converter, + &targetInfo](cir::PointerType type) -> mlir::Type { + mlir::Type pointee = converter.convertType(type.getPointee()); + if (!pointee) + return {}; + auto addrSpace = type.getAddrSpace(); + if (auto langAS = + mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>(addrSpace)) { + unsigned targetAS = + targetInfo.getTargetAddrSpaceFromCIRAddrSpace(langAS.getValue()); + addrSpace = + targetAS == 0 + ? nullptr + : cir::TargetAddressSpaceAttr::get(type.getContext(), targetAS); + } + return cir::PointerType::get(type.getContext(), pointee, addrSpace); + }); + + converter.addConversion([&converter](cir::ArrayType type) -> mlir::Type { + mlir::Type loweredElementType = + converter.convertType(type.getElementType()); + if (!loweredElementType) + return {}; + return cir::ArrayType::get(loweredElementType, type.getSize()); + }); + + converter.addConversion([&converter](cir::FuncType type) -> mlir::Type { + llvm::SmallVector<mlir::Type> loweredInputTypes; + loweredInputTypes.reserve(type.getNumInputs()); + if (mlir::failed( + converter.convertTypes(type.getInputs(), loweredInputTypes))) + return {}; + + mlir::Type loweredReturnType = converter.convertType(type.getReturnType()); + if (!loweredReturnType) + return {}; + + return cir::FuncType::get(loweredInputTypes, loweredReturnType, + /*isVarArg=*/type.getVarArg()); + }); +} + +static void +populateTargetLoweringConversionTarget(mlir::ConversionTarget &target, + const mlir::TypeConverter &tc) { + target.addLegalOp<mlir::ModuleOp>(); + + target.addDynamicallyLegalDialect<cir::CIRDialect>( + [&tc](mlir::Operation *op) { + if (!tc.isLegal(op)) + return false; + return std::all_of( + op->getRegions().begin(), op->getRegions().end(), + [&tc](mlir::Region ®ion) { return tc.isLegal(®ion); }); + }); + + target.addDynamicallyLegalOp<cir::FuncOp>( + [&tc](cir::FuncOp op) { return tc.isLegal(op.getFunctionType()); }); + + target.addDynamicallyLegalOp<cir::GlobalOp>([&tc](cir::GlobalOp op) { + if (!tc.isLegal(op.getSymType())) + return false; + return !mlir::isa_and_present<cir::LangAddressSpaceAttr>( + op.getAddrSpaceAttr()); + }); +} + void TargetLoweringPass::runOnOperation() { auto mod = mlir::cast<mlir::ModuleOp>(getOperation()); std::unique_ptr<cir::LowerModule> lowerModule = cir::createLowerModule(mod); @@ -57,11 +298,30 @@ void TargetLoweringPass::runOnOperation() { return; } + const auto &targetInfo = lowerModule->getTargetLoweringInfo(); + mod->walk([&](mlir::Operation *op) { if (mlir::isa<cir::LoadOp, cir::StoreOp, cir::AtomicXchgOp, cir::AtomicCmpXchgOp, cir::AtomicFetchOp>(op)) convertSyncScopeIfPresent(op, *lowerModule); }); + + // Address space conversion: LangAddressSpaceAttr → TargetAddressSpaceAttr. + mlir::TypeConverter typeConverter; + prepareTargetLoweringTypeConverter(typeConverter, targetInfo); + + mlir::RewritePatternSet patterns(mod.getContext()); + patterns.add<CIRGlobalOpTargetLowering>(mod.getContext(), typeConverter, + targetInfo); + patterns.add<CIRFuncOpTargetLowering>(typeConverter, mod.getContext()); + patterns.add<CIRGenericTargetLoweringPattern>(mod.getContext(), + typeConverter); + + mlir::ConversionTarget target(*mod.getContext()); + populateTargetLoweringConversionTarget(target, typeConverter); + + if (failed(mlir::applyPartialConversion(mod, target, std::move(patterns)))) + signalPassFailure(); } std::unique_ptr<Pass> mlir::createTargetLoweringPass() { diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt index 92148127424e9..07e3a67f97859 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt @@ -3,6 +3,7 @@ add_clang_library(MLIRCIRTargetLowering LowerModule.cpp LowerItaniumCXXABI.cpp TargetLoweringInfo.cpp + Targets/AMDGPU.cpp DEPENDS clangBasic diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp index f2398e3105578..26e63b3b676ae 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp @@ -45,8 +45,15 @@ static std::unique_ptr<CIRCXXABI> createCXXABI(LowerModule &lm) { static std::unique_ptr<TargetLoweringInfo> createTargetLoweringInfo(LowerModule &lm) { - assert(!cir::MissingFeatures::targetLoweringInfo()); - return std::make_unique<TargetLoweringInfo>(); + const llvm::Triple &triple = lm.getTarget().getTriple(); + + switch (triple.getArch()) { + case llvm::Triple::amdgcn: + return createAMDGPUTargetLoweringInfo(); + default: + assert(!cir::MissingFeatures::targetLoweringInfo()); + return std::make_unique<TargetLoweringInfo>(); + } } LowerModule::LowerModule(clang::LangOptions langOpts, diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h index 760c3b0b7cc5e..a307bcb373dec 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h @@ -15,6 +15,8 @@ #define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETLOWERINGINFO_H #include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include <memory> +#include <string> namespace cir { @@ -24,8 +26,16 @@ class TargetLoweringInfo { virtual cir::SyncScopeKind convertSyncScope(cir::SyncScopeKind syncScope) const; + + virtual unsigned + getTargetAddrSpaceFromCIRAddrSpace(cir::LangAddressSpace addrSpace) const { + return 0; + }; }; +// Target-specific factory functions. +std::unique_ptr<TargetLoweringInfo> createAMDGPUTargetLoweringInfo(); + } // namespace cir #endif diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp new file mode 100644 index 0000000000000..aa396335dc1cb --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp @@ -0,0 +1,47 @@ +//===- AMDGPU.cpp - Emit CIR for AMDGPU -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../TargetLoweringInfo.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "llvm/Support/AMDGPUAddrSpace.h" + +namespace cir { + +namespace { + +// Address space mapping from: +// https://llvm.org/docs/AMDGPUUsage.html#address-spaces +// +// Indexed by cir::LangAddressSpace enum values. +constexpr unsigned AMDGPUAddrSpaceMap[] = { + llvm::AMDGPUAS::FLAT_ADDRESS, // Default + llvm::AMDGPUAS::PRIVATE_ADDRESS, // OffloadPrivate + llvm::AMDGPUAS::LOCAL_ADDRESS, // OffloadLocal + llvm::AMDGPUAS::GLOBAL_ADDRESS, // OffloadGlobal + llvm::AMDGPUAS::CONSTANT_ADDRESS, // OffloadConstant + llvm::AMDGPUAS::FLAT_ADDRESS, // OffloadGeneric +}; + +class AMDGPUTargetLoweringInfo : public TargetLoweringInfo { +public: + unsigned getTargetAddrSpaceFromCIRAddrSpace( + cir::LangAddressSpace addrSpace) const override { + auto idx = static_cast<unsigned>(addrSpace); + assert(idx < std::size(AMDGPUAddrSpaceMap) && + "Unknown CIR address space for AMDGPU target"); + return AMDGPUAddrSpaceMap[idx]; + } +}; + +} // namespace + +std::unique_ptr<TargetLoweringInfo> createAMDGPUTargetLoweringInfo() { + return std::make_unique<AMDGPUTargetLoweringInfo>(); +} + +} // namespace cir diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 5cc121fbecc8e..b622fa1ef3205 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -3368,12 +3368,9 @@ static void prepareTypeConverter(mlir::LLVMTypeConverter &converter, mlir::ptr::MemorySpaceAttrInterface addrSpaceAttr = type.getAddrSpace(); unsigned numericAS = 0; - if (auto langAsAttr = - mlir::dyn_cast_if_present<cir::LangAddressSpaceAttr>(addrSpaceAttr)) - llvm_unreachable("lowering LangAddressSpaceAttr NYI"); - else if (auto targetAsAttr = - mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>( - addrSpaceAttr)) + if (auto targetAsAttr = + mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>( + addrSpaceAttr)) numericAS = targetAsAttr.getValue(); return mlir::LLVM::LLVMPointerType::get(type.getContext(), numericAS); }); diff --git a/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp new file mode 100644 index 0000000000000..185dea3a8b265 --- /dev/null +++ b/clang/test/CIR/CodeGen/amdgpu-address-spaces.cpp @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir \ +// RUN: -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> %t-pre.cir +// RUN: FileCheck --check-prefix=CIR-PRE --input-file=%t-pre.cir %s + +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// Test address space handling for AMDGPU target in C++ mode (non-OpenCL/HIP). +// This exercises getGlobalVarAddressSpace. + +// Test default address space for globals without explicit AS. +// For AMDGPU in non-OpenCL/HIP mode, globals default to AS 1 (global). +int globalVar = 123; + +// CIR-PRE-DAG: cir.global external lang_address_space(offload_global) @globalVar = #cir.int<123> : !s32i +// CIR-DAG: cir.global external target_address_space(1) @globalVar = #cir.int<123> : !s32i +// LLVM-DAG: @globalVar = addrspace(1) global i32 123, align 4 +// OGCG-DAG: @globalVar = addrspace(1) global i32 123, align 4 + +// Test non-const global array goes to global AS. +int globalArray[4] = {1, 2, 3, 4}; + +// CIR-PRE-DAG: cir.global external lang_address_space(offload_global) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> +// CIR-DAG: cir.global external target_address_space(1) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4> +// LLVM-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 +// OGCG-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 + +// Test static global goes to global AS. +static int staticGlobal = 555; + +// CIR-PRE-DAG: cir.global "private" internal{{.*}}lang_address_space(offload_global) @_ZL12staticGlobal = #cir.int<555> : !s32i +// CIR-DAG: cir.global "private" internal{{.*}}target_address_space(1) @_ZL12staticGlobal = #cir.int<555> : !s32i +// LLVM-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 +// OGCG-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4 + +// Test constant initialization promotion to AS 4 (constant). +// Use extern to force emission since const globals are otherwise optimized away. +extern const int constGlobal = 456; + +// CIR-PRE-DAG: cir.global constant external lang_address_space(offload_constant) @constGlobal = #cir.int<456> : !s32i +// CIR-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i +// LLVM-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 +// OGCG-DAG: @constGlobal = addrspace(4) constant i32 456, align 4 + +// Test extern const array goes to constant AS. +extern const int constArray[3] = {10, 20, 30}; + +// CIR-PRE-DAG: cir.global constant external lang_address_space(offload_constant) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> +// CIR-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3> +// LLVM-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 +// OGCG-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4 + +// Use the static variable to ensure it's emitted. +int getStaticGlobal() { return staticGlobal; } diff --git a/clang/test/CIR/CodeGen/amdgpu-target-lowering-as.cpp b/clang/test/CIR/CodeGen/amdgpu-target-lowering-as.cpp new file mode 100644 index 0000000000000..848fa54286a3e --- /dev/null +++ b/clang/test/CIR/CodeGen/amdgpu-target-lowering-as.cpp @@ -0,0 +1,66 @@ +// Tests for LangAddressSpaceAttr -> TargetAddressSpaceAttr conversion in the +// cir-target-lowering pass for the AMDGPU target. Exercises: +// - CIRGlobalOpTargetLowering (GlobalOp addr_space attribute) +// - CIRFuncOpTargetLowering (FuncOp pointer argument types) +// - CIRGenericTargetLoweringPattern (get_global result pointer types) +// +// AMDGPU address space mapping: +// offload_global -> 1 (GLOBAL_ADDRESS) +// offload_local -> 3 (LOCAL_ADDRESS) +// offload_constant -> 4 (CONSTANT_ADDRESS) +// offload_private -> 5 (PRIVATE_ADDRESS) + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir \ +// RUN: -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> %t.pre.cir +// RUN: FileCheck --check-prefix=PRE --input-file=%t.pre.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=POST --input-file=%t.cir %s + +// ---- GlobalOp: lang AS emitted before target lowering, target AS after ---- + +// Note: opencl_local/opencl_private globals require loader_uninitialized which +// is NYI in CIR. Those AS are exercised via pointer parameter types below. + +int [[clang::opencl_global]] g_global = 0; +int [[clang::opencl_constant]] g_constant = 0; + +// PRE-DAG: cir.global {{.*}} lang_address_space(offload_global) @g_global +// PRE-DAG: cir.global {{.*}} lang_address_space(offload_constant) @g_constant + +// POST-DAG: cir.global {{.*}} target_address_space(1) @g_global +// POST-DAG: cir.global {{.*}} target_address_space(4) @g_constant + +// ---- FuncOp: pointer args exercise all four lang AS -> target AS ---- + +void func_ptr_args(int [[clang::opencl_global]] *global_ptr, + int [[clang::opencl_local]] *local_ptr, + int [[clang::opencl_constant]] *const_ptr, + int [[clang::opencl_private]] *private_ptr) {} + +// PRE: cir.func {{.*}} @_Z13func_ptr_args +// PRE-SAME: !cir.ptr<!s32i, lang_address_space(offload_global)> +// PRE-SAME: !cir.ptr<!s32i, lang_address_space(offload_local)> +// PRE-SAME: !cir.ptr<!s32i, lang_address_space(offload_constant)> +// PRE-SAME: !cir.ptr<!s32i, lang_address_space(offload_private)> + +// POST: cir.func {{.*}} @_Z13func_ptr_args +// POST-SAME: !cir.ptr<!s32i, target_address_space(1)> +// POST-SAME: !cir.ptr<!s32i, target_address_space(3)> +// POST-SAME: !cir.ptr<!s32i, target_address_space(4)> +// POST-SAME: !cir.ptr<!s32i, target_address_space(5)> + +// ---- get_global: result pointer type with lang AS -> target AS ---- + +void get_globals() { + (void)g_global; + (void)g_constant; +} + +// PRE: cir.func {{.*}} @_Z11get_globalsv +// PRE: cir.get_global @g_global : !cir.ptr<!s32i, lang_address_space(offload_global)> +// PRE: cir.get_global @g_constant : !cir.ptr<!s32i, lang_address_space(offload_constant)> + +// POST: cir.func {{.*}} @_Z11get_globalsv +// POST: cir.get_global @g_global : !cir.ptr<!s32i, target_address_space(1)> +// POST: cir.get_global @g_constant : !cir.ptr<!s32i, target_address_space(4)> diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu index 1ed52378b99ac..a47a2867e7111 100644 --- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu +++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu @@ -5,31 +5,55 @@ // RUN: -I%S/Inputs/ %s -o %t.cir // RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -fclangir -emit-cir \ +// RUN: -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> %t-pre.cir +// RUN: FileCheck --check-prefix=CIR-PRE --input-file=%t-pre.cir %s + +// TODO: Add CIR (post target lowering) and LLVM checks once NVPTX TargetLoweringInfo +// is implemented. + // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ // RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ // RUN: -I%S/Inputs/ %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \ // RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ // RUN: -I%S/Inputs/ %s -o %t.ll // RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s +// Verifies CIR emits correct address spaces for CUDA globals. + // CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j = #cir.undef : !s32i {alignment = 4 : i64} // LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4 __device__ int a; -// CIR-DEVICE: cir.global external lang_address_space(offload_global) @[[DEV:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} +// CIR-PRE: cir.global external lang_address_space(offload_global) @a = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} // LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4 +// OGCG-DAG: @a = addrspace(1) externally_initialized global i32 0, align 4 // OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4 __constant__ int c; -// CIR-DEVICE: cir.global constant external lang_address_space(offload_constant) @[[CONST:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} +// CIR-PRE: cir.global constant external lang_address_space(offload_constant) @c = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} // LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, align 4 +// OGCG-DAG: @c = addrspace(4) externally_initialized constant i32 0, align 4 // OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized constant i32 0, align 4 // OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4 +__device__ void foo() { + // CIR-PRE: cir.get_global @a : !cir.ptr<!s32i, lang_address_space(offload_global)> + a++; + + // CIR-PRE: cir.get_global @c : !cir.ptr<!s32i, lang_address_space(offload_constant)> + c++; +} + __global__ void fn() { int i = 0; __shared__ int j; diff --git a/clang/test/CIR/Lowering/global-address-space.cir b/clang/test/CIR/Lowering/global-address-space.cir deleted file mode 100644 index c9f25e1126098..0000000000000 --- a/clang/test/CIR/Lowering/global-address-space.cir +++ /dev/null @@ -1,46 +0,0 @@ -// RUN: cir-opt %s -cir-to-llvm -o %t.mlir -// RUN: FileCheck --input-file=%t.mlir %s - -!s32i = !cir.int<s, 32> - -module { - cir.global external target_address_space(1) @global_as1 = #cir.int<42> : !s32i - // CHECK: llvm.mlir.global external @global_as1(42 : i32) {addr_space = 1 : i32} : i32 - - cir.global external target_address_space(3) @global_as3 = #cir.int<100> : !s32i - // CHECK: llvm.mlir.global external @global_as3(100 : i32) {addr_space = 3 : i32} : i32 - - cir.global external @global_default = #cir.int<0> : !s32i - // CHECK: llvm.mlir.global external @global_default(0 : i32) {addr_space = 0 : i32} : i32 - - // Test cir.get_global with address space produces correct llvm.mlir.addressof type - // CHECK-LABEL: llvm.func @test_get_global_as1 - cir.func @test_get_global_as1() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_as1 : !llvm.ptr<1> - // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<1> -> i32 - // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_as1 : !cir.ptr<!s32i, target_address_space(1)> - %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(1)>, !s32i - cir.return %1 : !s32i - } - - // CHECK-LABEL: llvm.func @test_get_global_as3 - cir.func @test_get_global_as3() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_as3 : !llvm.ptr<3> - // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr<3> -> i32 - // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_as3 : !cir.ptr<!s32i, target_address_space(3)> - %1 = cir.load %0 : !cir.ptr<!s32i, target_address_space(3)>, !s32i - cir.return %1 : !s32i - } - - // CHECK-LABEL: llvm.func @test_get_global_default - cir.func @test_get_global_default() -> !s32i { - // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @global_default : !llvm.ptr - // CHECK: %[[VAL:.*]] = llvm.load %[[ADDR]] {{.*}} : !llvm.ptr -> i32 - // CHECK: llvm.return %[[VAL]] : i32 - %0 = cir.get_global @global_default : !cir.ptr<!s32i> - %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i - cir.return %1 : !s32i - } -} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
