[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/118462 >From 0eae465092e76474a7e87f5617748d091a5d7ca3 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 3 Dec 2024 10:12:36 + Subject: [PATCH 1/6] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM --- .../llvm}/CodeGen/RegAllocPriorityAdvisor.h | 78 +++- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Passes/MachinePassRegistry.def | 1 + llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp | 6 +- .../lib/CodeGen/MLRegAllocPriorityAdvisor.cpp | 184 +++--- llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 9 +- llvm/lib/CodeGen/RegAllocGreedy.h | 2 +- llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp | 155 +++ llvm/lib/Passes/PassBuilder.cpp | 1 + 10 files changed, 320 insertions(+), 120 deletions(-) rename llvm/{lib => include/llvm}/CodeGen/RegAllocPriorityAdvisor.h (57%) diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h similarity index 57% rename from llvm/lib/CodeGen/RegAllocPriorityAdvisor.h rename to llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h index 0758743c2b1403..a53739fdc3fc40 100644 --- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h +++ b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h @@ -9,8 +9,10 @@ #ifndef LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H #define LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" namespace llvm { @@ -68,12 +70,72 @@ class DummyPriorityAdvisor : public RegAllocPriorityAdvisor { unsigned getPriority(const LiveInterval &LI) const override; }; -class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { +/// Common provider for getting the priority advisor and logging rewards. +/// Legacy analysis forwards all calls to this provider. 
+/// New analysis serves the provider as the analysis result. +/// Expensive setup is done in the constructor, so that the advisor can be +/// created quickly for every machine function. +/// TODO: Remove once legacy PM support is dropped. +class RegAllocPriorityAdvisorProvider { public: enum class AdvisorMode : int { Default, Release, Development, Dummy }; - RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode) - : ImmutablePass(ID), Mode(Mode){}; + RegAllocPriorityAdvisorProvider(AdvisorMode Mode) : Mode(Mode) {} + + virtual ~RegAllocPriorityAdvisorProvider() = default; + + virtual void logRewardIfNeeded(const MachineFunction &MF, + llvm::function_ref GetReward) {}; + + virtual std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; + + void setAnalyses(SlotIndexes *SI) { this->SI = SI; } + + AdvisorMode getAdvisorMode() const { return Mode; } + +protected: + SlotIndexes *SI; + +private: + const AdvisorMode Mode; +}; + +RegAllocPriorityAdvisorProvider *createReleaseModePriorityAdvisorProvider(); + +RegAllocPriorityAdvisorProvider * +createDevelopmentModePriorityAdvisorProvider(LLVMContext &Ctx); + +class RegAllocPriorityAdvisorAnalysis +: public AnalysisInfoMixin { + static AnalysisKey Key; + friend AnalysisInfoMixin; + +public: + struct Result { +// Owned by this analysis. 
+RegAllocPriorityAdvisorProvider *Provider; + +bool invalidate(MachineFunction &MF, const PreservedAnalyses &PA, +MachineFunctionAnalysisManager::Invalidator &Inv) { + auto PAC = PA.getChecker(); + return !PAC.preservedWhenStateless() || + Inv.invalidate(MF, PA); +} + }; + + Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + +private: + void initializeProvider(LLVMContext &Ctx); + std::unique_ptr Provider; +}; + +class RegAllocPriorityAdvisorAnalysisLegacy : public ImmutablePass { +public: + using AdvisorMode = RegAllocPriorityAdvisorProvider::AdvisorMode; + RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode Mode) + : ImmutablePass(ID), Mode(Mode) {}; static char ID; /// Get an advisor for the given context (i.e. machine function, etc) @@ -81,7 +143,7 @@ class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; AdvisorMode getAdvisorMode() const { return Mode; } virtual void logRewardIfNeeded(const MachineFunction &MF, - llvm::function_ref GetReward){}; + llvm::function_ref GetReward) {}; protected: // This analysis preserves everything, and subclasses may have additional @@ -97,11 +159,13 @@ class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { /// Specialization for the API used by the analysis infrastructure to create /// an instan
[llvm-branch-commits] [lldb] 21f62ea - Revert "[lldb][DWARFASTParserClang] Make C++ method parsing aware of explicit…"
Author: Michael Buch Date: 2025-01-23T11:19:37Z New Revision: 21f62eaa7e28867b02d356a97a4fe134eb5d1f59 URL: https://github.com/llvm/llvm-project/commit/21f62eaa7e28867b02d356a97a4fe134eb5d1f59 DIFF: https://github.com/llvm/llvm-project/commit/21f62eaa7e28867b02d356a97a4fe134eb5d1f59.diff LOG: Revert "[lldb][DWARFASTParserClang] Make C++ method parsing aware of explicit…" This reverts commit ad6d808906075c3386bbeada3c37d8d3e6afe248. Added: Modified: lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp Removed: diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 682ee6d287bf5c..f54b7fc9cdad24 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -173,9 +173,7 @@ GetCXXObjectParameter(const DWARFDIE &subprogram, if (!DeclKindIsCXXClass(containing_decl_ctx.getDeclKind())) return {}; - if (DWARFDIE object_parameter = - subprogram.GetAttributeValueAsReferenceDIE(DW_AT_object_pointer)) -return object_parameter; + // FIXME: if subprogram has a explicit DW_AT_object_pointer, use it. 
// If no DW_AT_object_pointer was specified, assume the implicit object // parameter is the first parameter to the function, is called "this" and is @@ -217,6 +215,11 @@ static unsigned GetCXXMethodCVQuals(const DWARFDIE &subprogram, return 0; uint32_t encoding_mask = this_type->GetEncodingMask(); + + // FIXME: explicit object parameters need not to be pointers + if (!(encoding_mask & (1u << Type::eEncodingIsPointerUID))) +return 0; + unsigned cv_quals = 0; if (encoding_mask & (1u << Type::eEncodingIsConstUID)) cv_quals |= clang::Qualifiers::Const; diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp index 9c0300be08a78a..b31f56aa372d58 100644 --- a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp +++ b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp @@ -902,181 +902,3 @@ TEST_F(DWARFASTParserClangTests, TestParseDWARFAttributes_ObjectPointer) { EXPECT_TRUE(attrs.object_pointer.IsValid()); EXPECT_EQ(attrs.object_pointer, param_die); } - -TEST_F(DWARFASTParserClangTests, TestParseSubroutine_ExplicitObjectParameter) { - // Tests parsing of a C++ non-static member function with an explicit object - // parameter that isn't called "this" and is not a pointer (but a CV-qualified - // rvalue reference instead). 
- - const char *yamldata = R"( !ELF -FileHeader: - Class: ELFCLASS64 - Data:ELFDATA2LSB - Type:ET_EXEC - Machine: EM_AARCH64 -DWARF: - debug_str: -- Context -- func -- mySelf - debug_abbrev: -- ID: 0 - Table: -- Code:0x1 - Tag: DW_TAG_compile_unit - Children:DW_CHILDREN_yes - Attributes: -- Attribute: DW_AT_language - Form:DW_FORM_data2 -- Code:0x2 - Tag: DW_TAG_structure_type - Children:DW_CHILDREN_yes - Attributes: -- Attribute: DW_AT_name - Form:DW_FORM_strp -- Code:0x3 - Tag: DW_TAG_subprogram - Children:DW_CHILDREN_yes - Attributes: -- Attribute: DW_AT_name - Form:DW_FORM_strp -- Attribute: DW_AT_declaration - Form:DW_FORM_flag_present -- Attribute: DW_AT_object_pointer - Form:DW_FORM_ref4 -- Attribute: DW_AT_external - Form:DW_FORM_flag_present -- Code:0x4 - Tag: DW_TAG_formal_parameter - Children:DW_CHILDREN_no - Attributes: -- Attribute: DW_AT_name - Form:DW_FORM_strp -- Attribute: DW_AT_type - Form:DW_FORM_ref4 -- Code:0x5 - Tag: DW_TAG_rvalue_reference_type - Children:DW_CHILDREN_no - Attributes: -- Attribute: DW_AT_type - Form:DW_FORM_ref4 -- Code:0x6 - Tag: DW_TAG_const_type - Children:DW_CHILDREN_no - Attributes: -- Attribute: DW_AT_type - Form:DW_FORM_ref4 -- Code:0x7 - Tag: DW_TAG_volatile_type - Children:DW_CHILDREN_no - Attributes: -
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
tblah wrote: I'm not sure why the bot didn't run on this. @llvm/pr-subscribers-flang-openmp https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
https://github.com/tblah edited https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] bb21661 - Revert "[lldb][test] Remove compiler version check and use regex (#123393)"
Author: Michael Buch Date: 2025-01-23T11:27:19Z New Revision: bb21661782242f931f3d04eb8fed9be792bd4ef8 URL: https://github.com/llvm/llvm-project/commit/bb21661782242f931f3d04eb8fed9be792bd4ef8 DIFF: https://github.com/llvm/llvm-project/commit/bb21661782242f931f3d04eb8fed9be792bd4ef8.diff LOG: Revert "[lldb][test] Remove compiler version check and use regex (#123393)" This reverts commit b62e55803c52ca04093a0eea361407e849dc23e1. Added: Modified: lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py Removed: diff --git a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py index 759077302bfca4..1c3e64f14c 100644 --- a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py @@ -23,6 +23,13 @@ def test(self): self.runCmd("settings set target.import-std-module true") +if self.expectedCompiler(["clang"]) and self.expectedCompilerVersion( +[">", "16.0"] +): +vector_type = "std::vector" +else: +vector_type = "std::vector >" + size_type = "size_type" value_type = "value_type" iterator = "iterator" @@ -34,14 +41,13 @@ def test(self): ValueCheck(name="current"), ] -self.expect( -"expr a", -patterns=[ -"""\(std::vector )*>\) \$0 = size=3 \{ - \[0\] = \(a = 3\) - \[1\] = \(a = 1\) - \[2\] = \(a = 2\) -\}""" +self.expect_expr( +"a", +result_type=vector_type, +result_children=[ +ValueCheck(children=[ValueCheck(value="3")]), +ValueCheck(children=[ValueCheck(value="1")]), +ValueCheck(children=[ValueCheck(value="2")]), ], ) diff --git 
a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py index e18785ec1359cc..a1f33271f39d2f 100644 --- a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py @@ -17,26 +17,42 @@ def test(self): self, "// Set break point at this line.", lldb.SBFileSpec("main.cpp") ) +if self.expectedCompiler(["clang"]) and self.expectedCompilerVersion( +[">", "16.0"] +): +vector_type = "std::vector" +vector_of_vector_type = "std::vector >" +else: +vector_type = "std::vector" +vector_of_vector_type = ( +"std::vector, std::allocator > >" +) + size_type = "size_type" value_type = "value_type" self.runCmd("settings set target.import-std-module true") -self.expect( -"expr a", -patterns=[ -"""\(std::vector(, std::allocator )* >\) \$0 = size=2 \{ - \[0\] = size=3 \{ -\[0\] = 1 -\[1\] = 2 -\[2\] = 3 - \} - \[1\] = size=3 \{ -\[0\] = 3 -\[1\] = 2 -\[2\] = 1 - \} -\}""" +self.expect_expr( +"a", +result_type=vector_of_vector_type, +result_children=[ +ValueCheck( +type=vector_type, +children=[ +ValueCheck(value="1"), +ValueCheck(value="2"), +ValueCheck(value="3"), +], +), +ValueCheck( +type=vector_type, +children=[ +ValueCheck(value="3"), +ValueCheck(value="2"), +ValueCheck(value="1"), +], +), ], ) self.expect_expr("a.size()", result_type=size_type, result_value="2") ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
https://github.com/tblah edited https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -488,44 +559,34 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, mlir::OpBuilder::InsertionGuard guard(firOpBuilder); firOpBuilder.setInsertionPointToStart(moduleOp.getBody()); auto result = firOpBuilder.create( -symLoc, uniquePrivatizerName, symType, +symLoc, uniquePrivatizerName, allocType, isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate : mlir::omp::DataSharingClauseType::Private); fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym); lower::SymMapScope outerScope(symTable); -// Populate the `alloc` region. -{ - mlir::Region &allocRegion = result.getAllocRegion(); - mlir::Block *allocEntryBlock = firOpBuilder.createBlock( - &allocRegion, /*insertPt=*/{}, symType, symLoc); - - firOpBuilder.setInsertionPointToEnd(allocEntryBlock); - - fir::ExtendedValue localExV = - hlfir::translateToExtendedValue( - symLoc, firOpBuilder, hlfir::Entity{allocRegion.getArgument(0)}, - /*contiguousHint=*/ - evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext())) - .first; - - symTable.addSymbol(*sym, localExV); - lower::SymMapScope innerScope(symTable); - cloneSymbol(sym); - mlir::Value cloneAddr = symTable.shallowLookupSymbol(*sym).getAddr(); - mlir::Type cloneType = cloneAddr.getType(); - - // A `convert` op is required for variables that are storage associated - // via `equivalence`. The problem is that these variables are declared as - // `fir.ptr`s while their privatized storage is declared as `fir.ref`, - // therefore we convert to proper symbol type. - mlir::Value yieldedValue = - (symType == cloneType) ? cloneAddr - : firOpBuilder.createConvert( - cloneAddr.getLoc(), symType, cloneAddr); - - firOpBuilder.create(hsb.getAddr().getLoc(), - yieldedValue); +// Populate the `init` region. +const bool needsInitialization = ergawy wrote: Can you add a comment on this variable, providing examples for each case where it is set to true? The condition is a bit complex, especially the first part. 
https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
https://github.com/ergawy edited https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -20,25 +20,42 @@ namespace mlir { class Region; } // namespace mlir +namespace Fortran { +namespace semantics { +class Symbol; +} // namespace semantics +} // namespace Fortran + namespace fir { class FirOpBuilder; class ShapeShiftOp; } // namespace fir namespace Fortran { namespace lower { +class AbstractConverter; + namespace omp { +enum class DeclOperationKind { Private, FirstPrivate, Reduction }; +inline bool isPrivatization(DeclOperationKind kind) { + return (kind == DeclOperationKind::FirstPrivate) || + (kind == DeclOperationKind::Private); +} +inline bool isReduction(DeclOperationKind kind) { + return kind == DeclOperationKind::Reduction; +} + /// Generate init and cleanup regions suitable for reduction or privatizer /// declarations. `scalarInitValue` may be nullptr if there is no default -/// initialization (for privatization). -void populateByRefInitAndCleanupRegions(fir::FirOpBuilder &builder, -mlir::Location loc, mlir::Type argType, -mlir::Value scalarInitValue, -mlir::Block *initBlock, -mlir::Value allocatedPrivVarArg, -mlir::Value moldArg, -mlir::Region &cleanupRegion); +/// initialization (for privatization). If this is for a privatizer, set +/// `isPrivate` to `true`. ergawy wrote: I think this needs to be updated to refer to `kind`? https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -132,7 +168,7 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) { if (needInitClone()) { Fortran::lower::initializeCloneAtRuntime(converter, *sym, symTable); -callsInitClone = true; +mightHaveReadMoldArg = true; ergawy wrote: The name of this field is a bit confusing because: 1. it does not specify where the mold arg comes from (in this case it is the `init` region's mold arg), 2. its use here somewhat implies that `DataSharingProcessor::cloneSymbol` is exclusively called for the delayed privatization case, which seems to be the opposite, looking at the changes below. Maybe it can be named: `initializedCloneFromHostSym`, or something similar? https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -96,17 +149,118 @@ fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder, return shapeShift; } +// Initialize box newBox using moldBox. These should both have the same type and +// be boxes containing derived types e.g. +// fir.box> +// fir.box> +// fir.box>> +// fir.class<...>> +// If the type doesn't match , this does nothing +static void initializeIfDerivedTypeBox(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value newBox, + mlir::Value moldBox, bool hasInitializer, + bool isFirstPrivate) { + fir::BoxType boxTy = mlir::dyn_cast(newBox.getType()); + fir::ClassType classTy = mlir::dyn_cast(newBox.getType()); + if (!boxTy && !classTy) +return; + + // remove pointer and array types in the middle + mlir::Type eleTy; + if (boxTy) +eleTy = boxTy.getElementType(); + if (classTy) +eleTy = classTy.getEleTy(); + mlir::Type derivedTy = fir::unwrapRefType(eleTy); + if (auto array = mlir::dyn_cast(derivedTy)) +derivedTy = array.getElementType(); + + if (!fir::isa_derived(derivedTy)) +return; + assert(moldBox.getType() == newBox.getType()); + + if (hasInitializer) +fir::runtime::genDerivedTypeInitialize(builder, loc, newBox); + + if (hlfir::mayHaveAllocatableComponent(derivedTy) && !isFirstPrivate) +fir::runtime::genDerivedTypeInitializeClone(builder, loc, newBox, moldBox); +} + +static void getLengthParameters(fir::FirOpBuilder &builder, mlir::Location loc, +mlir::Value moldArg, +llvm::SmallVectorImpl &lenParams) { + // We pass derived types unboxed and so are not self-contained entities. + // Assume that unboxed derived types won't need length paramters. + if (!hlfir::isFortranEntity(moldArg)) +return; + + hlfir::genLengthParameters(loc, builder, hlfir::Entity{moldArg}, lenParams); + if (lenParams.empty()) +return; + + // The verifier for EmboxOp doesn't allow length parameters when the the + // character already has static LEN. genLengthParameters may still return them + // in this case. 
+ mlir::Type unwrappedType = + fir::unwrapRefType(fir::unwrapSeqOrBoxedSeqType(moldArg.getType())); + if (auto strTy = mlir::dyn_cast(unwrappedType)) { +if (strTy.hasConstantLen()) + lenParams.resize(0); + } +} + +static bool +isDerivedTypeNeedingInitialization(const Fortran::semantics::Symbol &sym) { + // Fortran::lower::hasDefaultInitialization returns false for ALLOCATABLE, so + // re-implement here. + // ignorePointer=true because either the pointer points to the same target as + // the original variable, or it is uninitialized. + if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType()) +if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec = +declTypeSpec->AsDerived()) + if (derivedTypeSpec->HasDefaultInitialization( + /*ignoreAllocatable=*/false, /*ignorePointer=*/true)) +return true; ergawy wrote: nit ```suggestion return derivedTypeSpec->HasDefaultInitialization( /*ignoreAllocatable=*/false, /*ignorePointer=*/true); ``` https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -55,15 +55,19 @@ class MapsForPrivatizedSymbolsPass std::underlying_type_t>( llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO); Operation *definingOp = var.getDefiningOp(); -auto declOp = llvm::dyn_cast_or_null(definingOp); -assert(declOp && - "Expected defining Op of privatized var to be hlfir.declare"); +assert(definingOp && + "Privatizing a block argument without any hlfir.declare"); +Value varPtr = var; // We want the first result of the hlfir.declare op because our goal // is to map the descriptor (fir.box or fir.boxchar) and the first // result for hlfir.declare is the descriptor if a the symbol being // decalred needs a descriptor. -Value varPtr = declOp.getBase(); +// Some types are boxed immediately before privatization. These have other +// operations in between the privatization and the declaration. It is safe +// to use var directly here because they will be boxed anyay. ergawy wrote: ```suggestion // to use var directly here because they will be boxed anyway. ``` https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -468,15 +505,49 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); - mlir::Type symType = hsb.getAddr().getType(); + mlir::Value privVal = hsb.getAddr(); ergawy wrote: Isn't this rather `hostVal`? https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -468,15 +505,49 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); - mlir::Type symType = hsb.getAddr().getType(); + mlir::Value privVal = hsb.getAddr(); + mlir::Type allocType; + if (mlir::isa(privVal.getType())) +allocType = privVal.getType(); + else +allocType = fir::unwrapRefType(privVal.getType()); + mlir::Location symLoc = hsb.getAddr().getLoc(); std::string privatizerName = sym->name().ToString() + ".privatizer"; bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate); + if (auto poly = mlir::dyn_cast(allocType)) { +if (!mlir::isa(poly.getEleTy()) && isFirstPrivate) + TODO(symLoc, "create polymorphic host associated copy"); + } ergawy wrote: nit: move above closer to where we initialize `allocType`. Same for the next 2 if conditions as well as the declaration of `argType`. Just keeps all type handling in one visual block instead of being interrupted by privatizer stuff. https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -34,52 +34,48 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]> let description = [{ This operation provides a declaration of how to implement the [first]privatization of a variable. The dialect users should provide -information about how to create an instance of the type in the alloc region, -how to initialize the copy from the original item in the copy region, and if -needed, how to deallocate allocated memory in the dealloc region. +which type should be allocated for this variable. The allocated (usually by +alloca) variable is passed to the initialization region which does everything +else (e.g. initialization of Fortran runtime descriptors). Information about +how to initialize the copy from the original item should be given in the +copy region, and if needed, how to deallocate memory (allocated by the +initialization region) in the dealloc region. ergawy wrote: ```suggestion copy region, and if needed, how to deallocate memory (allocated implicitly by the operation) in the dealloc region. ``` https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -468,15 +505,49 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); - mlir::Type symType = hsb.getAddr().getType(); + mlir::Value privVal = hsb.getAddr(); + mlir::Type allocType; + if (mlir::isa(privVal.getType())) +allocType = privVal.getType(); + else +allocType = fir::unwrapRefType(privVal.getType()); ergawy wrote: Just a bit easier to read. ```suggestion mlir::Type allocType = privVal.getType(); if (!mlir::isa(allocType)) allocType = fir::unwrapRefType(allocType); ``` https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -96,17 +149,118 @@ fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder, return shapeShift; } +// Initialize box newBox using moldBox. These should both have the same type and +// be boxes containing derived types e.g. +// fir.box> +// fir.box> +// fir.box>> +// fir.class<...>> +// If the type doesn't match , this does nothing +static void initializeIfDerivedTypeBox(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value newBox, + mlir::Value moldBox, bool hasInitializer, + bool isFirstPrivate) { + fir::BoxType boxTy = mlir::dyn_cast(newBox.getType()); + fir::ClassType classTy = mlir::dyn_cast(newBox.getType()); + if (!boxTy && !classTy) +return; + + // remove pointer and array types in the middle + mlir::Type eleTy; + if (boxTy) +eleTy = boxTy.getElementType(); + if (classTy) +eleTy = classTy.getEleTy(); + mlir::Type derivedTy = fir::unwrapRefType(eleTy); + if (auto array = mlir::dyn_cast(derivedTy)) +derivedTy = array.getElementType(); + + if (!fir::isa_derived(derivedTy)) +return; + assert(moldBox.getType() == newBox.getType()); + + if (hasInitializer) +fir::runtime::genDerivedTypeInitialize(builder, loc, newBox); + + if (hlfir::mayHaveAllocatableComponent(derivedTy) && !isFirstPrivate) +fir::runtime::genDerivedTypeInitializeClone(builder, loc, newBox, moldBox); +} + +static void getLengthParameters(fir::FirOpBuilder &builder, mlir::Location loc, +mlir::Value moldArg, +llvm::SmallVectorImpl &lenParams) { + // We pass derived types unboxed and so are not self-contained entities. + // Assume that unboxed derived types won't need length paramters. + if (!hlfir::isFortranEntity(moldArg)) +return; + + hlfir::genLengthParameters(loc, builder, hlfir::Entity{moldArg}, lenParams); + if (lenParams.empty()) +return; + + // The verifier for EmboxOp doesn't allow length parameters when the the + // character already has static LEN. genLengthParameters may still return them + // in this case. 
+ mlir::Type unwrappedType = + fir::unwrapRefType(fir::unwrapSeqOrBoxedSeqType(moldArg.getType())); + if (auto strTy = mlir::dyn_cast(unwrappedType)) { +if (strTy.hasConstantLen()) + lenParams.resize(0); + } +} + +static bool +isDerivedTypeNeedingInitialization(const Fortran::semantics::Symbol &sym) { + // Fortran::lower::hasDefaultInitialization returns false for ALLOCATABLE, so + // re-implement here. + // ignorePointer=true because either the pointer points to the same target as + // the original variable, or it is uninitialized. + if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType()) +if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec = +declTypeSpec->AsDerived()) + if (derivedTypeSpec->HasDefaultInitialization( + /*ignoreAllocatable=*/false, /*ignorePointer=*/true)) +return true; + return false; +} + +static mlir::Value generateZeroShapeForRank(fir::FirOpBuilder &builder, +mlir::Location loc, +mlir::Value moldArg) { + mlir::Type moldVal = fir::unwrapRefType(moldArg.getType()); ergawy wrote: ```suggestion mlir::Type moldType = fir::unwrapRefType(moldArg.getType()); ``` https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -96,17 +149,118 @@ fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder, return shapeShift; } +// Initialize box newBox using moldBox. These should both have the same type and +// be boxes containing derived types e.g. +// fir.box> +// fir.box> +// fir.box>> +// fir.class<...>> +// If the type doesn't match , this does nothing +static void initializeIfDerivedTypeBox(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value newBox, + mlir::Value moldBox, bool hasInitializer, + bool isFirstPrivate) { + fir::BoxType boxTy = mlir::dyn_cast(newBox.getType()); + fir::ClassType classTy = mlir::dyn_cast(newBox.getType()); + if (!boxTy && !classTy) +return; + + // remove pointer and array types in the middle + mlir::Type eleTy; + if (boxTy) +eleTy = boxTy.getElementType(); + if (classTy) +eleTy = classTy.getEleTy(); + mlir::Type derivedTy = fir::unwrapRefType(eleTy); + if (auto array = mlir::dyn_cast(derivedTy)) +derivedTy = array.getElementType(); + + if (!fir::isa_derived(derivedTy)) +return; + assert(moldBox.getType() == newBox.getType()); ergawy wrote: nit: move to the function start to document the pre-conditions expected by it. https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -148,41 +309,113 @@ void Fortran::lower::omp::populateByRefInitAndCleanupRegions( builder.setInsertionPointToEnd(initBlock); mlir::Value boxAlloca = allocatedPrivVarArg; + +moldArg = builder.loadIfRef(loc, moldArg); +getLengthParameters(builder, loc, moldArg, lenParams); + +// The initial state of a private pointer is undefined so we don't need to +// match the mold argument (OpenMP 5.2 end of page 106). +if (isPrivatization(kind) && +mlir::isa(boxTy.getEleTy())) { + // we need a shape with the right rank so that the embox op is lowered + // to an llvm struct of the right type. This returns nullptr if the types + // aren't right. + mlir::Value shape = generateZeroShapeForRank(builder, loc, moldArg); + // Just incase, do initialize the box with a null value + mlir::Value null = builder.createNullConstant(loc, boxTy.getEleTy()); + mlir::Value nullBox; + if (shape) +nullBox = builder.create( +loc, boxTy, null, shape, /*slice=*/mlir::Value{}, lenParams); ergawy wrote: I think we can get rid of the `else`, right? If `shape` is an empty value, it is the same as passing `Value{}` I think. https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankLegalize (PR #112864)
https://github.com/nhaehnle approved this pull request. https://github.com/llvm/llvm-project/pull/112864 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)
https://github.com/nhaehnle approved this pull request. https://github.com/llvm/llvm-project/pull/112866 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] X86: Fix convertToThreeAddress losing subregister indexes (PR #124098)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: Matt Arsenault (arsenm) Changes This avoids dozens of regressions in a future patch. These primarily manifested as assertions where we had copies of 64-bit registers to 32-bit registers. This is testable in principle with hand written MIR, but that's a bit too much x86 for me. --- Full diff: https://github.com/llvm/llvm-project/pull/124098.diff 2 Files Affected: - (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+44-33) - (modified) llvm/lib/Target/X86/X86InstrInfo.h (+3-2) ``diff diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 1baac05827c47c..ec9e8ca4ee1447 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1158,8 +1158,9 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr, bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, unsigned Opc, bool AllowSP, Register &NewSrc, - bool &isKill, MachineOperand &ImplicitOp, - LiveVariables *LV, LiveIntervals *LIS) const { + unsigned &NewSrcSubReg, bool &isKill, + MachineOperand &ImplicitOp, LiveVariables *LV, + LiveIntervals *LIS) const { MachineFunction &MF = *MI.getParent()->getParent(); const TargetRegisterClass *RC; if (AllowSP) { @@ -1168,12 +1169,14 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass; } Register SrcReg = Src.getReg(); + unsigned SubReg = Src.getSubReg(); isKill = MI.killsRegister(SrcReg, /*TRI=*/nullptr); // For both LEA64 and LEA32 the register already has essentially the right // type (32-bit or 64-bit) we may just need to forbid SP. 
if (Opc != X86::LEA64_32r) { NewSrc = SrcReg; +NewSrcSubReg = SubReg; assert(!Src.isUndef() && "Undef op doesn't need optimization"); if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC)) @@ -1189,6 +1192,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, ImplicitOp.setImplicit(); NewSrc = getX86SubSuperRegister(SrcReg, 64); +assert(!SubReg); assert(NewSrc.isValid() && "Invalid Operand"); assert(!Src.isUndef() && "Undef op doesn't need optimization"); } else { @@ -1198,7 +1202,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, MachineInstr *Copy = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY)) .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit) -.addReg(SrcReg, getKillRegState(isKill)); +.addReg(SrcReg, getKillRegState(isKill), SubReg); // Which is obviously going to be dead after we're done with it. isKill = true; @@ -1258,7 +1262,9 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineBasicBlock::iterator MBBI = MI.getIterator(); Register Dest = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); + unsigned SrcSubReg = MI.getOperand(1).getSubReg(); Register Src2; + unsigned Src2SubReg; bool IsDead = MI.getOperand(0).isDead(); bool IsKill = MI.getOperand(1).isKill(); unsigned SubReg = Is8BitOp ? 
X86::sub_8bit : X86::sub_16bit; @@ -1268,7 +1274,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstr *InsMI = BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY)) .addReg(InRegLEA, RegState::Define, SubReg) - .addReg(Src, getKillRegState(IsKill)); + .addReg(Src, getKillRegState(IsKill), SrcSubReg); MachineInstr *ImpDef2 = nullptr; MachineInstr *InsMI2 = nullptr; @@ -1306,6 +1312,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, case X86::ADD16rr: case X86::ADD16rr_DB: { Src2 = MI.getOperand(2).getReg(); +Src2SubReg = MI.getOperand(2).getSubReg(); bool IsKill2 = MI.getOperand(2).isKill(); assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization"); if (Src == Src2) { @@ -1323,7 +1330,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, InRegLEA2); InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY)) .addReg(InRegLEA2, RegState::Define, SubReg) - .addReg(Src2, getKillRegState(IsKill2)); + .addReg(Src2, getKillRegState(IsKill2), Src2SubReg); addRegReg(MIB, InRegLEA, true, InRegLEA2, true); } if (LV && IsKill2 && InsMI2) @@ -1428,6 +1435,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI, MachineInstr *NewMI = nullptr; Regi
[llvm-branch-commits] [llvm] X86: Fix convertToThreeAddress losing subregister indexes (PR #124098)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/124098 This avoids dozens of regressions in a future patch. These primarily manifested as assertions where we had copies of 64-bit registers to 32-bit registers. This is testable in principle with hand written MIR, but that's a bit too much x86 for me. >From 84222b6a4872e768cf951df27d867c59de4cc49a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 23 Jan 2025 16:34:03 +0700 Subject: [PATCH] X86: Fix convertToThreeAddress losing subregister indexes This avoids dozens of regressions in a future patch. These primarily manifested as assertions where we had copies of 64-bit registers to 32-bit registers. This is testable in principle with hand written MIR, but that's a bit too much x86 for me. --- llvm/lib/Target/X86/X86InstrInfo.cpp | 77 llvm/lib/Target/X86/X86InstrInfo.h | 5 +- 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 1baac05827c47c..ec9e8ca4ee1447 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1158,8 +1158,9 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr, bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, unsigned Opc, bool AllowSP, Register &NewSrc, - bool &isKill, MachineOperand &ImplicitOp, - LiveVariables *LV, LiveIntervals *LIS) const { + unsigned &NewSrcSubReg, bool &isKill, + MachineOperand &ImplicitOp, LiveVariables *LV, + LiveIntervals *LIS) const { MachineFunction &MF = *MI.getParent()->getParent(); const TargetRegisterClass *RC; if (AllowSP) { @@ -1168,12 +1169,14 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, RC = Opc != X86::LEA32r ? 
&X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass; } Register SrcReg = Src.getReg(); + unsigned SubReg = Src.getSubReg(); isKill = MI.killsRegister(SrcReg, /*TRI=*/nullptr); // For both LEA64 and LEA32 the register already has essentially the right // type (32-bit or 64-bit) we may just need to forbid SP. if (Opc != X86::LEA64_32r) { NewSrc = SrcReg; +NewSrcSubReg = SubReg; assert(!Src.isUndef() && "Undef op doesn't need optimization"); if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC)) @@ -1189,6 +1192,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, ImplicitOp.setImplicit(); NewSrc = getX86SubSuperRegister(SrcReg, 64); +assert(!SubReg); assert(NewSrc.isValid() && "Invalid Operand"); assert(!Src.isUndef() && "Undef op doesn't need optimization"); } else { @@ -1198,7 +1202,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, MachineInstr *Copy = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY)) .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit) -.addReg(SrcReg, getKillRegState(isKill)); +.addReg(SrcReg, getKillRegState(isKill), SubReg); // Which is obviously going to be dead after we're done with it. isKill = true; @@ -1258,7 +1262,9 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineBasicBlock::iterator MBBI = MI.getIterator(); Register Dest = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); + unsigned SrcSubReg = MI.getOperand(1).getSubReg(); Register Src2; + unsigned Src2SubReg; bool IsDead = MI.getOperand(0).isDead(); bool IsKill = MI.getOperand(1).isKill(); unsigned SubReg = Is8BitOp ? 
X86::sub_8bit : X86::sub_16bit; @@ -1268,7 +1274,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstr *InsMI = BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY)) .addReg(InRegLEA, RegState::Define, SubReg) - .addReg(Src, getKillRegState(IsKill)); + .addReg(Src, getKillRegState(IsKill), SrcSubReg); MachineInstr *ImpDef2 = nullptr; MachineInstr *InsMI2 = nullptr; @@ -1306,6 +1312,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, case X86::ADD16rr: case X86::ADD16rr_DB: { Src2 = MI.getOperand(2).getReg(); +Src2SubReg = MI.getOperand(2).getSubReg(); bool IsKill2 = MI.getOperand(2).isKill(); assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization"); if (Src == Src2) { @@ -1323,7 +1330,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, InRegLEA2); InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY)) .addReg(InRegL
[llvm-branch-commits] [llvm] X86: Fix convertToThreeAddress losing subregister indexes (PR #124098)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/124098 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] X86: Fix convertToThreeAddress losing subregister indexes (PR #124098)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124098?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#124098** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124098?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124098?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#124095** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124095?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/124098 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [PassBuilder][CodeGen] Add callback style pass buider (PR #116913)
optimisan wrote: Will new codegen support disabling individual passes? Instead of having separate arguments like `-disable-machine-sink` we could do `-disable-passes=machine-sink`. https://github.com/llvm/llvm-project/pull/116913 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/119540 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)
@@ -146,11 +149,137 @@ static cl::opt SplitThresholdForRegWithHint( static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); -char RAGreedy::ID = 0; -char &llvm::RAGreedyID = RAGreedy::ID; +namespace { +class RAGreedyLegacy : public MachineFunctionPass { + RegAllocFilterFunc F; -INITIALIZE_PASS_BEGIN(RAGreedy, "greedy", -"Greedy Register Allocator", false, false) +public: + RAGreedyLegacy(const RegAllocFilterFunc F = nullptr); + + static char ID; + /// Return the pass name. + StringRef getPassName() const override { return "Greedy Register Allocator"; } + + /// RAGreedy analysis usage. + void getAnalysisUsage(AnalysisUsage &AU) const override; + /// Perform register allocation. + bool runOnMachineFunction(MachineFunction &mf) override; + + MachineFunctionProperties getRequiredProperties() const override { +return MachineFunctionProperties().set( +MachineFunctionProperties::Property::NoPHIs); + } + + MachineFunctionProperties getClearedProperties() const override { +return MachineFunctionProperties().set( +MachineFunctionProperties::Property::IsSSA); + } +}; + +} // end anonymous namespace + +RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F) +: MachineFunctionPass(ID), F(F) { + initializeRAGreedyLegacyPass(*PassRegistry::getPassRegistry()); +} + +RAGreedy::RAGreedy(const RegAllocFilterFunc F) : RegAllocBase(F) {} + +void RAGreedy::setAnalyses(RequiredAnalyses &Analyses) { + VRM = Analyses.VRM; + LIS = Analyses.LIS; + Matrix = Analyses.LRM; + Indexes = Analyses.Indexes; + MBFI = Analyses.MBFI; + DomTree = Analyses.DomTree; + Loops = Analyses.Loops; + ORE = Analyses.ORE; + Bundles = Analyses.Bundles; + SpillPlacer = Analyses.SpillPlacer; + DebugVars = Analyses.DebugVars; + LSS = Analyses.LSS; + EvictProvider = Analyses.EvictProvider; + PriorityProvider = Analyses.PriorityProvider; +} + +void RAGreedyPass::printPipeline(raw_ostream &OS, function_ref MapClassName2PassName) const { + StringRef FilterName = 
Opts.FilterName.empty() ? "all" : Opts.FilterName; + OS << "regallocgreedy<" << FilterName << ">"; +} + +PreservedAnalyses RAGreedyPass::run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + + RAGreedy Impl(Opts.Filter); + RAGreedy::RequiredAnalyses Analyses; + + Analyses.LIS = &MFAM.getResult(MF); + Analyses.LRM = &MFAM.getResult(MF); + Analyses.LSS = &MFAM.getResult(MF); + Analyses.Indexes = &MFAM.getResult(MF); + Analyses.MBFI = &MFAM.getResult(MF); + Analyses.DomTree = &MFAM.getResult(MF); + Analyses.ORE = &MFAM.getResult(MF); + Analyses.Loops = &MFAM.getResult(MF); + Analyses.Bundles = &MFAM.getResult(MF); + Analyses.SpillPlacer = &MFAM.getResult(MF); + Analyses.DebugVars = &MFAM.getResult(MF); + Analyses.EvictProvider = + MFAM.getResult(MF).Provider; + Analyses.PriorityProvider = + MFAM.getResult(MF).Provider; + Analyses.VRM = &MFAM.getResult(MF); + + Impl.setAnalyses(Analyses); optimisan wrote: Oh, I changed the evictionadvisor provider api so forgot to simplify this out. https://github.com/llvm/llvm-project/pull/119540 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/119540 >From 74007cb20fbf8508ca457fc30d693ffa4526b432 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 11 Dec 2024 08:51:55 + Subject: [PATCH 1/5] [CodeGen][NewPM] Port RegAllocGreedy to NPM --- llvm/include/llvm/CodeGen/MachineFunction.h | 1 + llvm/include/llvm/CodeGen/Passes.h| 2 +- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Passes/MachinePassRegistry.def | 9 + llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 185 ++ llvm/lib/CodeGen/RegAllocGreedy.h | 57 +++--- llvm/lib/Passes/PassBuilder.cpp | 1 + 8 files changed, 196 insertions(+), 63 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index d517b5e6647291..c2a82888c65211 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -911,6 +911,7 @@ class LLVM_ABI MachineFunction { /// Run the current MachineFunction through the machine code verifier, useful /// for debugger use. + /// TODO: Add the param LiveStks /// \returns true if no problems were found. bool verify(LiveIntervals *LiveInts, SlotIndexes *Indexes, const char *Banner = nullptr, raw_ostream *OS = nullptr, diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index d1fac4a304cffe..1096c34b307f9b 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -167,7 +167,7 @@ namespace llvm { extern char &LiveRangeShrinkID; /// Greedy register allocator. - extern char &RAGreedyID; + extern char &RAGreedyLegacyID; /// Basic register allocator. 
extern char &RABasicID; diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index e74b85c0de886f..afe0aa6113dd21 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -250,7 +250,7 @@ void initializeProfileSummaryInfoWrapperPassPass(PassRegistry &); void initializePromoteLegacyPassPass(PassRegistry &); void initializeRABasicPass(PassRegistry &); void initializePseudoProbeInserterPass(PassRegistry &); -void initializeRAGreedyPass(PassRegistry &); +void initializeRAGreedyLegacyPass(PassRegistry &); void initializeReachingDefAnalysisPass(PassRegistry &); void initializeReassociateLegacyPassPass(PassRegistry &); void initializeRegAllocEvictionAdvisorAnalysisLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index d512c6459b5a4e..00aae5a4c30b97 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -187,6 +187,15 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( return parseRegAllocFastPassOptions(*PB, Params); }, "filter=reg-filter;no-clear-vregs") + +MACHINE_FUNCTION_PASS_WITH_PARAMS( +"regallocgreedy", "RAGreedy", +[](RegAllocFilterFunc F) { return RAGreedyPass(F); }, +[PB = this](StringRef Params) { + // TODO: parseRegAllocFilter(*PB, Params); + return Expected(nullptr); +}, "" +) #undef MACHINE_FUNCTION_PASS_WITH_PARAMS // After a pass is converted to new pass manager, its entry should be moved from diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 925d9af7d0e06d..0f76024bc24e15 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -112,7 +112,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRABasicPass(Registry); - initializeRAGreedyPass(Registry); + 
initializeRAGreedyLegacyPass(Registry); initializeRegAllocFastPass(Registry); initializeRegUsageInfoCollectorLegacyPass(Registry); initializeRegUsageInfoPropagationLegacyPass(Registry); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 72c38ab8c7d07b..49d251b4fe4d57 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -43,8 +43,10 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" +#include "llvm/CodeGen/RegAllocGreedyPass.h" #include "llvm/CodeGen/RegAllocPriorityAdvisor.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -55,6 +57,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Analysis.h" #include "l
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
https://github.com/ergawy edited https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
https://github.com/ergawy commented: Thanks for the great work @tblah! I partially reviewed since this is a huge PR (understandably). I will come back and continue reviewing the rest later. https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -34,52 +34,48 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]> let description = [{ This operation provides a declaration of how to implement the [first]privatization of a variable. The dialect users should provide -information about how to create an instance of the type in the alloc region, -how to initialize the copy from the original item in the copy region, and if -needed, how to deallocate allocated memory in the dealloc region. +which type should be allocated for this variable. The allocated (usually by +alloca) variable is passed to the initialization region which does everything +else (e.g. initialization of Fortran runtime descriptors). Information about +how to initialize the copy from the original item should be given in the +copy region, and if needed, how to deallocate memory (allocated by the +initialization region) in the dealloc region. tblah wrote: Thanks for reviewing the PR. These readability comments are very helpful. The LLVM lowering is responsible for managing the implicitly allocated memory (it is usually just a stack allocation). The `dealloc` region undoes what is done in the `init` region. So for example for a `!fir.box>>`, the box is allocated implicitly on the stack and then the `init` region performs the heap allocation for the array and the dealloc region frees that heap allocation. I will add an example in the omp.private documentation to make it clearer. https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -488,44 +559,34 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, mlir::OpBuilder::InsertionGuard guard(firOpBuilder); firOpBuilder.setInsertionPointToStart(moduleOp.getBody()); auto result = firOpBuilder.create( -symLoc, uniquePrivatizerName, symType, +symLoc, uniquePrivatizerName, allocType, isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate : mlir::omp::DataSharingClauseType::Private); fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym); lower::SymMapScope outerScope(symTable); -// Populate the `alloc` region. -{ - mlir::Region &allocRegion = result.getAllocRegion(); - mlir::Block *allocEntryBlock = firOpBuilder.createBlock( - &allocRegion, /*insertPt=*/{}, symType, symLoc); - - firOpBuilder.setInsertionPointToEnd(allocEntryBlock); - - fir::ExtendedValue localExV = - hlfir::translateToExtendedValue( - symLoc, firOpBuilder, hlfir::Entity{allocRegion.getArgument(0)}, - /*contiguousHint=*/ - evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext())) - .first; - - symTable.addSymbol(*sym, localExV); - lower::SymMapScope innerScope(symTable); - cloneSymbol(sym); - mlir::Value cloneAddr = symTable.shallowLookupSymbol(*sym).getAddr(); - mlir::Type cloneType = cloneAddr.getType(); - - // A `convert` op is required for variables that are storage associated - // via `equivalence`. The problem is that these variables are declared as - // `fir.ptr`s while their privatized storage is declared as `fir.ref`, - // therefore we convert to proper symbol type. - mlir::Value yieldedValue = - (symType == cloneType) ? cloneAddr - : firOpBuilder.createConvert( - cloneAddr.getLoc(), symType, cloneAddr); - - firOpBuilder.create(hsb.getAddr().getLoc(), - yieldedValue); +// Populate the `init` region. 
+const bool needsInitialization = +(Fortran::lower::hasDefaultInitialization(sym->GetUltimate()) && + (!isFirstPrivate || hlfir::mayHaveAllocatableComponent(allocType))) || +mlir::isa(allocType) || +mlir::isa(allocType); +if (needsInitialization) { + mlir::Region &initRegion = result.getInitRegion(); + mlir::Block *initBlock = firOpBuilder.createBlock( + &initRegion, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc}); + + populateByRefInitAndCleanupRegions( + converter, symLoc, argType, /*scalarInitValue=*/nullptr, initBlock, + result.getInitPrivateArg(), result.getInitMoldArg(), + result.getDeallocRegion(), + isFirstPrivate ? DeclOperationKind::FirstPrivate + : DeclOperationKind::Private, + sym); + // TODO: currently there are false positives from dead uses of the mold + // arg + if (!result.getInitMoldArg().getUses().empty()) +mightHaveReadMoldArg = true; tblah wrote: Quite a lot of cases actually. Some examples: - Derived types that need a runtime initialization call but which have no allocatable components (the allocatable component initialization does read from the mold argument) - Pointers only need to be set to NULL - Arrays with compile-time known sizes. In this case the box is allocated implicitly but memory for the actual array needs to be allocated (on the heap) in the init region and the box has to be set up to point to that allocated array and contain the correct shape. - Characters with compile-time known sizes Currently there are some cases where there are dead loads from the mold argument or maybe even reading character length parameters which never get used. These are eventually removed by DCE but would lead to a false positive here. I hope to improve the init region generation in a future patch to remove these cases. https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112866 >From d3f746c12fc0e741dfe754f19f394d770a22e84c Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Thu, 31 Oct 2024 14:10:57 +0100 Subject: [PATCH] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi Change existing code for G_PHI to match what LLVM-IR version is doing via PHINode::hasConstantOrUndefValue. This is not safe for regular PHI since it may appear with an undef operand and getVRegDef can fail. Most notably this improves number of values that can be allocated to sgpr register bank in AMDGPURegBankSelect. Common case here are phis that appear in structurize-cfg lowering for cycles with multiple exits: Undef incoming value is coming from block that reached cycle exit condition, if other incoming is uniform keep the phi uniform despite the fact it is joining values from pair of blocks that are entered via divergent condition branch. --- llvm/lib/CodeGen/MachineSSAContext.cpp| 27 +- .../AMDGPU/MIR/hidden-diverge-gmir.mir| 28 +++ .../AMDGPU/MIR/hidden-loop-diverge.mir| 4 +- .../AMDGPU/MIR/uses-value-from-cycle.mir | 8 +- .../GlobalISel/divergence-structurizer.mir| 80 -- .../regbankselect-mui-regbanklegalize.mir | 69 --- .../regbankselect-mui-regbankselect.mir | 18 ++-- .../AMDGPU/GlobalISel/regbankselect-mui.ll| 84 ++- .../AMDGPU/GlobalISel/regbankselect-mui.mir | 51 ++- 9 files changed, 191 insertions(+), 178 deletions(-) diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp index e384187b6e8593..8e13c0916dd9e1 100644 --- a/llvm/lib/CodeGen/MachineSSAContext.cpp +++ b/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -54,9 +54,34 @@ const MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const { return F->getRegInfo().getVRegDef(value)->getParent(); } +static bool isUndef(const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF || + MI.getOpcode() == TargetOpcode::IMPLICIT_DEF; +} + +/// 
MachineInstr equivalent of PHINode::hasConstantOrUndefValue() for G_PHI. template <> bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) { - return Phi.isConstantValuePHI(); + if (!Phi.isPHI()) +return false; + + // In later passes PHI may appear with an undef operand, getVRegDef can fail. + if (Phi.getOpcode() == TargetOpcode::PHI) +return Phi.isConstantValuePHI(); + + // For G_PHI we do equivalent of PHINode::hasConstantOrUndefValue(). + const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo(); + Register This = Phi.getOperand(0).getReg(); + Register ConstantValue; + for (unsigned i = 1, e = Phi.getNumOperands(); i < e; i += 2) { +Register Incoming = Phi.getOperand(i).getReg(); +if (Incoming != This && !isUndef(*MRI.getVRegDef(Incoming))) { + if (ConstantValue && ConstantValue != Incoming) +return false; + ConstantValue = Incoming; +} + } + return true; } template <> diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir index ce00edf3363f77..9694a340b5e906 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir @@ -1,24 +1,24 @@ # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s # CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge # CHECK-LABEL: BLOCK bb.0 -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# 
CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1 -# CHECK: DIVERGENT: G_BR %bb.2 +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) +# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1 +# CHECK: DIVERGENT: G_BR %bb.2 # CHECK-LABEL: BLOCK bb.1 # CHECK-LABEL: BLOCK bb.2 -# CHECK: D
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RegBankLegalize rules for load (PR #112882)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112882 >From 0adced1346e563e75aab408c2a948cdbd8c449c3 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Thu, 23 Jan 2025 13:35:07 +0100 Subject: [PATCH] AMDGPU/GlobalISel: RegBankLegalize rules for load Add IDs for bit width that cover multiple LLTs: B32 B64 etc. "Predicate" wrapper class for bool predicate functions used to write pretty rules. Predicates can be combined using &&, || and !. Lowering for splitting and widening loads. Write rules for loads to not change existing mir tests from old regbankselect. --- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 288 +++- .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 5 + .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 278 ++- .../AMDGPU/AMDGPURegBankLegalizeRules.h | 65 +++- .../AMDGPU/GlobalISel/regbankselect-load.mir | 320 +++--- .../GlobalISel/regbankselect-zextload.mir | 9 +- 6 files changed, 900 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index d27fa1f62538b6..3c007987b84947 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -50,6 +50,83 @@ void RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) { lower(MI, Mapping, WaterfallSgprs); } +void RegBankLegalizeHelper::splitLoad(MachineInstr &MI, + ArrayRef LLTBreakdown, LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register Base = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(Base); + const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base); + LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + SmallVector LoadPartRegs; + + unsigned ByteOffset = 0; + for (LLT PartTy : LLTBreakdown) { 
+Register BasePlusOffset; +if (ByteOffset == 0) { + BasePlusOffset = Base; +} else { + auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset); + BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0); +} +auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy); +auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO); +LoadPartRegs.push_back(LoadPart.getReg(0)); +ByteOffset += PartTy.getSizeInBytes(); + } + + if (!MergeTy.isValid()) { +// Loads are of same size, concat or merge them together. +B.buildMergeLikeInstr(Dst, LoadPartRegs); + } else { +// Loads are not all of same size, need to unmerge them to smaller pieces +// of MergeTy type, then merge pieces to Dst. +SmallVector MergeTyParts; +for (Register Reg : LoadPartRegs) { + if (MRI.getType(Reg) == MergeTy) { +MergeTyParts.push_back(Reg); + } else { +auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg); +for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) + MergeTyParts.push_back(Unmerge.getReg(i)); + } +} +B.buildMergeLikeInstr(Dst, MergeTyParts); + } + MI.eraseFromParent(); +} + +void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy, + LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register Base = MI.getOperand(1).getReg(); + + MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy); + auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO); + + if (WideTy.isScalar()) { +B.buildTrunc(Dst, WideLoad); + } else { +SmallVector MergeTyParts; +auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad); + +LLT DstTy = MRI.getType(Dst); +unsigned NumElts = DstTy.getSizeInBits() / MergeTy.getSizeInBits(); +for (unsigned i = 0; i < NumElts; ++i) { + MergeTyParts.push_back(Unmerge.getReg(i)); +} +B.buildMergeLikeInstr(Dst, 
MergeTyParts); + } + MI.eraseFromParent(); +} + void RegBankLegalizeHelper::lower(MachineInstr &MI, const RegBankLLTMapping &Mapping, SmallSet &WaterfallSgprs) { @@ -128,6 +205,54 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, MI.eraseFromParent(); break; } + case SplitLoad: { +LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); +unsigned Size = DstTy.getSizeInBits(); +// Even split to 128-bit loads +if (Size > 128) { + LLT B128; + if (DstTy.isVector()) { +LLT EltTy = DstTy.getElementType(); +B128 = LLT::f
[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112866 >From d3f746c12fc0e741dfe754f19f394d770a22e84c Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Thu, 31 Oct 2024 14:10:57 +0100 Subject: [PATCH] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi Change existing code for G_PHI to match what LLVM-IR version is doing via PHINode::hasConstantOrUndefValue. This is not safe for regular PHI since it may appear with an undef operand and getVRegDef can fail. Most notably this improves number of values that can be allocated to sgpr register bank in AMDGPURegBankSelect. Common case here are phis that appear in structurize-cfg lowering for cycles with multiple exits: Undef incoming value is coming from block that reached cycle exit condition, if other incoming is uniform keep the phi uniform despite the fact it is joining values from pair of blocks that are entered via divergent condition branch. --- llvm/lib/CodeGen/MachineSSAContext.cpp| 27 +- .../AMDGPU/MIR/hidden-diverge-gmir.mir| 28 +++ .../AMDGPU/MIR/hidden-loop-diverge.mir| 4 +- .../AMDGPU/MIR/uses-value-from-cycle.mir | 8 +- .../GlobalISel/divergence-structurizer.mir| 80 -- .../regbankselect-mui-regbanklegalize.mir | 69 --- .../regbankselect-mui-regbankselect.mir | 18 ++-- .../AMDGPU/GlobalISel/regbankselect-mui.ll| 84 ++- .../AMDGPU/GlobalISel/regbankselect-mui.mir | 51 ++- 9 files changed, 191 insertions(+), 178 deletions(-) diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp index e384187b6e8593..8e13c0916dd9e1 100644 --- a/llvm/lib/CodeGen/MachineSSAContext.cpp +++ b/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -54,9 +54,34 @@ const MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const { return F->getRegInfo().getVRegDef(value)->getParent(); } +static bool isUndef(const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF || + MI.getOpcode() == TargetOpcode::IMPLICIT_DEF; +} + +/// 
MachineInstr equivalent of PHINode::hasConstantOrUndefValue() for G_PHI. template <> bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) { - return Phi.isConstantValuePHI(); + if (!Phi.isPHI()) +return false; + + // In later passes PHI may appear with an undef operand, getVRegDef can fail. + if (Phi.getOpcode() == TargetOpcode::PHI) +return Phi.isConstantValuePHI(); + + // For G_PHI we do equivalent of PHINode::hasConstantOrUndefValue(). + const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo(); + Register This = Phi.getOperand(0).getReg(); + Register ConstantValue; + for (unsigned i = 1, e = Phi.getNumOperands(); i < e; i += 2) { +Register Incoming = Phi.getOperand(i).getReg(); +if (Incoming != This && !isUndef(*MRI.getVRegDef(Incoming))) { + if (ConstantValue && ConstantValue != Incoming) +return false; + ConstantValue = Incoming; +} + } + return true; } template <> diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir index ce00edf3363f77..9694a340b5e906 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir @@ -1,24 +1,24 @@ # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s # CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge # CHECK-LABEL: BLOCK bb.0 -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# 
CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1 -# CHECK: DIVERGENT: G_BR %bb.2 +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) +# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1 +# CHECK: DIVERGENT: G_BR %bb.2 # CHECK-LABEL: BLOCK bb.1 # CHECK-LABEL: BLOCK bb.2 -# CHECK: D
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -1,16 +1,21 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -// CHECK-DAG: @[[CB:.+]] = external constant { float } +// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s cbuffer A { -float a; - // CHECK-DAG:@_ZL1b = internal global float 3.00e+00, align 4 + // CHECK: @a = external addrspace(2) externally_initialized global float, align 4 + float a; + // CHECK: @_ZL1b = internal global float 3.00e+00, align 4 static float b = 3; Keenuts wrote: If the end-goal is not to support this, but we "do" because we just inherit the behavior for now, I'd be in favor of checking that this is disallowed and marking it as XFAIL with this issue referenced https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] Rename libFortranRuntime.a to libflang_rt.a (PR #122341)
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/122341 >From c77098f90a5c20bdbce078a0ee3aec1fe53772e3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 8 Jan 2025 11:23:02 +0100 Subject: [PATCH 1/5] clang-format to sort headers --- flang/tools/f18-parse-demo/f18-parse-demo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/tools/f18-parse-demo/f18-parse-demo.cpp b/flang/tools/f18-parse-demo/f18-parse-demo.cpp index 90bbce246e3f16..a50c88dc840643 100644 --- a/flang/tools/f18-parse-demo/f18-parse-demo.cpp +++ b/flang/tools/f18-parse-demo/f18-parse-demo.cpp @@ -21,7 +21,6 @@ // scaffolding compiler driver that can test some semantic passes of the // F18 compiler under development. -#include "flang/Support/Fortran-features.h" #include "flang/Parser/characters.h" #include "flang/Parser/dump-parse-tree.h" #include "flang/Parser/message.h" @@ -30,6 +29,7 @@ #include "flang/Parser/parsing.h" #include "flang/Parser/provenance.h" #include "flang/Parser/unparse.h" +#include "flang/Support/Fortran-features.h" #include "flang/Support/default-kinds.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" >From 22ed7ebde19d4003fa3036039f75977b1e6b9f60 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 8 Jan 2025 14:15:45 +0100 Subject: [PATCH 2/5] FortranRuntime -> flang_rt --- clang/lib/Driver/ToolChains/CommonArgs.cpp| 4 +- clang/lib/Driver/ToolChains/Flang.cpp | 8 ++-- flang-rt/unittests/Evaluate/CMakeLists.txt| 21 ++ flang/CMakeLists.txt | 2 +- flang/docs/FlangDriver.md | 8 ++-- flang/docs/GettingStarted.md | 6 +-- flang/docs/OpenACC-descriptor-management.md | 2 +- flang/docs/ReleaseNotes.md| 2 + .../ExternalHelloWorld/CMakeLists.txt | 2 +- flang/runtime/CMakeLists.txt | 40 +++ flang/runtime/CUDA/CMakeLists.txt | 2 +- flang/runtime/Float128Math/CMakeLists.txt | 2 +- flang/runtime/time-intrinsic.cpp | 2 +- flang/test/CMakeLists.txt | 6 ++- .../test/Driver/gcc-toolchain-install-dir.f90 | 2 +- 
flang/test/Driver/linker-flags.f90| 8 ++-- .../test/Driver/msvc-dependent-lib-flags.f90 | 8 ++-- flang/test/Driver/nostdlib.f90| 2 +- flang/test/Runtime/no-cpp-dep.c | 2 +- flang/test/lit.cfg.py | 2 +- flang/tools/f18/CMakeLists.txt| 8 ++-- flang/unittests/CMakeLists.txt| 2 +- flang/unittests/Evaluate/CMakeLists.txt | 9 +++-- flang/unittests/Frontend/CMakeLists.txt | 1 + flang/unittests/Runtime/CMakeLists.txt| 2 +- flang/unittests/Runtime/CUDA/CMakeLists.txt | 2 +- lld/COFF/MinGW.cpp| 2 +- 27 files changed, 97 insertions(+), 60 deletions(-) create mode 100644 flang-rt/unittests/Evaluate/CMakeLists.txt diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 9e9872975de9c2..4c6b9f29f362ca 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1317,7 +1317,7 @@ void tools::addOpenMPHostOffloadingArgs(const Compilation &C, /// Add Fortran runtime libs void tools::addFortranRuntimeLibs(const ToolChain &TC, const ArgList &Args, llvm::opt::ArgStringList &CmdArgs) { - // Link FortranRuntime + // Link flang_rt // These are handled earlier on Windows by telling the frontend driver to // add the correct libraries to link against as dependents in the object // file. 
@@ -1333,7 +1333,7 @@ void tools::addFortranRuntimeLibs(const ToolChain &TC, const ArgList &Args, if (AsNeeded) addAsNeededOption(TC, Args, CmdArgs, /*as_needed=*/false); } -CmdArgs.push_back("-lFortranRuntime"); +CmdArgs.push_back("-lflang_rt"); addArchSpecificRPath(TC, Args, CmdArgs); } diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 749af4ada9a696..2cf1108b28dab3 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -356,26 +356,26 @@ static void processVSRuntimeLibrary(const ToolChain &TC, const ArgList &Args, case options::OPT__SLASH_MT: CmdArgs.push_back("-D_MT"); CmdArgs.push_back("--dependent-lib=libcmt"); -CmdArgs.push_back("--dependent-lib=FortranRuntime.static.lib"); +CmdArgs.push_back("--dependent-lib=flang_rt.static.lib"); break; case options::OPT__SLASH_MTd: CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("--dependent-lib=libcmtd"); -CmdArgs.push_back("--dependent-lib=FortranRuntime.static_dbg.lib"); +CmdArgs.push_back("--dependent-lib=flang_rt.static_dbg.lib"); break; case options::OPT__SLASH_M
[llvm-branch-commits] [flang] [Flang] Promote FortranEvaluateTesting library (PR #122334)
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/122334 >From 74432e2d5d4916f09ee6f60a4d80f3f5a96f1b12 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 9 Jan 2025 11:31:23 +0100 Subject: [PATCH 1/3] Promote Testing lib --- .../flang/Testing}/fp-testing.h| 14 +++--- .../Evaluate => include/flang/Testing}/testing.h | 14 +++--- .../Evaluate => lib/Testing}/fp-testing.cpp| 10 +- .../Evaluate => lib/Testing}/testing.cpp | 10 +- flang/unittests/Evaluate/CMakeLists.txt| 4 ++-- flang/unittests/Evaluate/ISO-Fortran-binding.cpp | 2 +- flang/unittests/Evaluate/bit-population-count.cpp | 2 +- flang/unittests/Evaluate/expression.cpp| 2 +- flang/unittests/Evaluate/folding.cpp | 2 +- flang/unittests/Evaluate/integer.cpp | 2 +- flang/unittests/Evaluate/intrinsics.cpp| 2 +- .../unittests/Evaluate/leading-zero-bit-count.cpp | 2 +- flang/unittests/Evaluate/logical.cpp | 2 +- flang/unittests/Evaluate/real.cpp | 4 ++-- flang/unittests/Evaluate/reshape.cpp | 2 +- flang/unittests/Evaluate/uint128.cpp | 2 +- 16 files changed, 54 insertions(+), 22 deletions(-) rename flang/{unittests/Evaluate => include/flang/Testing}/fp-testing.h (54%) rename flang/{unittests/Evaluate => include/flang/Testing}/testing.h (74%) rename flang/{unittests/Evaluate => lib/Testing}/fp-testing.cpp (87%) rename flang/{unittests/Evaluate => lib/Testing}/testing.cpp (88%) diff --git a/flang/unittests/Evaluate/fp-testing.h b/flang/include/flang/Testing/fp-testing.h similarity index 54% rename from flang/unittests/Evaluate/fp-testing.h rename to flang/include/flang/Testing/fp-testing.h index 9091963a99b32d..e223d2ef7d1b8b 100644 --- a/flang/unittests/Evaluate/fp-testing.h +++ b/flang/include/flang/Testing/fp-testing.h @@ -1,5 +1,13 @@ -#ifndef FORTRAN_TEST_EVALUATE_FP_TESTING_H_ -#define FORTRAN_TEST_EVALUATE_FP_TESTING_H_ +//===-- include/flang/Testing/fp-testing.h --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef FORTRAN_TESTING_FP_TESTING_H_ +#define FORTRAN_TESTING_FP_TESTING_H_ #include "flang/Common/target-rounding.h" #include @@ -24,4 +32,4 @@ class ScopedHostFloatingPointEnvironment { #endif }; -#endif // FORTRAN_TEST_EVALUATE_FP_TESTING_H_ +#endif /* FORTRAN_TESTING_FP_TESTING_H_ */ diff --git a/flang/unittests/Evaluate/testing.h b/flang/include/flang/Testing/testing.h similarity index 74% rename from flang/unittests/Evaluate/testing.h rename to flang/include/flang/Testing/testing.h index 422e2853c05bc6..404650c9a89f2c 100644 --- a/flang/unittests/Evaluate/testing.h +++ b/flang/include/flang/Testing/testing.h @@ -1,5 +1,13 @@ -#ifndef FORTRAN_EVALUATE_TESTING_H_ -#define FORTRAN_EVALUATE_TESTING_H_ +//===-- include/flang/Testing/testing.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef FORTRAN_TESTING_TESTING_H_ +#define FORTRAN_TESTING_TESTING_H_ #include #include @@ -33,4 +41,4 @@ FailureDetailPrinter Match(const char *file, int line, const std::string &want, FailureDetailPrinter Compare(const char *file, int line, const char *xs, const char *rel, const char *ys, std::uint64_t x, std::uint64_t y); } // namespace testing -#endif // FORTRAN_EVALUATE_TESTING_H_ +#endif /* FORTRAN_TESTING_TESTING_H_ */ diff --git a/flang/unittests/Evaluate/fp-testing.cpp b/flang/lib/Testing/fp-testing.cpp similarity index 87% rename from flang/unittests/Evaluate/fp-testing.cpp rename to flang/lib/Testing/fp-testing.cpp index 1a1d7425d58249..5e1728e8df5e4b 100644 --- a/flang/unittests/Evaluate/fp-testing.cpp +++ b/flang/lib/Testing/fp-testing.cpp @@ -1,4 +1,12 @@ -#include "fp-testing.h" +//===-- lib/Testing/fp-testing.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "flang/Testing/fp-testing.h" #include "llvm/Support/Errno.h" #include #include diff --git a/flang/unittests/Evaluate/testing.cpp b/flang/lib/Testing/testing.cpp similarity index 88% rename from flang/unittests/Evaluate/
[llvm-branch-commits] [flang] [Flang] Optionally do not compile the runtime in-tree (PR #122336)
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/122336 >From dd3ac2e6d8d8d57cd639c25bea3b8d5c99a2f81e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 9 Jan 2025 15:58:48 +0100 Subject: [PATCH 1/9] Introduce FLANG_INCLUDE_RUNTIME --- flang/CMakeLists.txt| 7 +++- flang/test/CMakeLists.txt | 6 +++- flang/test/Driver/ctofortran.f90| 1 + flang/test/Driver/exec.f90 | 1 + flang/test/Runtime/no-cpp-dep.c | 2 +- flang/test/lit.cfg.py | 5 ++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/f18/CMakeLists.txt | 4 +-- flang/unittests/CMakeLists.txt | 6 ++-- flang/unittests/Evaluate/CMakeLists.txt | 46 ++--- 10 files changed, 50 insertions(+), 29 deletions(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 68947eaa9c9bd7..69e963a43d0b97 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -233,6 +233,9 @@ else() include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR}) endif() +option(FLANG_INCLUDE_RUNTIME "Build the runtime in-tree (deprecated; to be replaced with LLVM_ENABLE_RUNTIMES=flang-rt)" ON) +pythonize_bool(FLANG_INCLUDE_RUNTIME) + set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) @@ -473,7 +476,9 @@ if (FLANG_CUF_RUNTIME) find_package(CUDAToolkit REQUIRED) endif() -add_subdirectory(runtime) +if (FLANG_INCLUDE_RUNTIME) + add_subdirectory(runtime) +endif () if (LLVM_INCLUDE_EXAMPLES) add_subdirectory(examples) diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index cab214c2ef4c8c..e398e0786147aa 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -71,9 +71,13 @@ set(FLANG_TEST_DEPENDS llvm-objdump llvm-readobj split-file - FortranRuntime FortranDecimal ) + +if (FLANG_INCLUDE_RUNTIME) + list(APPEND FLANG_TEST_DEPENDS FortranRuntime) +endif () + if (LLVM_ENABLE_PLUGINS AND NOT WIN32) list(APPEND FLANG_TEST_DEPENDS Bye) endif() diff --git 
a/flang/test/Driver/ctofortran.f90 b/flang/test/Driver/ctofortran.f90 index 78eac32133b18e..10c7adaccc9588 100644 --- a/flang/test/Driver/ctofortran.f90 +++ b/flang/test/Driver/ctofortran.f90 @@ -1,4 +1,5 @@ ! UNSUPPORTED: system-windows +! REQUIRES: flang-rt ! RUN: split-file %s %t ! RUN: chmod +x %t/runtest.sh ! RUN: %t/runtest.sh %t %t/ffile.f90 %t/cfile.c %flang | FileCheck %s diff --git a/flang/test/Driver/exec.f90 b/flang/test/Driver/exec.f90 index fd174005ddf62a..9ca91ee24011c9 100644 --- a/flang/test/Driver/exec.f90 +++ b/flang/test/Driver/exec.f90 @@ -1,4 +1,5 @@ ! UNSUPPORTED: system-windows +! REQUIRES: flang-rt ! Verify that flang can correctly build executables. ! RUN: %flang %s -o %t diff --git a/flang/test/Runtime/no-cpp-dep.c b/flang/test/Runtime/no-cpp-dep.c index b1a5fa004014cc..7303ce63fdec41 100644 --- a/flang/test/Runtime/no-cpp-dep.c +++ b/flang/test/Runtime/no-cpp-dep.c @@ -3,7 +3,7 @@ This test makes sure that flang's runtime does not depend on the C++ runtime library. It tries to link this simple file against libFortranRuntime.a with a C compiler. -REQUIRES: c-compiler +REQUIRES: c-compiler, flang-rt RUN: %if system-aix %{ export OBJECT_MODE=64 %} RUN: %cc -std=c99 %s -I%include %libruntime -lm \ diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index c452b6d231c89f..78378bf5f413e8 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -163,10 +163,13 @@ ToolSubst("%not_todo_abort_cmd", command=FindTool("not"), unresolved="fatal") ) +if config.flang_include_runtime: + config.available_features.add("flang-rt") + # Define some variables to help us test that the flang runtime doesn't depend on # the C++ runtime libraries. For this we need a C compiler. If for some reason # we don't have one, we can just disable the test. 
-if config.cc: +if config.flang_include_runtime and config.cc: libruntime = os.path.join(config.flang_lib_dir, "libFortranRuntime.a") include = os.path.join(config.flang_src_dir, "include") diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index d1a0ac763cf8a0..19f9330f93ae14 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -32,6 +32,7 @@ else: config.openmp_module_dir = None config.flang_runtime_f128_math_lib = "@FLANG_RUNTIME_F128_MATH_LIB@" config.have_ldbl_mant_dig_113 = "@HAVE_LDBL_MANT_DIG_113@" +config.flang_include_runtime = @FLANG_INCLUDE_RUNTIME@ import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 4362fcf0537616..022c346aabdbde 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -72,7 +72,7 @@ if (NOT CMAKE_CROSSCOMPILING) set(depends ${FLANG_
[llvm-branch-commits] [flang] [Flang] Remove FLANG_INCLUDE_RUNTIME (PR #124126)
https://github.com/Meinersbur created https://github.com/llvm/llvm-project/pull/124126 Remove the FLANG_INCLUDE_RUNTIME option which was replaced by LLVM_ENABLE_RUNTIMES=flang-rt. This PR does not (yet) include adding `LLVM_ENABLE_RUNTIMES=flang-rt` implicitly. CMake command lines must be updated to get a working Fortran toolchain. >From bd152c56900698f727c8ece9889a03ec693c13ad Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 22 Jan 2025 20:45:26 +0100 Subject: [PATCH] [Flang] Remove FLANG_INCLUDE_RUNTIME --- flang/CMakeLists.txt | 25 +- .../modules/AddFlangOffloadRuntime.cmake | 146 flang/runtime/CMakeLists.txt | 350 -- flang/runtime/CUDA/CMakeLists.txt | 41 -- flang/runtime/Float128Math/CMakeLists.txt | 133 --- flang/test/CMakeLists.txt | 10 - flang/test/lit.cfg.py | 3 - flang/test/lit.site.cfg.py.in | 1 - flang/tools/f18/CMakeLists.txt| 17 +- flang/unittests/CMakeLists.txt| 43 +-- flang/unittests/Evaluate/CMakeLists.txt | 16 - 11 files changed, 5 insertions(+), 780 deletions(-) delete mode 100644 flang/cmake/modules/AddFlangOffloadRuntime.cmake delete mode 100644 flang/runtime/CMakeLists.txt delete mode 100644 flang/runtime/CUDA/CMakeLists.txt delete mode 100644 flang/runtime/Float128Math/CMakeLists.txt diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 38004c149b7835..aceb2d09c54388 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -23,7 +23,6 @@ if (LLVM_ENABLE_EH) endif() set(FLANG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang-rt") if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE) message(FATAL_ERROR "In-source builds are not allowed. 
\ @@ -237,24 +236,8 @@ else() include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR}) endif() -set(FLANG_INCLUDE_RUNTIME_default ON) -if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) - set(FLANG_INCLUDE_RUNTIME_default OFF) -endif () -option(FLANG_INCLUDE_RUNTIME "Build the runtime in-tree (deprecated; to be replaced with LLVM_ENABLE_RUNTIMES=flang-rt)" FLANG_INCLUDE_RUNTIME_default) -if (FLANG_INCLUDE_RUNTIME) - if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) -message(WARNING "Building Flang-RT using LLVM_ENABLE_RUNTIMES. FLANG_INCLUDE_RUNTIME=${FLANG_INCLUDE_RUNTIME} ignored.") -set(FLANG_INCLUDE_RUNTIME OFF) - else () - message(STATUS "Building flang_rt in-tree") - endif () -else () - if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) -message(STATUS "Building Flang-RT using LLVM_ENABLE_RUNTIMES.") - else () -message(STATUS "Not building Flang-RT. For a usable Fortran toolchain, compile a standalone Flang-RT") - endif () +if (NOT "flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Not building Flang-RT. 
For a usable Fortran toolchain, compile a standalone Flang-RT") endif () set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH @@ -484,10 +467,6 @@ if (FLANG_CUF_RUNTIME) find_package(CUDAToolkit REQUIRED) endif() -if (FLANG_INCLUDE_RUNTIME) - add_subdirectory(runtime) -endif () - if (LLVM_INCLUDE_EXAMPLES) add_subdirectory(examples) endif() diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake b/flang/cmake/modules/AddFlangOffloadRuntime.cmake deleted file mode 100644 index 8e4f47d18535dc..00 --- a/flang/cmake/modules/AddFlangOffloadRuntime.cmake +++ /dev/null @@ -1,146 +0,0 @@ -option(FLANG_EXPERIMENTAL_CUDA_RUNTIME - "Compile Fortran runtime as CUDA sources (experimental)" OFF - ) - -option(FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS - "Do not compile global variables' definitions when producing PTX library" OFF - ) - -set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation") - -set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING - "Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'") - -set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING - "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')") - -macro(enable_cuda_compilation name files) - if (FLANG_EXPERIMENTAL_CUDA_RUNTIME) -if (BUILD_SHARED_LIBS) - message(FATAL_ERROR -"BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime" -) -endif() - -enable_language(CUDA) - -# TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION -# work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION. -set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - -# Treat all supported sources as CUDA files. -set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA) -set(CUDA_COMPILE_OPTIONS) -if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang") - # Allow varargs. - set(CUDA_COMP
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -55,15 +55,19 @@ class MapsForPrivatizedSymbolsPass std::underlying_type_t>( llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO); Operation *definingOp = var.getDefiningOp(); -auto declOp = llvm::dyn_cast_or_null(definingOp); -assert(declOp && - "Expected defining Op of privatized var to be hlfir.declare"); +assert(definingOp && + "Privatizing a block argument without any hlfir.declare"); tblah wrote: MLIR values can come from two places: 1. the result of an operation 2. a block argument We can't assume that getting the defining operation produces a non-null result because the value might be a block argument. For example, ``` func.func @func(%arg0 : !type0, %arg1 : !type1) { %0 = hlfir.declare %arg0 omp.private(%0, %arg1) } ``` Here we can get a defining operation for `%0` because it is the `hlfir.declare` for that function argument (which is the normal way flang would lower a function argument). `%arg1` has no defining operation because it is a block argument. https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
https://github.com/tblah edited https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 8573475 - Revert "[InstCombine] Teach foldSelectOpOp about samesign (#122723)"
Author: Alexander Kornienko Date: 2025-01-23T15:40:05+01:00 New Revision: 8573475d5cc1c40679a86d6992a42e5c564a1f23 URL: https://github.com/llvm/llvm-project/commit/8573475d5cc1c40679a86d6992a42e5c564a1f23 DIFF: https://github.com/llvm/llvm-project/commit/8573475d5cc1c40679a86d6992a42e5c564a1f23.diff LOG: Revert "[InstCombine] Teach foldSelectOpOp about samesign (#122723)" This reverts commit 48757e02ba2c1651c268351d062f80923baceda4. Added: Modified: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp llvm/test/Transforms/InstCombine/select-cmp.ll Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index d5d9a829c3068a..f66a976ccb47fe 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -428,10 +428,10 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI, CmpPredicate TPred, FPred; if (match(TI, m_ICmp(TPred, m_Value(), m_Value())) && match(FI, m_ICmp(FPred, m_Value(), m_Value( { - bool Swapped = ICmpInst::isRelational(FPred) && - CmpPredicate::getMatching( - TPred, ICmpInst::getSwappedCmpPredicate(FPred)); - if (CmpPredicate::getMatching(TPred, FPred) || Swapped) { + // FIXME: Use CmpPredicate::getMatching here. 
+ CmpInst::Predicate T = TPred, F = FPred; + if (T == F || T == ICmpInst::getSwappedCmpPredicate(F)) { +bool Swapped = T != F; if (Value *MatchOp = getCommonOp(TI, FI, ICmpInst::isEquality(TPred), Swapped)) { Value *NewSel = Builder.CreateSelect(Cond, OtherOpT, OtherOpF, diff --git a/llvm/test/Transforms/InstCombine/select-cmp.ll b/llvm/test/Transforms/InstCombine/select-cmp.ll index 7e5d5821d9f6a7..f7505bd85f89eb 100644 --- a/llvm/test/Transforms/InstCombine/select-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-cmp.ll @@ -23,18 +23,6 @@ define i1 @icmp_ne_common_op00(i1 %c, i6 %x, i6 %y, i6 %z) { ret i1 %r } -define i1 @icmp_ne_samesign_common(i1 %c, i6 %x, i6 %y, i6 %z) { -; CHECK-LABEL: @icmp_ne_samesign_common( -; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]] -; CHECK-NEXT:[[R:%.*]] = icmp ne i6 [[X:%.*]], [[R_V]] -; CHECK-NEXT:ret i1 [[R]] -; - %cmp1 = icmp samesign ne i6 %x, %y - %cmp2 = icmp ne i6 %x, %z - %r = select i1 %c, i1 %cmp1, i1 %cmp2 - ret i1 %r -} - define i1 @icmp_ne_common_op01(i1 %c, i3 %x, i3 %y, i3 %z) { ; CHECK-LABEL: @icmp_ne_common_op01( ; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i3 [[Y:%.*]], i3 [[Z:%.*]] @@ -83,18 +71,6 @@ define i1 @icmp_eq_common_op00(i1 %c, i5 %x, i5 %y, i5 %z) { ret i1 %r } -define i1 @icmp_eq_samesign_common(i1 %c, i5 %x, i5 %y, i5 %z) { -; CHECK-LABEL: @icmp_eq_samesign_common( -; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i5 [[Y:%.*]], i5 [[Z:%.*]] -; CHECK-NEXT:[[R:%.*]] = icmp eq i5 [[X:%.*]], [[R_V]] -; CHECK-NEXT:ret i1 [[R]] -; - %cmp1 = icmp eq i5 %x, %y - %cmp2 = icmp samesign eq i5 %x, %z - %r = select i1 %c, i1 %cmp1, i1 %cmp2 - ret i1 %r -} - define <5 x i1> @icmp_eq_common_op01(<5 x i1> %c, <5 x i7> %x, <5 x i7> %y, <5 x i7> %z) { ; CHECK-LABEL: @icmp_eq_common_op01( ; CHECK-NEXT:[[R_V:%.*]] = select <5 x i1> [[C:%.*]], <5 x i7> [[Y:%.*]], <5 x i7> [[Z:%.*]] @@ -158,18 +134,6 @@ define i1 @icmp_slt_common(i1 %c, i6 %x, i6 %y, i6 %z) { ret i1 %r } -define i1 
@icmp_slt_samesign_common(i1 %c, i6 %x, i6 %y, i6 %z) { -; CHECK-LABEL: @icmp_slt_samesign_common( -; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]] -; CHECK-NEXT:[[R:%.*]] = icmp ult i6 [[X:%.*]], [[R_V]] -; CHECK-NEXT:ret i1 [[R]] -; - %cmp1 = icmp samesign ult i6 %x, %y - %cmp2 = icmp slt i6 %x, %z - %r = select i1 %c, i1 %cmp1, i1 %cmp2 - ret i1 %r -} - define i1 @icmp_sgt_common(i1 %c, i6 %x, i6 %y, i6 %z) { ; CHECK-LABEL: @icmp_sgt_common( ; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]] @@ -182,18 +146,6 @@ define i1 @icmp_sgt_common(i1 %c, i6 %x, i6 %y, i6 %z) { ret i1 %r } -define i1 @icmp_sgt_samesign_common(i1 %c, i6 %x, i6 %y, i6 %z) { -; CHECK-LABEL: @icmp_sgt_samesign_common( -; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]] -; CHECK-NEXT:[[R:%.*]] = icmp ugt i6 [[X:%.*]], [[R_V]] -; CHECK-NEXT:ret i1 [[R]] -; - %cmp1 = icmp samesign ugt i6 %x, %y - %cmp2 = icmp sgt i6 %x, %z - %r = select i1 %c, i1 %cmp1, i1 %cmp2 - ret i1 %r -} - define i1 @icmp_sle_common(i1 %c, i6 %x, i6 %y, i6 %z) { ; CHECK-LABEL: @icmp_sle_common( ; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]] @@ -206,18 +158,6 @@ define i1 @icmp_sle_common(i1 %c, i6 %x, i6 %y, i6 %z) { ret i1 %r } -define i1 @icm
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -1,7 +1,14 @@ -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: dxil-pc-shadermodel6.3-library %s \ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +// CHECK: @a = external addrspace(2) externally_initialized global float, align 4 +// CHECK: @b = external addrspace(2) externally_initialized global double, align 8 +// CHECK: @c = external addrspace(2) externally_initialized global float, align 4 +// CHECK: @d = external addrspace(2) externally_initialized global double, align 8 + // CHECK: @[[CB:.+]] = external constant { float, double } Keenuts wrote: Ok, so this `CB` will be replaced by the CBV, and the actual globals will be deleted in favor of the intrinsic if I understand, thanks! https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Rework update of waves per eu (PR #123995)
@@ -1109,74 +1109,38 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); -auto TakeRange = [&](std::pair R) { - auto [Min, Max] = R; - ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); - IntegerRangeState RangeState(Range); - clampStateAndIndicateChange(this->getState(), RangeState); - indicateOptimisticFixpoint(); -}; - -std::pair MaxWavesPerEURange{ -1U, InfoCache.getMaxWavesPerEU(*F)}; - // If the attribute exists, we will honor it if it is not the default. if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) { + std::pair MaxWavesPerEURange{ + 1U, InfoCache.getMaxWavesPerEU(*F)}; if (*Attr != MaxWavesPerEURange) { -TakeRange(*Attr); +auto [Min, Max] = *Attr; +ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); +IntegerRangeState RangeState(Range); +clampStateAndIndicateChange(this->getState(), RangeState); +indicateOptimisticFixpoint(); return; } } -// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the -// calculation of waves per EU involves flat work group size, we can't -// simply use an assumed flat work group size as a start point, because the -// update of flat work group size is in an inverse direction of waves per -// EU. However, we can still do something if it is an entry function. Since -// an entry function is a terminal node, and flat work group size either -// from attribute or default will be used anyway, we can take that value and -// calculate the waves per EU based on it. This result can't be updated by -// no means, but that could still allow us to propagate it. 
-if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) { - std::pair FlatWorkGroupSize; - if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) -FlatWorkGroupSize = *Attr; - else -FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F); - TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange, - FlatWorkGroupSize)); -} +if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) + indicatePessimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { -auto &InfoCache = static_cast(A.getInfoCache()); ChangeStatus Change = ChangeStatus::UNCHANGED; auto CheckCallSite = [&](AbstractCallSite CS) { Function *Caller = CS.getInstruction()->getFunction(); - Function *Func = getAssociatedFunction(); - LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName() -<< "->" << Func->getName() << '\n'); - const auto *CallerInfo = A.getAAFor( *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); - const auto *AssumedGroupSize = A.getAAFor( - *this, IRPosition::function(*Func), DepClassTy::REQUIRED); - if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() || - !AssumedGroupSize->isValidState()) + if (!CallerInfo || !CallerInfo->isValidState()) return false; - - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU( - *Caller, - {CallerInfo->getAssumed().getLower().getZExtValue(), - CallerInfo->getAssumed().getUpper().getZExtValue() - 1}, - {AssumedGroupSize->getAssumed().getLower().getZExtValue(), - AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); - ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1)); + unsigned Min = CallerInfo->getAssumed().getLower().getZExtValue(); + unsigned Max = CallerInfo->getAssumed().getUpper().getZExtValue(); + ConstantRange CallerRange(APInt(32, Min), APInt(32, Max)); IntegerRangeState CallerRangeState(CallerRange); Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState); shiltian wrote: @arsenm did I get this part correctly? 
https://github.com/llvm/llvm-project/pull/123995 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Rework update of waves per eu (PR #123995)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/123995 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang-RT] Build libflang_rt.so (PR #121782)
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/121782 >From a3037ab5557dcc4a4deb5bb40f801ca9770e3854 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 6 Jan 2025 16:44:08 +0100 Subject: [PATCH 1/7] Add FLANG_RT_ENABLE_STATIC and FLANG_RT_ENABLE_SHARED --- flang-rt/CMakeLists.txt | 30 ++ flang-rt/cmake/modules/AddFlangRT.cmake | 291 -- .../cmake/modules/AddFlangRTOffload.cmake | 8 +- flang-rt/cmake/modules/GetToolchainDirs.cmake | 254 +++ flang-rt/lib/flang_rt/CMakeLists.txt | 20 +- flang-rt/test/CMakeLists.txt | 2 +- flang-rt/test/lit.cfg.py | 2 +- 7 files changed, 366 insertions(+), 241 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 7b3d22e454a108..7effa6012a078f 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -113,6 +113,15 @@ cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) # Determine subdirectories for build output and install destinations. +# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good +#destination because it is not a ld.so default search path. +#The machine where the executable is eventually executed may not be the +#machine where the Flang compiler and its resource dir is installed, so +#setting RPath by the driver is not an solution. It should belong into +#/usr/lib//libflang_rt.so, like e.g. libgcc_s.so. +#But the linker as invoked by the Flang driver also requires +#libflang_rt.so to be found when linking and the resource lib dir is +#the only reliable location. 
get_toolchain_library_subdir(toolchain_lib_subdir) extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") @@ -130,6 +139,27 @@ cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) option(FLANG_RT_INCLUDE_TESTS "Generate build targets for the flang-rt unit and regression-tests." "${LLVM_INCLUDE_TESTS}") +option(FLANG_RT_ENABLE_STATIC "Build Flang-RT as a static library." ON) +if (WIN32) + # Windows DLL currently not implemented. + set(FLANG_RT_ENABLE_SHARED OFF) +else () + # TODO: Enable by default to increase test coverage, and which version of the + # library should be the user's choice anyway. + # Currently, the Flang driver adds `-L"libdir" -lflang_rt` as linker + # argument, which leaves the choice which library to use to the linker. + # Since most linkers prefer the shared library, this would constitute a + # breaking change unless the driver is changed. + option(FLANG_RT_ENABLE_SHARED "Build Flang-RT as a shared library." OFF) +endif () +if (NOT FLANG_RT_ENABLE_STATIC AND NOT FLANG_RT_ENABLE_SHARED) + message(FATAL_ERROR " + Must build at least one type of library + (FLANG_RT_ENABLE_STATIC=ON, FLANG_RT_ENABLE_SHARED=ON, or both) +") +endif () + + set(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT "" CACHE STRING "Compile Flang-RT with GPU support (CUDA or OpenMP)") set_property(CACHE FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT PROPERTY STRINGS "" diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index 1f8b5111433825..5f493a80c35f20 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -16,7 +16,8 @@ # STATIC # Build a static (.a/.lib) library # OBJECT -# Create only object files without static/dynamic library +# Always create an object library. +# Without SHARED/STATIC, build only the object library. 
# INSTALL_WITH_TOOLCHAIN # Install library into Clang's resource directory so it can be found by the # Flang driver during compilation, including tests @@ -44,17 +45,73 @@ function (add_flangrt_library name) ") endif () - # Forward libtype to add_library - set(extra_args "") - if (ARG_SHARED) -list(APPEND extra_args SHARED) + # Internal names of libraries. If called with just single type option, use + # the default name for it. Name of targets must only depend on function + # arguments to be predictable for callers. + set(name_static "${name}.static") + set(name_shared "${name}.shared") + set(name_object "obj.${name}") + if (ARG_STATIC AND NOT ARG_SHARED) +set(name_static "${name}") + elseif (NOT ARG_STATIC AND ARG_SHARED) +set(name_shared "${name}") + elseif (NOT ARG_STATIC AND NOT ARG_SHARED AND ARG_OBJECT) +set(name_object "${name}") + elseif (NOT ARG_STATIC AND NOT ARG_SHARED AND NOT ARG_OBJECT) +# Only one of them will actually be built. +set(name_static "${name}") +set(name_shared "${name}") + endif () + + # Determine what to build. If not explicitly
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -468,15 +505,49 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); - mlir::Type symType = hsb.getAddr().getType(); + mlir::Value privVal = hsb.getAddr(); + mlir::Type allocType; + if (mlir::isa(privVal.getType())) +allocType = privVal.getType(); + else +allocType = fir::unwrapRefType(privVal.getType()); + mlir::Location symLoc = hsb.getAddr().getLoc(); std::string privatizerName = sym->name().ToString() + ".privatizer"; bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate); + if (auto poly = mlir::dyn_cast(allocType)) { +if (!mlir::isa(poly.getEleTy()) && isFirstPrivate) + TODO(symLoc, "create polymorphic host associated copy"); + } tblah wrote: Done in [7cb83e3](https://github.com/llvm/llvm-project/pull/124019/commits/7cb83e335cfd25ab750e8c15c951dfb9198ac7a1) https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)
@@ -488,44 +559,34 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, mlir::OpBuilder::InsertionGuard guard(firOpBuilder); firOpBuilder.setInsertionPointToStart(moduleOp.getBody()); auto result = firOpBuilder.create( -symLoc, uniquePrivatizerName, symType, +symLoc, uniquePrivatizerName, allocType, isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate : mlir::omp::DataSharingClauseType::Private); fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym); lower::SymMapScope outerScope(symTable); -// Populate the `alloc` region. -{ - mlir::Region &allocRegion = result.getAllocRegion(); - mlir::Block *allocEntryBlock = firOpBuilder.createBlock( - &allocRegion, /*insertPt=*/{}, symType, symLoc); - - firOpBuilder.setInsertionPointToEnd(allocEntryBlock); - - fir::ExtendedValue localExV = - hlfir::translateToExtendedValue( - symLoc, firOpBuilder, hlfir::Entity{allocRegion.getArgument(0)}, - /*contiguousHint=*/ - evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext())) - .first; - - symTable.addSymbol(*sym, localExV); - lower::SymMapScope innerScope(symTable); - cloneSymbol(sym); - mlir::Value cloneAddr = symTable.shallowLookupSymbol(*sym).getAddr(); - mlir::Type cloneType = cloneAddr.getType(); - - // A `convert` op is required for variables that are storage associated - // via `equivalence`. The problem is that these variables are declared as - // `fir.ptr`s while their privatized storage is declared as `fir.ref`, - // therefore we convert to proper symbol type. - mlir::Value yieldedValue = - (symType == cloneType) ? cloneAddr - : firOpBuilder.createConvert( - cloneAddr.getLoc(), symType, cloneAddr); - - firOpBuilder.create(hsb.getAddr().getLoc(), - yieldedValue); +// Populate the `init` region. 
+const bool needsInitialization = tblah wrote: Done in [7cb83e3](https://github.com/llvm/llvm-project/pull/124019/commits/7cb83e335cfd25ab750e8c15c951dfb9198ac7a1) https://github.com/llvm/llvm-project/pull/124019 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Parse METADIRECTIVE in specification part (PR #123397)
https://github.com/kiranchandramohan approved this pull request. LG. The specification part has to be emitted in module files. But this is not necessary for producing the TODOs. https://github.com/llvm/llvm-project/pull/123397 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [PassBuilder][CodeGen] Add callback style pass buider (PR #116913)
paperchalice wrote: > Will new codegen support disabling individual passes? Instead of having > separate arguments like `-disable-machine-sink` we could do > `-disable-passes=machine-sink`. I created #76714, but disabling arbitrary passes is not what we expect. Maybe we could add an allowlist as a compromise... https://github.com/llvm/llvm-project/pull/116913 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 396698f - Revert "Revert "[LoopVectorizer] Add support for chaining partial reductions …"
Author: Vitaly Buka Date: 2025-01-23T14:00:43-08:00 New Revision: 396698f10d6c3b5433320a28c314d33c6356ff03 URL: https://github.com/llvm/llvm-project/commit/396698f10d6c3b5433320a28c314d33c6356ff03 DIFF: https://github.com/llvm/llvm-project/commit/396698f10d6c3b5433320a28c314d33c6356ff03.diff LOG: Revert "Revert "[LoopVectorizer] Add support for chaining partial reductions …" This reverts commit 0e213834df114484ca9525c0e60522b40ecf24e8. Added: llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll Modified: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h llvm/lib/Transforms/Vectorize/VPlan.h Removed: diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7167e2179af535..dec7a87ba9c50b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8682,12 +8682,12 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I, /// are valid so recipes can be formed later. void VPRecipeBuilder::collectScaledReductions(VFRange &Range) { // Find all possible partial reductions. - SmallVector, 1> + SmallVector> PartialReductionChains; - for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) -if (std::optional> Pair = -getScaledReduction(Phi, RdxDesc, Range)) - PartialReductionChains.push_back(*Pair); + for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) { +if (auto SR = getScaledReduction(Phi, RdxDesc.getLoopExitInstr(), Range)) + PartialReductionChains.append(*SR); + } // A partial reduction is invalid if any of its extends are used by // something that isn't another partial reduction. 
This is because the @@ -8715,26 +8715,44 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) { } } -std::optional> -VPRecipeBuilder::getScaledReduction(PHINode *PHI, -const RecurrenceDescriptor &Rdx, +std::optional>> +VPRecipeBuilder::getScaledReduction(Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range) { + + if (!CM.TheLoop->contains(RdxExitInstr)) +return std::nullopt; + // TODO: Allow scaling reductions when predicating. The select at // the end of the loop chooses between the phi value and most recent // reduction result, both of which have different VFs to the active lane // mask when scaling. - if (CM.blockNeedsPredicationForAnyReason(Rdx.getLoopExitInstr()->getParent())) + if (CM.blockNeedsPredicationForAnyReason(RdxExitInstr->getParent())) return std::nullopt; - auto *Update = dyn_cast(Rdx.getLoopExitInstr()); + auto *Update = dyn_cast(RdxExitInstr); if (!Update) return std::nullopt; Value *Op = Update->getOperand(0); Value *PhiOp = Update->getOperand(1); - if (Op == PHI) { -Op = Update->getOperand(1); -PhiOp = Update->getOperand(0); + if (Op == PHI) +std::swap(Op, PhiOp); + + SmallVector> Chains; + + // Try and get a scaled reduction from the first non-phi operand. + // If one is found, we use the discovered reduction instruction in + // place of the accumulator for costing. 
+ if (auto *OpInst = dyn_cast(Op)) { +if (auto SR0 = getScaledReduction(PHI, OpInst, Range)) { + Chains.append(*SR0); + PHI = SR0->rbegin()->first.Reduction; + + Op = Update->getOperand(0); + PhiOp = Update->getOperand(1); + if (Op == PHI) +std::swap(Op, PhiOp); +} } if (PhiOp != PHI) return std::nullopt; @@ -8757,7 +8775,7 @@ VPRecipeBuilder::getScaledReduction(PHINode *PHI, TTI::PartialReductionExtendKind OpBExtend = TargetTransformInfo::getPartialReductionExtendKind(ExtB); - PartialReductionChain Chain(Rdx.getLoopExitInstr(), ExtA, ExtB, BinOp); + PartialReductionChain Chain(RdxExitInstr, ExtA, ExtB, BinOp); unsigned TargetScaleFactor = PHI->getType()->getPrimitiveSizeInBits().getKnownScalarFactor( @@ -8772,9 +8790,9 @@ VPRecipeBuilder::getScaledReduction(PHINode *PHI, return Cost.isValid(); }, Range)) -return std::make_pair(Chain, TargetScaleFactor); +Chains.push_back(std::make_pair(Chain, TargetScaleFactor)); - return std::nullopt; + return Chains; } VPRecipeBase * @@ -8869,12 +8887,14 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction, "Unexpected number of operands for partial reduction"); VPValue *BinOp = Operands[0]; - VPValue *Phi = Operands[1]; - if (isa(BinOp->getDefiningRecipe())) -std::swap(BinOp, Phi); - - return new VPPartialReductionRecipe(Reduction->getOpcode(), BinOp, Phi, - Reduction); + VPValue *Accumulator = Operands[1]; + VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe(); + if
[llvm-branch-commits] [llvm] [Analysis] Add DebugInfoCache analysis (PR #118629)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118629 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass (PR #118630)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118630 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Analysis] Add DebugInfoCache analysis (PR #118629)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118629 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PeepholeOpt: Simplify tracking of current op for copy and reg_sequence (PR #124224)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/124224 Set the starting index in the constructor instead of treating 0 as a special case. There should also be no need for bounds checking in the rewrite. >From 5092973f8640de1323594a63338e20aee0a3fe89 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 23 Jan 2025 11:49:01 +0700 Subject: [PATCH] PeepholeOpt: Simplify tracking of current op for copy and reg_sequence Set the starting index in the constructor instead of treating 0 as a special case. There should also be no need for bounds checking in the rewrite. --- llvm/lib/CodeGen/PeepholeOptimizer.cpp | 31 +++--- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index af4f2dc49b690b..2fc48209126acd 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -153,7 +153,7 @@ class RecurrenceInstr; class Rewriter { protected: MachineInstr &CopyLike; - unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten. + int CurrentSrcIdx = 0; ///< The index of the source being rewritten. public: Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} virtual ~Rewriter() = default; @@ -201,12 +201,9 @@ class CopyRewriter : public Rewriter { bool getNextRewritableSource(RegSubRegPair &Src, RegSubRegPair &Dst) override { -// CurrentSrcIdx > 0 means this function has already been called. -if (CurrentSrcIdx > 0) +if (CurrentSrcIdx++ > 1) return false; -// This is the first call to getNextRewritableSource. -// Move the CurrentSrcIdx to remember that we made that call. -CurrentSrcIdx = 1; + // The rewritable source is the argument. 
const MachineOperand &MOSrc = CopyLike.getOperand(1); Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg()); @@ -217,8 +214,6 @@ class CopyRewriter : public Rewriter { } bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { -if (CurrentSrcIdx != 1) - return false; MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); MOSrc.setReg(NewReg); MOSrc.setSubReg(NewSubReg); @@ -229,7 +224,7 @@ class CopyRewriter : public Rewriter { /// Helper class to rewrite uncoalescable copy like instructions /// into new COPY (coalescable friendly) instructions. class UncoalescableRewriter : public Rewriter { - unsigned NumDefs; ///< Number of defs in the bitcast. + int NumDefs; ///< Number of defs in the bitcast. public: UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) { @@ -383,6 +378,7 @@ class RegSequenceRewriter : public Rewriter { public: RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) { assert(MI.isRegSequence() && "Invalid instruction"); +CurrentSrcIdx = -1; } /// \see Rewriter::getNextRewritableSource() @@ -404,16 +400,10 @@ class RegSequenceRewriter : public Rewriter { bool getNextRewritableSource(RegSubRegPair &Src, RegSubRegPair &Dst) override { // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. +CurrentSrcIdx += 2; +if (static_cast(CurrentSrcIdx) >= CopyLike.getNumOperands()) + return false; -// If this is the first call, move to the first argument. -if (CurrentSrcIdx == 0) { - CurrentSrcIdx = 1; -} else { - // Otherwise, move to the next argument and check that it is valid. - CurrentSrcIdx += 2; - if (CurrentSrcIdx >= CopyLike.getNumOperands()) -return false; -} const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); Src.Reg = MOInsertedReg.getReg(); // If we have to compose sub-register indices, bail out. @@ -431,11 +421,6 @@ class RegSequenceRewriter : public Rewriter { } bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { -// We cannot rewrite out of bound operands. 
-// Moreover, rewritable sources are at odd positions. -if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) - return false; - // Do not introduce new subregister uses in a reg_sequence. Until composing // subregister indices is supported while folding, we're just blocking // folding of subregister copies later in the function. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PeepholeOpt: Simplify tracking of current op for copy and reg_sequence (PR #124224)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes Set the starting index in the constructor instead of treating 0 as a special case. There should also be no need for bounds checking in the rewrite. --- Full diff: https://github.com/llvm/llvm-project/pull/124224.diff 1 Files Affected: - (modified) llvm/lib/CodeGen/PeepholeOptimizer.cpp (+8-23) ``diff diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index af4f2dc49b690b..2fc48209126acd 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -153,7 +153,7 @@ class RecurrenceInstr; class Rewriter { protected: MachineInstr &CopyLike; - unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten. + int CurrentSrcIdx = 0; ///< The index of the source being rewritten. public: Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} virtual ~Rewriter() = default; @@ -201,12 +201,9 @@ class CopyRewriter : public Rewriter { bool getNextRewritableSource(RegSubRegPair &Src, RegSubRegPair &Dst) override { -// CurrentSrcIdx > 0 means this function has already been called. -if (CurrentSrcIdx > 0) +if (CurrentSrcIdx++ > 1) return false; -// This is the first call to getNextRewritableSource. -// Move the CurrentSrcIdx to remember that we made that call. -CurrentSrcIdx = 1; + // The rewritable source is the argument. const MachineOperand &MOSrc = CopyLike.getOperand(1); Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg()); @@ -217,8 +214,6 @@ class CopyRewriter : public Rewriter { } bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { -if (CurrentSrcIdx != 1) - return false; MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); MOSrc.setReg(NewReg); MOSrc.setSubReg(NewSubReg); @@ -229,7 +224,7 @@ class CopyRewriter : public Rewriter { /// Helper class to rewrite uncoalescable copy like instructions /// into new COPY (coalescable friendly) instructions. 
class UncoalescableRewriter : public Rewriter { - unsigned NumDefs; ///< Number of defs in the bitcast. + int NumDefs; ///< Number of defs in the bitcast. public: UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) { @@ -383,6 +378,7 @@ class RegSequenceRewriter : public Rewriter { public: RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) { assert(MI.isRegSequence() && "Invalid instruction"); +CurrentSrcIdx = -1; } /// \see Rewriter::getNextRewritableSource() @@ -404,16 +400,10 @@ class RegSequenceRewriter : public Rewriter { bool getNextRewritableSource(RegSubRegPair &Src, RegSubRegPair &Dst) override { // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. +CurrentSrcIdx += 2; +if (static_cast(CurrentSrcIdx) >= CopyLike.getNumOperands()) + return false; -// If this is the first call, move to the first argument. -if (CurrentSrcIdx == 0) { - CurrentSrcIdx = 1; -} else { - // Otherwise, move to the next argument and check that it is valid. - CurrentSrcIdx += 2; - if (CurrentSrcIdx >= CopyLike.getNumOperands()) -return false; -} const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); Src.Reg = MOInsertedReg.getReg(); // If we have to compose sub-register indices, bail out. @@ -431,11 +421,6 @@ class RegSequenceRewriter : public Rewriter { } bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { -// We cannot rewrite out of bound operands. -// Moreover, rewritable sources are at odd positions. -if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) - return false; - // Do not introduce new subregister uses in a reg_sequence. Until composing // subregister indices is supported while folding, we're just blocking // folding of subregister copies later in the function. `` https://github.com/llvm/llvm-project/pull/124224 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PeepholeOpt: Simplify tracking of current op for copy and reg_sequence (PR #124224)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/124224 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PeepholeOpt: Simplify tracking of current op for copy and reg_sequence (PR #124224)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124224?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#124224** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124224?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124224?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#124111** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124111?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/124224 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)
@@ -20,107 +20,172 @@ using namespace llvm; -static cl::opt Mode( +static cl::opt Mode( "regalloc-enable-priority-advisor", cl::Hidden, -cl::init(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default), +cl::init(RegAllocPriorityAdvisorProvider::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values( -clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default, +clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Default, "default", "Default"), -clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release, +clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Release, "release", "precompiled"), -clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development, +clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Development, "development", "for training"), clEnumValN( -RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy, "dummy", +RegAllocPriorityAdvisorProvider::AdvisorMode::Dummy, "dummy", "prioritize low virtual register numbers for test and debug"))); -char RegAllocPriorityAdvisorAnalysis::ID = 0; -INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority", +char RegAllocPriorityAdvisorAnalysisLegacy::ID = 0; +INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysisLegacy, "regalloc-priority", "Regalloc priority policy", false, true) namespace { -class DefaultPriorityAdvisorAnalysis final -: public RegAllocPriorityAdvisorAnalysis { + +class DefaultPriorityAdvisorProvider final +: public RegAllocPriorityAdvisorProvider { +public: + DefaultPriorityAdvisorProvider(bool NotAsRequested, LLVMContext &Ctx) + : RegAllocPriorityAdvisorProvider(AdvisorMode::Default) { +if (NotAsRequested) + Ctx.emitError("Requested regalloc priority advisor analysis " +"could be created. Using default"); + } + + // support for isa<> and dyn_cast. 
+ static bool classof(const RegAllocPriorityAdvisorProvider *R) { +return R->getAdvisorMode() == AdvisorMode::Default; + } + + std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *SI) override { +assert(SI && "SlotIndexes result must be set"); +return std::make_unique(MF, RA, SI); + } +}; + +class DummyPriorityAdvisorProvider final +: public RegAllocPriorityAdvisorProvider { +public: + DummyPriorityAdvisorProvider() + : RegAllocPriorityAdvisorProvider(AdvisorMode::Dummy) {} + + static bool classof(const RegAllocPriorityAdvisorProvider *R) { +return R->getAdvisorMode() == AdvisorMode::Dummy; + } + + std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *SI) override { +assert(SI && "SlotIndexes result must be set"); +return std::make_unique(MF, RA, SI); + } +}; + +class DefaultPriorityAdvisorAnalysisLegacy final +: public RegAllocPriorityAdvisorAnalysisLegacy { public: - DefaultPriorityAdvisorAnalysis(bool NotAsRequested) - : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Default), + DefaultPriorityAdvisorAnalysisLegacy(bool NotAsRequested) + : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Default), NotAsRequested(NotAsRequested) {} // support for isa<> and dyn_cast. - static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + static bool classof(const RegAllocPriorityAdvisorAnalysisLegacy *R) { return R->getAdvisorMode() == AdvisorMode::Default; } private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); -RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); - } - std::unique_ptr - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { -return std::make_unique( -MF, RA, &getAnalysis().getSI()); +RegAllocPriorityAdvisorAnalysisLegacy::getAnalysisUsage(AU); } + bool doInitialization(Module &M) override { -if (NotAsRequested) - M.getContext().emitError("Requested regalloc priority advisor analysis " - "could be created. 
Using default"); -return RegAllocPriorityAdvisorAnalysis::doInitialization(M); +Provider.reset( +new DefaultPriorityAdvisorProvider(NotAsRequested, M.getContext())); +return false; } + const bool NotAsRequested; }; class DummyPriorityAdvisorAnalysis final -: public RegAllocPriorityAdvisorAnalysis { +: public RegAllocPriorityAdvisorAnalysisLegacy { public: + using RegAllocPriorityAdvisorAnalysisLegacy::AdvisorMode; DummyPriorityAdvisorAnalysis() - : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Dummy) {} + : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Dummy) {} // support for isa<> and dyn_cast. - static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + static bool classof(const RegAl
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)
@@ -357,3 +391,21 @@ DevelopmentModePriorityAdvisor::getPriority(const LiveInterval &LI) const { } #endif // #ifdef LLVM_HAVE_TFLITE + +void RegAllocPriorityAdvisorAnalysis::initializeMLProvider( +RegAllocPriorityAdvisorProvider::AdvisorMode Mode, LLVMContext &Ctx) { + if (Provider) +return; + switch (Mode) { + case RegAllocPriorityAdvisorProvider::AdvisorMode::Development: +#if defined(LLVM_HAVE_TFLITE) +Provider.reset(new DevelopmentModePriorityAdvisorProvider(Ctx)); +#endif +break; + case RegAllocPriorityAdvisorProvider::AdvisorMode::Release: +Provider.reset(new ReleaseModePriorityAdvisorProvider()); +break; + default: +break; arsenm wrote: llvm_unreachable? This should be a fully covered switch https://github.com/llvm/llvm-project/pull/118462 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)
@@ -68,20 +70,72 @@ class DummyPriorityAdvisor : public RegAllocPriorityAdvisor { unsigned getPriority(const LiveInterval &LI) const override; }; -class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { +/// Common provider for getting the priority advisor and logging rewards. +/// Legacy analysis forwards all calls to this provider. +/// New analysis serves the provider as the analysis result. +/// Expensive setup is done in the constructor, so that the advisor can be +/// created quickly for every machine function. +/// TODO: Remove once legacy PM support is dropped. +class RegAllocPriorityAdvisorProvider { public: enum class AdvisorMode : int { Default, Release, Development, Dummy }; - RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode) - : ImmutablePass(ID), Mode(Mode){}; + RegAllocPriorityAdvisorProvider(AdvisorMode Mode) : Mode(Mode) {} + + virtual ~RegAllocPriorityAdvisorProvider() = default; + + virtual void logRewardIfNeeded(const MachineFunction &MF, + function_ref GetReward) {}; + + virtual std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *SI) = 0; arsenm wrote: Make SI a reference, it is required https://github.com/llvm/llvm-project/pull/118462 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)
@@ -20,107 +20,172 @@ using namespace llvm; -static cl::opt Mode( +static cl::opt Mode( "regalloc-enable-priority-advisor", cl::Hidden, -cl::init(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default), +cl::init(RegAllocPriorityAdvisorProvider::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values( -clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default, +clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Default, "default", "Default"), -clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release, +clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Release, "release", "precompiled"), -clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development, +clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Development, "development", "for training"), clEnumValN( -RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy, "dummy", +RegAllocPriorityAdvisorProvider::AdvisorMode::Dummy, "dummy", "prioritize low virtual register numbers for test and debug"))); -char RegAllocPriorityAdvisorAnalysis::ID = 0; -INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority", +char RegAllocPriorityAdvisorAnalysisLegacy::ID = 0; +INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysisLegacy, "regalloc-priority", "Regalloc priority policy", false, true) namespace { -class DefaultPriorityAdvisorAnalysis final -: public RegAllocPriorityAdvisorAnalysis { + +class DefaultPriorityAdvisorProvider final +: public RegAllocPriorityAdvisorProvider { +public: + DefaultPriorityAdvisorProvider(bool NotAsRequested, LLVMContext &Ctx) + : RegAllocPriorityAdvisorProvider(AdvisorMode::Default) { +if (NotAsRequested) + Ctx.emitError("Requested regalloc priority advisor analysis " +"could be created. Using default"); + } + + // support for isa<> and dyn_cast. 
+ static bool classof(const RegAllocPriorityAdvisorProvider *R) { +return R->getAdvisorMode() == AdvisorMode::Default; + } + + std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *SI) override { +assert(SI && "SlotIndexes result must be set"); +return std::make_unique(MF, RA, SI); + } +}; + +class DummyPriorityAdvisorProvider final +: public RegAllocPriorityAdvisorProvider { +public: + DummyPriorityAdvisorProvider() + : RegAllocPriorityAdvisorProvider(AdvisorMode::Dummy) {} + + static bool classof(const RegAllocPriorityAdvisorProvider *R) { +return R->getAdvisorMode() == AdvisorMode::Dummy; + } + + std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *SI) override { +assert(SI && "SlotIndexes result must be set"); +return std::make_unique(MF, RA, SI); + } +}; + +class DefaultPriorityAdvisorAnalysisLegacy final +: public RegAllocPriorityAdvisorAnalysisLegacy { public: - DefaultPriorityAdvisorAnalysis(bool NotAsRequested) - : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Default), + DefaultPriorityAdvisorAnalysisLegacy(bool NotAsRequested) + : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Default), NotAsRequested(NotAsRequested) {} // support for isa<> and dyn_cast. - static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + static bool classof(const RegAllocPriorityAdvisorAnalysisLegacy *R) { return R->getAdvisorMode() == AdvisorMode::Default; } private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); -RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); - } - std::unique_ptr - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { -return std::make_unique( -MF, RA, &getAnalysis().getSI()); +RegAllocPriorityAdvisorAnalysisLegacy::getAnalysisUsage(AU); } + bool doInitialization(Module &M) override { -if (NotAsRequested) - M.getContext().emitError("Requested regalloc priority advisor analysis " - "could be created. 
Using default"); -return RegAllocPriorityAdvisorAnalysis::doInitialization(M); +Provider.reset( +new DefaultPriorityAdvisorProvider(NotAsRequested, M.getContext())); +return false; } + const bool NotAsRequested; }; class DummyPriorityAdvisorAnalysis final -: public RegAllocPriorityAdvisorAnalysis { +: public RegAllocPriorityAdvisorAnalysisLegacy { public: + using RegAllocPriorityAdvisorAnalysisLegacy::AdvisorMode; DummyPriorityAdvisorAnalysis() - : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Dummy) {} + : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Dummy) {} // support for isa<> and dyn_cast. - static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + static bool classof(const RegAl
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)
@@ -150,12 +139,37 @@ class ReleaseModePriorityAdvisorAnalysis final InteractiveChannelBaseName + ".out", InteractiveChannelBaseName + ".in"); } -return std::make_unique( -MF, RA, &getAnalysis().getSI(), Runner.get()); +assert(SI && "SlotIndexes result must be set"); arsenm wrote: Make a reference and drop the assert https://github.com/llvm/llvm-project/pull/118462 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)
@@ -146,11 +149,138 @@ static cl::opt SplitThresholdForRegWithHint( static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); -char RAGreedy::ID = 0; -char &llvm::RAGreedyID = RAGreedy::ID; +namespace { +class RAGreedyLegacy : public MachineFunctionPass { + RegAllocFilterFunc F; -INITIALIZE_PASS_BEGIN(RAGreedy, "greedy", -"Greedy Register Allocator", false, false) +public: + RAGreedyLegacy(const RegAllocFilterFunc F = nullptr); + + static char ID; + /// Return the pass name. + StringRef getPassName() const override { return "Greedy Register Allocator"; } + + /// RAGreedy analysis usage. + void getAnalysisUsage(AnalysisUsage &AU) const override; + /// Perform register allocation. + bool runOnMachineFunction(MachineFunction &mf) override; + + MachineFunctionProperties getRequiredProperties() const override { +return MachineFunctionProperties().set( +MachineFunctionProperties::Property::NoPHIs); + } + + MachineFunctionProperties getClearedProperties() const override { +return MachineFunctionProperties().set( +MachineFunctionProperties::Property::IsSSA); + } +}; + +} // end anonymous namespace + +RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F) +: MachineFunctionPass(ID), F(F) { + initializeRAGreedyLegacyPass(*PassRegistry::getPassRegistry()); +} + +RAGreedy::RAGreedy(RequiredAnalyses &Analyses, const RegAllocFilterFunc F) : RegAllocBase(F) { + setAnalyses(Analyses); +} + +void RAGreedy::setAnalyses(RequiredAnalyses &Analyses) { arsenm wrote: Just do this directly in the constructor, there's no other user of setAnalyses https://github.com/llvm/llvm-project/pull/119540 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)
https://github.com/arsenm commented: Missing test https://github.com/llvm/llvm-project/pull/119540 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/119540 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)
@@ -146,11 +149,138 @@ static cl::opt SplitThresholdForRegWithHint( static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); -char RAGreedy::ID = 0; -char &llvm::RAGreedyID = RAGreedy::ID; +namespace { +class RAGreedyLegacy : public MachineFunctionPass { + RegAllocFilterFunc F; -INITIALIZE_PASS_BEGIN(RAGreedy, "greedy", -"Greedy Register Allocator", false, false) +public: + RAGreedyLegacy(const RegAllocFilterFunc F = nullptr); + + static char ID; + /// Return the pass name. + StringRef getPassName() const override { return "Greedy Register Allocator"; } + + /// RAGreedy analysis usage. + void getAnalysisUsage(AnalysisUsage &AU) const override; + /// Perform register allocation. + bool runOnMachineFunction(MachineFunction &mf) override; + + MachineFunctionProperties getRequiredProperties() const override { +return MachineFunctionProperties().set( +MachineFunctionProperties::Property::NoPHIs); + } + + MachineFunctionProperties getClearedProperties() const override { +return MachineFunctionProperties().set( +MachineFunctionProperties::Property::IsSSA); + } +}; + +} // end anonymous namespace + +RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F) +: MachineFunctionPass(ID), F(F) { + initializeRAGreedyLegacyPass(*PassRegistry::getPassRegistry()); +} + +RAGreedy::RAGreedy(RequiredAnalyses &Analyses, const RegAllocFilterFunc F) : RegAllocBase(F) { + setAnalyses(Analyses); +} + +void RAGreedy::setAnalyses(RequiredAnalyses &Analyses) { + VRM = Analyses.VRM; + LIS = Analyses.LIS; + Matrix = Analyses.LRM; + Indexes = Analyses.Indexes; + MBFI = Analyses.MBFI; + DomTree = Analyses.DomTree; + Loops = Analyses.Loops; + ORE = Analyses.ORE; + Bundles = Analyses.Bundles; + SpillPlacer = Analyses.SpillPlacer; + DebugVars = Analyses.DebugVars; + LSS = Analyses.LSS; + EvictProvider = Analyses.EvictProvider; + PriorityProvider = Analyses.PriorityProvider; +} + +void RAGreedyPass::printPipeline(raw_ostream &OS, function_ref 
MapClassName2PassName) const { + StringRef FilterName = Opts.FilterName.empty() ? "all" : Opts.FilterName; + OS << "regallocgreedy<" << FilterName << '>'; arsenm wrote: ```suggestion OS << "regalloc-greedy<" << FilterName << '>'; ``` https://github.com/llvm/llvm-project/pull/119540 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)
@@ -1,16 +1,21 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -// CHECK-DAG: @[[CB:.+]] = external constant { float } +// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s cbuffer A { -float a; - // CHECK-DAG:@_ZL1b = internal global float 3.00e+00, align 4 + // CHECK: @a = external addrspace(2) externally_initialized global float, align 4 + float a; + // CHECK: @_ZL1b = internal global float 3.00e+00, align 4 static float b = 3; hekota wrote: At this point it is not clear what the end goal is. The static decl here tests that it does not get added to the cbuffer layout struct or the new address space. When/if we prohibit static decls in cbuffers this test will surely flare up and will be fixed up. https://github.com/llvm/llvm-project/pull/123411 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
@@ -148,6 +148,333 @@ bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { return false; } +// Parser Definitions + +RootSignatureParser::RootSignatureParser( +SmallVector &Elements, +const SmallVector &Tokens) +: Elements(Elements) { + CurTok = Tokens.begin(); + LastTok = Tokens.end(); +} + +bool RootSignatureParser::ReportError() { return true; } + +bool RootSignatureParser::Parse() { + CurTok--; // Decrement once here so we can use the ...ExpectedToken api + + // Iterate as many RootElements as possible + bool HasComma = true; + while (HasComma && + !TryConsumeExpectedToken(ArrayRef{TokenKind::kw_DescriptorTable})) { +if (ParseRootElement()) + return true; +HasComma = !TryConsumeExpectedToken(TokenKind::pu_comma); + } + if (HasComma) +return ReportError(); // report 'comma' denotes a required extra item + + // Ensure that we are at the end of the tokens + CurTok++; + if (CurTok != LastTok) +return ReportError(); // report expected end of input but got more + return false; +} + +bool RootSignatureParser::ParseRootElement() { + // Dispatch onto the correct parse method + switch (CurTok->Kind) { + case TokenKind::kw_DescriptorTable: +return ParseDescriptorTable(); + default: +llvm_unreachable("Switch for an expected token was not provided"); +return true; + } +} + +bool RootSignatureParser::ParseDescriptorTable() { + DescriptorTable Table; + + if (ConsumeExpectedToken(TokenKind::pu_l_paren)) +return true; + + // Iterate as many DescriptorTableClaues as possible + bool HasComma = true; + while (!TryConsumeExpectedToken({TokenKind::kw_CBV, TokenKind::kw_SRV, + TokenKind::kw_UAV, TokenKind::kw_Sampler})) { +if (ParseDescriptorTableClause()) + return true; +Table.NumClauses++; +HasComma = !TryConsumeExpectedToken(TokenKind::pu_comma); + } + + // Consume optional 'visibility' paramater + if (HasComma && !TryConsumeExpectedToken(TokenKind::kw_visibility)) { +if (ConsumeExpectedToken(TokenKind::pu_equal)) + return true; + +if 
(ParseShaderVisibility(Table.Visibility)) + return true; + +HasComma = !TryConsumeExpectedToken(TokenKind::pu_comma); + } + + if (HasComma && Table.NumClauses != 0) +return ReportError(); // report 'comma' denotes a required extra item + + if (ConsumeExpectedToken(TokenKind::pu_r_paren)) +return true; + + Elements.push_back(RootElement(Table)); + return false; +} + +bool RootSignatureParser::ParseDescriptorTableClause() { + // Determine the type of Clause first so we can initialize the struct with + // the correct default flags + ClauseType CT; + switch (CurTok->Kind) { + case TokenKind::kw_CBV: +CT = ClauseType::CBV; +break; + case TokenKind::kw_SRV: +CT = ClauseType::SRV; +break; + case TokenKind::kw_UAV: +CT = ClauseType::UAV; +break; + case TokenKind::kw_Sampler: +CT = ClauseType::Sampler; +break; + default: +llvm_unreachable("Switch for an expected token was not provided"); +return true; + } + DescriptorTableClause Clause(CT); + + if (ConsumeExpectedToken(TokenKind::pu_l_paren)) +return true; + + // Consume mandatory Register paramater + if (ConsumeExpectedToken( + {TokenKind::bReg, TokenKind::tReg, TokenKind::uReg, TokenKind::sReg})) +return true; + if (ParseRegister(Clause.Register)) +return true; + + // Start parsing the optional parameters inbelic wrote: Confirmed that we should accept the parameters in any order. So will need to refactor this. https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
https://github.com/llvm-beanz commented: I think that the way you're breaking up this change is sub-optimal from a review perspective. You've added a lot of code that partially handles parsing a very complex root signature. The problem is that to complete this implementation you're going to go back over this code over and over again fleshing it out, and from a reviewer's perspective we're going to need to keep paging back in extra context. If instead you started with a much simpler root signature (even just an empty one), but implement more complete handling for it, we can review that and incrementally build up without revisiting the same code over and over again in each subsequent patch. https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
inbelic wrote: Sounds good, and I appreciate the feedback. I will restructure the changes to be of smaller granularity, which will be better self-contained and directly include their diagnostics testing. https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
https://github.com/inbelic converted_to_draft https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang][CWG2369] Implement GCC's heuristic for DR 2369 (PR #124231)
https://github.com/zyn0217 updated https://github.com/llvm/llvm-project/pull/124231 >From c36dd4fcac367b206072b36ccc9be4106a22ec3b Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Fri, 24 Jan 2025 13:52:37 +0800 Subject: [PATCH 1/2] Implement GCC's CWG 2369 heuristic --- clang/include/clang/Sema/Sema.h | 7 +- clang/lib/Sema/SemaOverload.cpp | 70 +++- clang/lib/Sema/SemaTemplateDeduction.cpp | 13 +- .../SemaTemplate/concepts-recursive-inst.cpp | 169 ++ 4 files changed, 246 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 87d9a335763e31..fd4d1f7e0d8f9c 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10236,7 +10236,8 @@ class Sema final : public SemaBase { FunctionTemplateDecl *FunctionTemplate, ArrayRef ParamTypes, ArrayRef Args, OverloadCandidateSet &CandidateSet, ConversionSequenceList &Conversions, bool SuppressUserConversions, - CXXRecordDecl *ActingContext = nullptr, QualType ObjectType = QualType(), + bool NonInstOnly, CXXRecordDecl *ActingContext = nullptr, + QualType ObjectType = QualType(), Expr::Classification ObjectClassification = {}, OverloadCandidateParamOrder PO = {}); @@ -12272,7 +12273,7 @@ class Sema final : public SemaBase { sema::TemplateDeductionInfo &Info, SmallVectorImpl const *OriginalCallArgs = nullptr, bool PartialOverloading = false, - llvm::function_ref CheckNonDependent = [] { return false; }); + llvm::function_ref CheckNonDependent = [](bool) { return false; }); /// Perform template argument deduction from a function call /// (C++ [temp.deduct.call]). 
@@ -12306,7 +12307,7 @@ class Sema final : public SemaBase { FunctionDecl *&Specialization, sema::TemplateDeductionInfo &Info, bool PartialOverloading, bool AggregateDeductionCandidate, QualType ObjectType, Expr::Classification ObjectClassification, - llvm::function_ref)> CheckNonDependent); + llvm::function_ref, bool)> CheckNonDependent); /// Deduce template arguments when taking the address of a function /// template (C++ [temp.deduct.funcaddr]) or matching a specialization to diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 3be9ade80f1d94..aded8abe5b4f7b 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7733,10 +7733,10 @@ void Sema::AddMethodTemplateCandidate( MethodTmpl, ExplicitTemplateArgs, Args, Specialization, Info, PartialOverloading, /*AggregateDeductionCandidate=*/false, ObjectType, ObjectClassification, - [&](ArrayRef ParamTypes) { + [&](ArrayRef ParamTypes, bool NonInstOnly) { return CheckNonDependentConversions( MethodTmpl, ParamTypes, Args, CandidateSet, Conversions, -SuppressUserConversions, ActingContext, ObjectType, +SuppressUserConversions, NonInstOnly, ActingContext, ObjectType, ObjectClassification, PO); }); Result != TemplateDeductionResult::Success) { @@ -7818,10 +7818,11 @@ void Sema::AddTemplateOverloadCandidate( PartialOverloading, AggregateCandidateDeduction, /*ObjectType=*/QualType(), /*ObjectClassification=*/Expr::Classification(), - [&](ArrayRef ParamTypes) { + [&](ArrayRef ParamTypes, bool NonInstOnly) { return CheckNonDependentConversions( FunctionTemplate, ParamTypes, Args, CandidateSet, Conversions, -SuppressUserConversions, nullptr, QualType(), {}, PO); +SuppressUserConversions, NonInstOnly, nullptr, QualType(), {}, +PO); }); Result != TemplateDeductionResult::Success) { OverloadCandidate &Candidate = @@ -7863,7 +7864,7 @@ bool Sema::CheckNonDependentConversions( FunctionTemplateDecl *FunctionTemplate, ArrayRef ParamTypes, ArrayRef Args, OverloadCandidateSet 
&CandidateSet, ConversionSequenceList &Conversions, bool SuppressUserConversions, -CXXRecordDecl *ActingContext, QualType ObjectType, +bool NonInstOnly, CXXRecordDecl *ActingContext, QualType ObjectType, Expr::Classification ObjectClassification, OverloadCandidateParamOrder PO) { // FIXME: The cases in which we allow explicit conversions for constructor // arguments never consider calling a constructor template. It's not clear @@ -7900,6 +7901,63 @@ bool Sema::CheckNonDependentConversions( } } + // https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=2154bcd6d43cfd821ca70e1583880c4ed955355d + auto ConversionMightInduceInstantiation = [&](QualType ParmType, +QualType ArgType) { +ParmType = ParmType.getNonReferenceType(); +ArgType = ArgType.getNonReferenceType(); +bool Pointe
[llvm-branch-commits] [llvm] [JITLink][LoongArch] Add label addition and subtraction relocations (PR #122262)
https://github.com/wangleiat approved this pull request. LGTM, thanks. https://github.com/llvm/llvm-project/pull/122262 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
@@ -89,6 +91,75 @@ class RootSignatureLexer { } }; +class RootSignatureParser { +public: + RootSignatureParser(SmallVector &Elements, + const SmallVector &Tokens); + + // Iterates over the provided tokens and constructs the in-memory + // representations of the RootElements. + // + // The return value denotes if there was a failure and the method will + // return on the first encountered failure, or, return false if it + // can successfully reach the end of the tokens. + bool Parse(); + +private: + bool ReportError(); // TODO: Implement this to report error through Diags + + // Root Element helpers + bool ParseRootElement(); + bool ParseDescriptorTable(); + bool ParseDescriptorTableClause(); + + // Common parsing helpers + bool ParseRegister(Register &Register); + + // Various flags/enum parsing helpers + bool ParseDescriptorRangeFlags(DescriptorRangeFlags &Flags); + bool ParseShaderVisibility(ShaderVisibility &Flag); + + // Increment the token iterator if we have not reached the end. + // Return value denotes if we were already at the last token. + bool ConsumeNextToken(); + + // Attempt to retrieve the next token, if TokenKind is invalid then there was + // no next token. + RootSignatureToken PeekNextToken(); + + // Is the current token one of the expected kinds + bool IsCurExpectedToken(ArrayRef AnyExpected); damyanp wrote: Any reason this doesn't have an overload that takes a single `Expected` like all the ones below? Alternatively, if this one doesn't need the overload then do we need the other ones? https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass (PR #118630)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118630 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
@@ -0,0 +1,140 @@ +//===- HLSLRootSignature.h - HLSL Root Signature helper objects ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +/// +/// \file This file contains helper objects for working with HLSL Root +/// Signatures. +/// +//===--===// + +#ifndef LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H +#define LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H + +#include + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace hlsl { +namespace root_signature { + +// This is a copy from DebugInfo/CodeView/CodeView.h +#define RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(Class) \ + inline Class operator|(Class a, Class b) { \ +return static_cast(llvm::to_underlying(a) | \ + llvm::to_underlying(b)); \ + } \ + inline Class operator&(Class a, Class b) { \ +return static_cast(llvm::to_underlying(a) & \ + llvm::to_underlying(b)); \ + } \ + inline Class operator~(Class a) { \ +return static_cast(~llvm::to_underlying(a)); \ + } \ + inline Class &operator|=(Class &a, Class b) { \ +a = a | b; \ +return a; \ + } \ + inline Class &operator&=(Class &a, Class b) { \ +a = a & b; \ +return a; \ + } + +// Definition of the various enumerations and flags +enum class DescriptorRangeFlags : unsigned { + None = 0, + DescriptorsVolatile = 0x1, + DataVolatile = 0x2, + DataStaticWhileSetAtExecute = 0x4, + DataStatic = 0x8, + DescriptorsStaticKeepingBufferBoundsChecks = 0x1, + ValidFlags = 0x1000f, + ValidSamplerFlags = DescriptorsVolatile, +}; +RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(DescriptorRangeFlags) + +enum class ShaderVisibility { + All = 0, + Vertex = 1, + Hull = 2, + Domain = 3, + Geometry = 4, + Pixel = 5, + Amplification = 6, + Mesh = 7, +}; + +// Definitions of the in-memory data layout structures + +// Models the different registers: bReg | tReg | uReg | sReg +enum class 
RegisterType { BReg, TReg, UReg, SReg }; +struct Register { + RegisterType ViewType; + uint32_t Number; +}; + +static const uint32_t DescriptorTableOffsetAppend = 0xffffffff; +// Models DTClause : CBV | SRV | UAV | Sampler by collecting like parameters +enum class ClauseType { CBV, SRV, UAV, Sampler }; +struct DescriptorTableClause { + ClauseType Type; + Register Register; + uint32_t NumDescriptors = 1; + uint32_t Space = 0; + uint32_t Offset = DescriptorTableOffsetAppend; + DescriptorRangeFlags Flags; + + DescriptorTableClause(ClauseType Type) : Type(Type) { +switch (Type) { +case ClauseType::CBV: + Flags = DescriptorRangeFlags::DataStaticWhileSetAtExecute; + break; +case ClauseType::SRV: + Flags = DescriptorRangeFlags::DataStaticWhileSetAtExecute; + break; +case ClauseType::UAV: + Flags = DescriptorRangeFlags::DataVolatile; + break; +case ClauseType::Sampler: + Flags = DescriptorRangeFlags::None; + break; +} + } +}; + +// Models the end of a descriptor table and stores its visibility +struct DescriptorTable { + ShaderVisibility Visibility = ShaderVisibility::All; + uint32_t NumClauses = 0; // The number of clauses in the table +}; + +// Models RootElement : DescriptorTable | DescriptorTableClause +struct RootElement { damyanp wrote: Is there a reason not to use `std::variant`? https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
@@ -148,6 +148,347 @@ bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { return false; } +// Parser Definitions + +RootSignatureParser::RootSignatureParser( +SmallVector &Elements, +const SmallVector &Tokens) +: Elements(Elements) { + CurTok = Tokens.begin(); + LastTok = Tokens.end(); +} + +bool RootSignatureParser::ReportError() { return true; } + +bool RootSignatureParser::Parse() { + // Handle edge-case of empty RootSignature() + if (CurTok == LastTok) +return false; + + // Iterate as many RootElements as possible + bool HasComma = true; + while (HasComma && + IsCurExpectedToken(ArrayRef{TokenKind::kw_DescriptorTable})) { +if (ParseRootElement()) + return true; +HasComma = !TryConsumeExpectedToken(TokenKind::pu_comma); +if (HasComma) + ConsumeNextToken(); + } + + if (HasComma) +return ReportError(); // report 'comma' denotes a required extra item + + // Ensure that we are at the end of the tokens + CurTok++; + if (CurTok != LastTok) +return ReportError(); // report expected end of input but got more + return false; +} + +bool RootSignatureParser::ParseRootElement() { + // Dispatch onto the correct parse method + switch (CurTok->Kind) { + case TokenKind::kw_DescriptorTable: +return ParseDescriptorTable(); + default: +llvm_unreachable("Switch for an expected token was not provided"); +return true; damyanp wrote: Is the `return true` after `llvm_unreachable` the right thing to do here? https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
@@ -0,0 +1,140 @@ +//===- HLSLRootSignature.h - HLSL Root Signature helper objects ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +/// +/// \file This file contains helper objects for working with HLSL Root +/// Signatures. +/// +//===--===// + +#ifndef LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H +#define LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H + +#include + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace hlsl { +namespace root_signature { + +// This is a copy from DebugInfo/CodeView/CodeView.h +#define RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(Class) \ + inline Class operator|(Class a, Class b) { \ +return static_cast(llvm::to_underlying(a) | \ + llvm::to_underlying(b)); \ + } \ + inline Class operator&(Class a, Class b) { \ +return static_cast(llvm::to_underlying(a) & \ + llvm::to_underlying(b)); \ + } \ + inline Class operator~(Class a) { \ +return static_cast(~llvm::to_underlying(a)); \ + } \ + inline Class &operator|=(Class &a, Class b) { \ +a = a | b; \ +return a; \ + } \ + inline Class &operator&=(Class &a, Class b) { \ +a = a & b; \ +return a; \ + } + +// Definition of the various enumerations and flags +enum class DescriptorRangeFlags : unsigned { + None = 0, + DescriptorsVolatile = 0x1, + DataVolatile = 0x2, + DataStaticWhileSetAtExecute = 0x4, + DataStatic = 0x8, + DescriptorsStaticKeepingBufferBoundsChecks = 0x1, + ValidFlags = 0x1000f, + ValidSamplerFlags = DescriptorsVolatile, +}; +RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(DescriptorRangeFlags) + +enum class ShaderVisibility { + All = 0, + Vertex = 1, + Hull = 2, + Domain = 3, + Geometry = 4, + Pixel = 5, + Amplification = 6, + Mesh = 7, +}; + +// Definitions of the in-memory data layout structures + +// Models the different registers: bReg | tReg | uReg | sReg +enum class 
RegisterType { BReg, TReg, UReg, SReg }; +struct Register { + RegisterType ViewType; + uint32_t Number; +}; + +static const uint32_t DescriptorTableOffsetAppend = 0xffffffff; +// Models DTClause : CBV | SRV | UAV | Sampler by collecting like parameters +enum class ClauseType { CBV, SRV, UAV, Sampler }; +struct DescriptorTableClause { + ClauseType Type; + Register Register; damyanp wrote: It looks like there's nothing enforcing Register's initialization? Since this struct has a constructor, I'd expect to get a fully initialized object back when I construct it. https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
@@ -0,0 +1,140 @@ +//===- HLSLRootSignature.h - HLSL Root Signature helper objects ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +/// +/// \file This file contains helper objects for working with HLSL Root +/// Signatures. +/// +//===--===// + +#ifndef LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H +#define LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H + +#include + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace hlsl { +namespace root_signature { + +// This is a copy from DebugInfo/CodeView/CodeView.h +#define RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(Class) \ + inline Class operator|(Class a, Class b) { \ +return static_cast(llvm::to_underlying(a) | \ + llvm::to_underlying(b)); \ + } \ + inline Class operator&(Class a, Class b) { \ +return static_cast(llvm::to_underlying(a) & \ + llvm::to_underlying(b)); \ + } \ + inline Class operator~(Class a) { \ +return static_cast(~llvm::to_underlying(a)); \ + } \ + inline Class &operator|=(Class &a, Class b) { \ +a = a | b; \ +return a; \ + } \ + inline Class &operator&=(Class &a, Class b) { \ +a = a & b; \ +return a; \ + } + +// Definition of the various enumerations and flags +enum class DescriptorRangeFlags : unsigned { + None = 0, + DescriptorsVolatile = 0x1, + DataVolatile = 0x2, + DataStaticWhileSetAtExecute = 0x4, + DataStatic = 0x8, + DescriptorsStaticKeepingBufferBoundsChecks = 0x1, + ValidFlags = 0x1000f, + ValidSamplerFlags = DescriptorsVolatile, +}; +RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(DescriptorRangeFlags) + +enum class ShaderVisibility { + All = 0, + Vertex = 1, + Hull = 2, + Domain = 3, + Geometry = 4, + Pixel = 5, + Amplification = 6, + Mesh = 7, +}; + +// Definitions of the in-memory data layout structures + +// Models the different registers: bReg | tReg | uReg | sReg +enum class 
RegisterType { BReg, TReg, UReg, SReg }; +struct Register { + RegisterType ViewType; + uint32_t Number; +}; + +static const uint32_t DescriptorTableOffsetAppend = 0xffffffff; +// Models DTClause : CBV | SRV | UAV | Sampler by collecting like parameters +enum class ClauseType { CBV, SRV, UAV, Sampler }; llvm-beanz wrote: Can we do this instead? ```suggestion using ClauseType = llvm::dxil::ResourceClass ``` This will change `CBV` to `CBuffer`, but otherwise those enums need to be the same right? https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
@@ -89,6 +91,72 @@ class RootSignatureLexer { } }; +class RootSignatureParser { +public: + RootSignatureParser(SmallVector &Elements, + const SmallVector &Tokens); + + // Iterates over the provided tokens and constructs the in-memory + // representations of the RootElements. + // + // The return value denotes if there was a failure and the method will + // return on the first encountered failure, or, return false if it + // can successfully reach the end of the tokens. + bool Parse(); + +private: + bool ReportError(); // TODO: Implement this to report error through Diags llvm-beanz wrote: I don't think this should be separate. It's going to be really hard to ensure that any follow-up that adds error reporting properly covers all the cases. https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)
https://github.com/llvm-beanz edited https://github.com/llvm/llvm-project/pull/122982 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
@@ -5907,6 +5910,82 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT= LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + // Allow wider loads if they are sufficiently aligned to avoid memory faults + // and if the original load is simple. + unsigned LdAlign = + (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value(); + + // Find the vector type that can load from. + std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain, + BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. 
+ SDValue Result; + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + if (!FirstVT->isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +Result = DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + else if (FirstVT == WidenVT) +Result = LdOp; + else { +// TODO: We don't currently have any tests that exercise this code path. +assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0); +unsigned NumConcat = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +SmallVector ConcatOps(NumConcat); +SDValue UndefVal = DAG.getUNDEF(*FirstVT); +ConcatOps[0] = LdOp; +for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; +Result = DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); + } + arsenm wrote: The type coercion code is the sharable part that could be extracted into a helper function https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)
@@ -194,8 +194,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, return false; } -/// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, +template +static BaseIndexOffset matchSDNode(const T *N, arsenm wrote: That's fine, the optimization can be another PR https://github.com/llvm/llvm-project/pull/120640 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)
@@ -1395,6 +1398,34 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + SDLoc dl(LD); + + EVT MemoryVT = LD->getMemoryVT(); + unsigned NumElts = MemoryVT.getVectorMinNumElements(); + + EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); + EVT ElemVT = EVT::getVectorVT(*DAG.getContext(), +MemoryVT.getVectorElementType(), 1); + + // Create a single atomic to load all the elements at once. + SDValue Atomic = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntMemoryVT, IntMemoryVT, + LD->getChain(), LD->getBasePtr(), + LD->getMemOperand()); + + // Instead of splitting, put all the elements back into a vector. + SmallVector Ops; + for (unsigned i = 0; i < NumElts; ++i) { +SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Atomic, + DAG.getVectorIdxConstant(i, dl)); +Elt = DAG.getBitcast(ElemVT, Elt); +Ops.push_back(Elt); + } + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops); arsenm wrote: I'd consider this a bug, I would expect this to assert in getNode https://github.com/llvm/llvm-project/pull/120640 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang][CWG2369] Implement GCC's heuristic for DR 2369 (PR #124231)
https://github.com/zyn0217 created https://github.com/llvm/llvm-project/pull/124231 None >From c36dd4fcac367b206072b36ccc9be4106a22ec3b Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Fri, 24 Jan 2025 13:52:37 +0800 Subject: [PATCH] Implement GCC's CWG 2369 heuristic --- clang/include/clang/Sema/Sema.h | 7 +- clang/lib/Sema/SemaOverload.cpp | 70 +++- clang/lib/Sema/SemaTemplateDeduction.cpp | 13 +- .../SemaTemplate/concepts-recursive-inst.cpp | 169 ++ 4 files changed, 246 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 87d9a335763e31..fd4d1f7e0d8f9c 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10236,7 +10236,8 @@ class Sema final : public SemaBase { FunctionTemplateDecl *FunctionTemplate, ArrayRef ParamTypes, ArrayRef Args, OverloadCandidateSet &CandidateSet, ConversionSequenceList &Conversions, bool SuppressUserConversions, - CXXRecordDecl *ActingContext = nullptr, QualType ObjectType = QualType(), + bool NonInstOnly, CXXRecordDecl *ActingContext = nullptr, + QualType ObjectType = QualType(), Expr::Classification ObjectClassification = {}, OverloadCandidateParamOrder PO = {}); @@ -12272,7 +12273,7 @@ class Sema final : public SemaBase { sema::TemplateDeductionInfo &Info, SmallVectorImpl const *OriginalCallArgs = nullptr, bool PartialOverloading = false, - llvm::function_ref CheckNonDependent = [] { return false; }); + llvm::function_ref CheckNonDependent = [](bool) { return false; }); /// Perform template argument deduction from a function call /// (C++ [temp.deduct.call]). 
@@ -12306,7 +12307,7 @@ class Sema final : public SemaBase { FunctionDecl *&Specialization, sema::TemplateDeductionInfo &Info, bool PartialOverloading, bool AggregateDeductionCandidate, QualType ObjectType, Expr::Classification ObjectClassification, - llvm::function_ref)> CheckNonDependent); + llvm::function_ref, bool)> CheckNonDependent); /// Deduce template arguments when taking the address of a function /// template (C++ [temp.deduct.funcaddr]) or matching a specialization to diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 3be9ade80f1d94..aded8abe5b4f7b 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7733,10 +7733,10 @@ void Sema::AddMethodTemplateCandidate( MethodTmpl, ExplicitTemplateArgs, Args, Specialization, Info, PartialOverloading, /*AggregateDeductionCandidate=*/false, ObjectType, ObjectClassification, - [&](ArrayRef ParamTypes) { + [&](ArrayRef ParamTypes, bool NonInstOnly) { return CheckNonDependentConversions( MethodTmpl, ParamTypes, Args, CandidateSet, Conversions, -SuppressUserConversions, ActingContext, ObjectType, +SuppressUserConversions, NonInstOnly, ActingContext, ObjectType, ObjectClassification, PO); }); Result != TemplateDeductionResult::Success) { @@ -7818,10 +7818,11 @@ void Sema::AddTemplateOverloadCandidate( PartialOverloading, AggregateCandidateDeduction, /*ObjectType=*/QualType(), /*ObjectClassification=*/Expr::Classification(), - [&](ArrayRef ParamTypes) { + [&](ArrayRef ParamTypes, bool NonInstOnly) { return CheckNonDependentConversions( FunctionTemplate, ParamTypes, Args, CandidateSet, Conversions, -SuppressUserConversions, nullptr, QualType(), {}, PO); +SuppressUserConversions, NonInstOnly, nullptr, QualType(), {}, +PO); }); Result != TemplateDeductionResult::Success) { OverloadCandidate &Candidate = @@ -7863,7 +7864,7 @@ bool Sema::CheckNonDependentConversions( FunctionTemplateDecl *FunctionTemplate, ArrayRef ParamTypes, ArrayRef Args, OverloadCandidateSet 
&CandidateSet, ConversionSequenceList &Conversions, bool SuppressUserConversions, -CXXRecordDecl *ActingContext, QualType ObjectType, +bool NonInstOnly, CXXRecordDecl *ActingContext, QualType ObjectType, Expr::Classification ObjectClassification, OverloadCandidateParamOrder PO) { // FIXME: The cases in which we allow explicit conversions for constructor // arguments never consider calling a constructor template. It's not clear @@ -7900,6 +7901,63 @@ bool Sema::CheckNonDependentConversions( } } + // https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=2154bcd6d43cfd821ca70e1583880c4ed955355d + auto ConversionMightInduceInstantiation = [&](QualType ParmType, +QualType ArgType) { +ParmType = ParmType.getNonReferenceType(); +ArgType = ArgType.getNonReferenceType(); +bool Poin
[llvm-branch-commits] [clang] [Clang][CWG2369] Implement GCC's heuristic for DR 2369 (PR #124231)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 816bda32a6e9c52d93a06e6e19a5ac3fcc66 c36dd4fcac367b206072b36ccc9be4106a22ec3b --extensions cpp,h -- clang/include/clang/Sema/Sema.h clang/lib/Sema/SemaOverload.cpp clang/lib/Sema/SemaTemplateDeduction.cpp clang/test/SemaTemplate/concepts-recursive-inst.cpp `` View the diff from clang-format here. ``diff diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index fd4d1f7e0d..99ca651591 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12273,7 +12273,9 @@ public: sema::TemplateDeductionInfo &Info, SmallVectorImpl const *OriginalCallArgs = nullptr, bool PartialOverloading = false, - llvm::function_ref CheckNonDependent = [](bool) { return false; }); + llvm::function_ref CheckNonDependent = [](bool) { +return false; + }); /// Perform template argument deduction from a function call /// (C++ [temp.deduct.call]). diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index aded8abe5b..6f3400cf79 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7978,7 +7978,8 @@ bool Sema::CheckNonDependentConversions( // For members, 'this' got ConvIdx = 0 previously. 
ConvIdx = ThisConversions + I; } - if (NonInstOnly && ConversionMightInduceInstantiation(ParamType, Args[I]->getType())) + if (NonInstOnly && + ConversionMightInduceInstantiation(ParamType, Args[I]->getType())) continue; Conversions[ConvIdx] = TryCopyInitialization(*this, Args[I], ParamType, diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index a44ad00d2c..dcaaed8613 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -4702,7 +4702,8 @@ TemplateDeductionResult Sema::DeduceTemplateArguments( runWithSufficientStackSpace(Info.getLocation(), [&] { Result = FinishTemplateArgumentDeduction( FunctionTemplate, Deduced, NumExplicitlySpecified, Specialization, Info, -&OriginalCallArgs, PartialOverloading, [&, CallingCtx](bool NonInstOnly) { +&OriginalCallArgs, PartialOverloading, +[&, CallingCtx](bool NonInstOnly) { ContextRAII SavedContext(*this, CallingCtx); return CheckNonDependent(ParamTypesForArgChecking, NonInstOnly); }); `` https://github.com/llvm/llvm-project/pull/124231 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits