[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)

2025-01-23 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/118462

>From 0eae465092e76474a7e87f5617748d091a5d7ca3 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 3 Dec 2024 10:12:36 +
Subject: [PATCH 1/6] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to
 NPM

---
 .../llvm}/CodeGen/RegAllocPriorityAdvisor.h   |  78 +++-
 llvm/include/llvm/InitializePasses.h  |   2 +-
 .../llvm/Passes/MachinePassRegistry.def   |   1 +
 llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp   |   6 +-
 .../lib/CodeGen/MLRegAllocPriorityAdvisor.cpp | 184 +++---
 llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp  |   2 +-
 llvm/lib/CodeGen/RegAllocGreedy.cpp   |   9 +-
 llvm/lib/CodeGen/RegAllocGreedy.h |   2 +-
 llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp  | 155 +++
 llvm/lib/Passes/PassBuilder.cpp   |   1 +
 10 files changed, 320 insertions(+), 120 deletions(-)
 rename llvm/{lib => include/llvm}/CodeGen/RegAllocPriorityAdvisor.h (57%)

diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h 
b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h
similarity index 57%
rename from llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
rename to llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h
index 0758743c2b1403..a53739fdc3fc40 100644
--- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
+++ b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h
@@ -9,8 +9,10 @@
 #ifndef LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
 #define LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
 
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/RegAllocEvictionAdvisor.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
 namespace llvm {
@@ -68,12 +70,72 @@ class DummyPriorityAdvisor : public RegAllocPriorityAdvisor 
{
   unsigned getPriority(const LiveInterval &LI) const override;
 };
 
-class RegAllocPriorityAdvisorAnalysis : public ImmutablePass {
+/// Common provider for getting the priority advisor and logging rewards.
+/// Legacy analysis forwards all calls to this provider.
+/// New analysis serves the provider as the analysis result.
+/// Expensive setup is done in the constructor, so that the advisor can be
+/// created quickly for every machine function.
+/// TODO: Remove once legacy PM support is dropped.
+class RegAllocPriorityAdvisorProvider {
 public:
   enum class AdvisorMode : int { Default, Release, Development, Dummy };
 
-  RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode)
-  : ImmutablePass(ID), Mode(Mode){};
+  RegAllocPriorityAdvisorProvider(AdvisorMode Mode) : Mode(Mode) {}
+
+  virtual ~RegAllocPriorityAdvisorProvider() = default;
+
+  virtual void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref GetReward) {};
+
+  virtual std::unique_ptr
+  getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
+
+  void setAnalyses(SlotIndexes *SI) { this->SI = SI; }
+
+  AdvisorMode getAdvisorMode() const { return Mode; }
+
+protected:
+  SlotIndexes *SI;
+
+private:
+  const AdvisorMode Mode;
+};
+
+RegAllocPriorityAdvisorProvider *createReleaseModePriorityAdvisorProvider();
+
+RegAllocPriorityAdvisorProvider *
+createDevelopmentModePriorityAdvisorProvider(LLVMContext &Ctx);
+
+class RegAllocPriorityAdvisorAnalysis
+: public AnalysisInfoMixin {
+  static AnalysisKey Key;
+  friend AnalysisInfoMixin;
+
+public:
+  struct Result {
+// Owned by this analysis.
+RegAllocPriorityAdvisorProvider *Provider;
+
+bool invalidate(MachineFunction &MF, const PreservedAnalyses &PA,
+MachineFunctionAnalysisManager::Invalidator &Inv) {
+  auto PAC = PA.getChecker();
+  return !PAC.preservedWhenStateless() ||
+ Inv.invalidate(MF, PA);
+}
+  };
+
+  Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
+
+private:
+  void initializeProvider(LLVMContext &Ctx);
+  std::unique_ptr Provider;
+};
+
+class RegAllocPriorityAdvisorAnalysisLegacy : public ImmutablePass {
+public:
+  using AdvisorMode = RegAllocPriorityAdvisorProvider::AdvisorMode;
+  RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode Mode)
+  : ImmutablePass(ID), Mode(Mode) {};
   static char ID;
 
   /// Get an advisor for the given context (i.e. machine function, etc)
@@ -81,7 +143,7 @@ class RegAllocPriorityAdvisorAnalysis : public ImmutablePass 
{
   getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
   AdvisorMode getAdvisorMode() const { return Mode; }
   virtual void logRewardIfNeeded(const MachineFunction &MF,
- llvm::function_ref GetReward){};
+ llvm::function_ref GetReward) {};
 
 protected:
   // This analysis preserves everything, and subclasses may have additional
@@ -97,11 +159,13 @@ class RegAllocPriorityAdvisorAnalysis : public 
ImmutablePass {
 
 /// Specialization for the API used by the analysis infrastructure to create
 /// an instan

[llvm-branch-commits] [lldb] 21f62ea - Revert "[lldb][DWARFASTParserClang] Make C++ method parsing aware of explicit…"

2025-01-23 Thread via llvm-branch-commits

Author: Michael Buch
Date: 2025-01-23T11:19:37Z
New Revision: 21f62eaa7e28867b02d356a97a4fe134eb5d1f59

URL: 
https://github.com/llvm/llvm-project/commit/21f62eaa7e28867b02d356a97a4fe134eb5d1f59
DIFF: 
https://github.com/llvm/llvm-project/commit/21f62eaa7e28867b02d356a97a4fe134eb5d1f59.diff

LOG: Revert "[lldb][DWARFASTParserClang] Make C++ method parsing aware of 
explicit…"

This reverts commit ad6d808906075c3386bbeada3c37d8d3e6afe248.

Added: 


Modified: 
lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp

Removed: 




diff  --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 682ee6d287bf5c..f54b7fc9cdad24 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -173,9 +173,7 @@ GetCXXObjectParameter(const DWARFDIE &subprogram,
   if (!DeclKindIsCXXClass(containing_decl_ctx.getDeclKind()))
 return {};
 
-  if (DWARFDIE object_parameter =
-  subprogram.GetAttributeValueAsReferenceDIE(DW_AT_object_pointer))
-return object_parameter;
+  // FIXME: if subprogram has a explicit DW_AT_object_pointer, use it.
 
   // If no DW_AT_object_pointer was specified, assume the implicit object
   // parameter is the first parameter to the function, is called "this" and is
@@ -217,6 +215,11 @@ static unsigned GetCXXMethodCVQuals(const DWARFDIE 
&subprogram,
 return 0;
 
   uint32_t encoding_mask = this_type->GetEncodingMask();
+
+  // FIXME: explicit object parameters need not to be pointers
+  if (!(encoding_mask & (1u << Type::eEncodingIsPointerUID)))
+return 0;
+
   unsigned cv_quals = 0;
   if (encoding_mask & (1u << Type::eEncodingIsConstUID))
 cv_quals |= clang::Qualifiers::Const;

diff  --git a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp 
b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp
index 9c0300be08a78a..b31f56aa372d58 100644
--- a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp
@@ -902,181 +902,3 @@ TEST_F(DWARFASTParserClangTests, 
TestParseDWARFAttributes_ObjectPointer) {
   EXPECT_TRUE(attrs.object_pointer.IsValid());
   EXPECT_EQ(attrs.object_pointer, param_die);
 }
-
-TEST_F(DWARFASTParserClangTests, TestParseSubroutine_ExplicitObjectParameter) {
-  // Tests parsing of a C++ non-static member function with an explicit object
-  // parameter that isn't called "this" and is not a pointer (but a 
CV-qualified
-  // rvalue reference instead).
-
-  const char *yamldata = R"(
 !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:ELFDATA2LSB
-  Type:ET_EXEC
-  Machine: EM_AARCH64
-DWARF:
-  debug_str:
-- Context
-- func
-- mySelf
-  debug_abbrev:
-- ID:  0
-  Table:
-- Code:0x1
-  Tag: DW_TAG_compile_unit
-  Children:DW_CHILDREN_yes
-  Attributes:
-- Attribute:   DW_AT_language
-  Form:DW_FORM_data2
-- Code:0x2
-  Tag: DW_TAG_structure_type
-  Children:DW_CHILDREN_yes
-  Attributes:
-- Attribute:   DW_AT_name
-  Form:DW_FORM_strp
-- Code:0x3
-  Tag: DW_TAG_subprogram
-  Children:DW_CHILDREN_yes
-  Attributes:
-- Attribute:   DW_AT_name
-  Form:DW_FORM_strp
-- Attribute:   DW_AT_declaration
-  Form:DW_FORM_flag_present
-- Attribute:   DW_AT_object_pointer
-  Form:DW_FORM_ref4
-- Attribute:   DW_AT_external
-  Form:DW_FORM_flag_present
-- Code:0x4
-  Tag: DW_TAG_formal_parameter
-  Children:DW_CHILDREN_no
-  Attributes:
-- Attribute:   DW_AT_name
-  Form:DW_FORM_strp
-- Attribute:   DW_AT_type
-  Form:DW_FORM_ref4
-- Code:0x5
-  Tag: DW_TAG_rvalue_reference_type
-  Children:DW_CHILDREN_no
-  Attributes:
-- Attribute:   DW_AT_type
-  Form:DW_FORM_ref4
-- Code:0x6
-  Tag: DW_TAG_const_type
-  Children:DW_CHILDREN_no
-  Attributes:
-- Attribute:   DW_AT_type
-  Form:DW_FORM_ref4
-- Code:0x7
-  Tag: DW_TAG_volatile_type
-  Children:DW_CHILDREN_no
-  Attributes:
-  

[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Tom Eccles via llvm-branch-commits

tblah wrote:

I'm not sure why the bot didn't run on this.

@llvm/pr-subscribers-flang-openmp

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah edited https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lldb] bb21661 - Revert "[lldb][test] Remove compiler version check and use regex (#123393)"

2025-01-23 Thread via llvm-branch-commits

Author: Michael Buch
Date: 2025-01-23T11:27:19Z
New Revision: bb21661782242f931f3d04eb8fed9be792bd4ef8

URL: 
https://github.com/llvm/llvm-project/commit/bb21661782242f931f3d04eb8fed9be792bd4ef8
DIFF: 
https://github.com/llvm/llvm-project/commit/bb21661782242f931f3d04eb8fed9be792bd4ef8.diff

LOG: Revert "[lldb][test] Remove compiler version check and use regex (#123393)"

This reverts commit b62e55803c52ca04093a0eea361407e849dc23e1.

Added: 


Modified: 

lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py

lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py

Removed: 




diff  --git 
a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
 
b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
index 759077302bfca4..1c3e64f14c 100644
--- 
a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
+++ 
b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
@@ -23,6 +23,13 @@ def test(self):
 
 self.runCmd("settings set target.import-std-module true")
 
+if self.expectedCompiler(["clang"]) and self.expectedCompilerVersion(
+[">", "16.0"]
+):
+vector_type = "std::vector"
+else:
+vector_type = "std::vector >"
+
 size_type = "size_type"
 value_type = "value_type"
 iterator = "iterator"
@@ -34,14 +41,13 @@ def test(self):
 ValueCheck(name="current"),
 ]
 
-self.expect(
-"expr a",
-patterns=[
-"""\(std::vector )*>\) \$0 = size=3 
\{
-  \[0\] = \(a = 3\)
-  \[1\] = \(a = 1\)
-  \[2\] = \(a = 2\)
-\}"""
+self.expect_expr(
+"a",
+result_type=vector_type,
+result_children=[
+ValueCheck(children=[ValueCheck(value="3")]),
+ValueCheck(children=[ValueCheck(value="1")]),
+ValueCheck(children=[ValueCheck(value="2")]),
 ],
 )
 

diff  --git 
a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py
 
b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py
index e18785ec1359cc..a1f33271f39d2f 100644
--- 
a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py
+++ 
b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py
@@ -17,26 +17,42 @@ def test(self):
 self, "// Set break point at this line.", 
lldb.SBFileSpec("main.cpp")
 )
 
+if self.expectedCompiler(["clang"]) and self.expectedCompilerVersion(
+[">", "16.0"]
+):
+vector_type = "std::vector"
+vector_of_vector_type = "std::vector >"
+else:
+vector_type = "std::vector"
+vector_of_vector_type = (
+"std::vector, std::allocator 
> >"
+)
+
 size_type = "size_type"
 value_type = "value_type"
 
 self.runCmd("settings set target.import-std-module true")
 
-self.expect(
-"expr a",
-patterns=[
-"""\(std::vector(, 
std::allocator )* >\) \$0 = size=2 \{
-  \[0\] = size=3 \{
-\[0\] = 1
-\[1\] = 2
-\[2\] = 3
-  \}
-  \[1\] = size=3 \{
-\[0\] = 3
-\[1\] = 2
-\[2\] = 1
-  \}
-\}"""
+self.expect_expr(
+"a",
+result_type=vector_of_vector_type,
+result_children=[
+ValueCheck(
+type=vector_type,
+children=[
+ValueCheck(value="1"),
+ValueCheck(value="2"),
+ValueCheck(value="3"),
+],
+),
+ValueCheck(
+type=vector_type,
+children=[
+ValueCheck(value="3"),
+ValueCheck(value="2"),
+ValueCheck(value="1"),
+],
+),
 ],
 )
 self.expect_expr("a.size()", result_type=size_type, result_value="2")



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah edited https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -488,44 +559,34 @@ void DataSharingProcessor::doPrivatize(const 
semantics::Symbol *sym,
 mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
 firOpBuilder.setInsertionPointToStart(moduleOp.getBody());
 auto result = firOpBuilder.create(
-symLoc, uniquePrivatizerName, symType,
+symLoc, uniquePrivatizerName, allocType,
 isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
: mlir::omp::DataSharingClauseType::Private);
 fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
 lower::SymMapScope outerScope(symTable);
 
-// Populate the `alloc` region.
-{
-  mlir::Region &allocRegion = result.getAllocRegion();
-  mlir::Block *allocEntryBlock = firOpBuilder.createBlock(
-  &allocRegion, /*insertPt=*/{}, symType, symLoc);
-
-  firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
-
-  fir::ExtendedValue localExV =
-  hlfir::translateToExtendedValue(
-  symLoc, firOpBuilder, hlfir::Entity{allocRegion.getArgument(0)},
-  /*contiguousHint=*/
-  evaluate::IsSimplyContiguous(*sym, 
converter.getFoldingContext()))
-  .first;
-
-  symTable.addSymbol(*sym, localExV);
-  lower::SymMapScope innerScope(symTable);
-  cloneSymbol(sym);
-  mlir::Value cloneAddr = symTable.shallowLookupSymbol(*sym).getAddr();
-  mlir::Type cloneType = cloneAddr.getType();
-
-  // A `convert` op is required for variables that are storage associated
-  // via `equivalence`. The problem is that these variables are declared as
-  // `fir.ptr`s while their privatized storage is declared as `fir.ref`,
-  // therefore we convert to proper symbol type.
-  mlir::Value yieldedValue =
-  (symType == cloneType) ? cloneAddr
- : firOpBuilder.createConvert(
-   cloneAddr.getLoc(), symType, cloneAddr);
-
-  firOpBuilder.create(hsb.getAddr().getLoc(),
-  yieldedValue);
+// Populate the `init` region.
+const bool needsInitialization =

ergawy wrote:

Can you comment this variable, providing examples for each case where it is set 
to true? The condition is a bit complex, especially the first part.

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy edited 
https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -20,25 +20,42 @@ namespace mlir {
 class Region;
 } // namespace mlir
 
+namespace Fortran {
+namespace semantics {
+class Symbol;
+} // namespace semantics
+} // namespace Fortran
+
 namespace fir {
 class FirOpBuilder;
 class ShapeShiftOp;
 } // namespace fir
 
 namespace Fortran {
 namespace lower {
+class AbstractConverter;
+
 namespace omp {
 
+enum class DeclOperationKind { Private, FirstPrivate, Reduction };
+inline bool isPrivatization(DeclOperationKind kind) {
+  return (kind == DeclOperationKind::FirstPrivate) ||
+ (kind == DeclOperationKind::Private);
+}
+inline bool isReduction(DeclOperationKind kind) {
+  return kind == DeclOperationKind::Reduction;
+}
+
 /// Generate init and cleanup regions suitable for reduction or privatizer
 /// declarations. `scalarInitValue` may be nullptr if there is no default
-/// initialization (for privatization).
-void populateByRefInitAndCleanupRegions(fir::FirOpBuilder &builder,
-mlir::Location loc, mlir::Type argType,
-mlir::Value scalarInitValue,
-mlir::Block *initBlock,
-mlir::Value allocatedPrivVarArg,
-mlir::Value moldArg,
-mlir::Region &cleanupRegion);
+/// initialization (for privatization). If this is for a privatizer, set
+/// `isPrivate` to `true`.

ergawy wrote:

I think this needs to be updated to refer to `kind`?

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -132,7 +168,7 @@ void DataSharingProcessor::cloneSymbol(const 
semantics::Symbol *sym) {
 
   if (needInitClone()) {
 Fortran::lower::initializeCloneAtRuntime(converter, *sym, symTable);
-callsInitClone = true;
+mightHaveReadMoldArg = true;

ergawy wrote:

The name of this field is a bit confusing because:
1. it does not specify where the mold arg comes from (in this case it is the 
`init` region's mold arg),
2. its use here somewhat implies that `DataSharingProcessor::cloneSymbol` is 
exclusively called for the delayed privatization case, which seems to be the 
opposite, looking at the changes below.

Maybe it can be named: `initializedCloneFromHostSym`, or something similar?

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -96,17 +149,118 @@ fir::ShapeShiftOp 
Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder,
   return shapeShift;
 }
 
+// Initialize box newBox using moldBox. These should both have the same type 
and
+// be boxes containing derived types e.g.
+// fir.box>
+// fir.box>
+// fir.box>>
+// fir.class<...>>
+// If the type doesn't match , this does nothing
+static void initializeIfDerivedTypeBox(fir::FirOpBuilder &builder,
+   mlir::Location loc, mlir::Value newBox,
+   mlir::Value moldBox, bool 
hasInitializer,
+   bool isFirstPrivate) {
+  fir::BoxType boxTy = mlir::dyn_cast(newBox.getType());
+  fir::ClassType classTy = mlir::dyn_cast(newBox.getType());
+  if (!boxTy && !classTy)
+return;
+
+  // remove pointer and array types in the middle
+  mlir::Type eleTy;
+  if (boxTy)
+eleTy = boxTy.getElementType();
+  if (classTy)
+eleTy = classTy.getEleTy();
+  mlir::Type derivedTy = fir::unwrapRefType(eleTy);
+  if (auto array = mlir::dyn_cast(derivedTy))
+derivedTy = array.getElementType();
+
+  if (!fir::isa_derived(derivedTy))
+return;
+  assert(moldBox.getType() == newBox.getType());
+
+  if (hasInitializer)
+fir::runtime::genDerivedTypeInitialize(builder, loc, newBox);
+
+  if (hlfir::mayHaveAllocatableComponent(derivedTy) && !isFirstPrivate)
+fir::runtime::genDerivedTypeInitializeClone(builder, loc, newBox, moldBox);
+}
+
+static void getLengthParameters(fir::FirOpBuilder &builder, mlir::Location loc,
+mlir::Value moldArg,
+llvm::SmallVectorImpl &lenParams) 
{
+  // We pass derived types unboxed and so are not self-contained entities.
+  // Assume that unboxed derived types won't need length paramters.
+  if (!hlfir::isFortranEntity(moldArg))
+return;
+
+  hlfir::genLengthParameters(loc, builder, hlfir::Entity{moldArg}, lenParams);
+  if (lenParams.empty())
+return;
+
+  // The verifier for EmboxOp doesn't allow length parameters when the the
+  // character already has static LEN. genLengthParameters may still return 
them
+  // in this case.
+  mlir::Type unwrappedType =
+  fir::unwrapRefType(fir::unwrapSeqOrBoxedSeqType(moldArg.getType()));
+  if (auto strTy = mlir::dyn_cast(unwrappedType)) {
+if (strTy.hasConstantLen())
+  lenParams.resize(0);
+  }
+}
+
+static bool
+isDerivedTypeNeedingInitialization(const Fortran::semantics::Symbol &sym) {
+  // Fortran::lower::hasDefaultInitialization returns false for ALLOCATABLE, so
+  // re-implement here.
+  // ignorePointer=true because either the pointer points to the same target as
+  // the original variable, or it is uninitialized.
+  if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType())
+if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec =
+declTypeSpec->AsDerived())
+  if (derivedTypeSpec->HasDefaultInitialization(
+  /*ignoreAllocatable=*/false, /*ignorePointer=*/true))
+return true;

ergawy wrote:

nit
```suggestion
  return derivedTypeSpec->HasDefaultInitialization(
  /*ignoreAllocatable=*/false, /*ignorePointer=*/true);
```

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -55,15 +55,19 @@ class MapsForPrivatizedSymbolsPass
 std::underlying_type_t>(
 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO);
 Operation *definingOp = var.getDefiningOp();
-auto declOp = llvm::dyn_cast_or_null(definingOp);
-assert(declOp &&
-   "Expected defining Op of privatized var to be hlfir.declare");
+assert(definingOp &&
+   "Privatizing a block argument without any hlfir.declare");
 
+Value varPtr = var;
 // We want the first result of the hlfir.declare op because our goal
 // is to map the descriptor (fir.box or fir.boxchar) and the first
 // result for hlfir.declare is the descriptor if a the symbol being
 // decalred needs a descriptor.
-Value varPtr = declOp.getBase();
+// Some types are boxed immediately before privatization. These have other
+// operations in between the privatization and the declaration. It is safe
+// to use var directly here because they will be boxed anyay.

ergawy wrote:

```suggestion
// to use var directly here because they will be boxed anyway.
```

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -468,15 +505,49 @@ void DataSharingProcessor::doPrivatize(const 
semantics::Symbol *sym,
   lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
   assert(hsb && "Host symbol box not found");
 
-  mlir::Type symType = hsb.getAddr().getType();
+  mlir::Value privVal = hsb.getAddr();

ergawy wrote:

Isn't this rather `hostVal`?

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -468,15 +505,49 @@ void DataSharingProcessor::doPrivatize(const 
semantics::Symbol *sym,
   lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
   assert(hsb && "Host symbol box not found");
 
-  mlir::Type symType = hsb.getAddr().getType();
+  mlir::Value privVal = hsb.getAddr();
+  mlir::Type allocType;
+  if (mlir::isa(privVal.getType()))
+allocType = privVal.getType();
+  else
+allocType = fir::unwrapRefType(privVal.getType());
+
   mlir::Location symLoc = hsb.getAddr().getLoc();
   std::string privatizerName = sym->name().ToString() + ".privatizer";
   bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
 
+  if (auto poly = mlir::dyn_cast(allocType)) {
+if (!mlir::isa(poly.getEleTy()) && isFirstPrivate)
+  TODO(symLoc, "create polymorphic host associated copy");
+  }

ergawy wrote:

nit: move this up, closer to where we initialize `allocType`. Same for the next 2 
if conditions as well as the declaration of `argType`. Just keeps all type 
handling in one visual block instead of being interrupted by privatizer stuff.

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -34,52 +34,48 @@ def PrivateClauseOp : OpenMP_Op<"private", 
[IsolatedFromAbove, RecipeInterface]>
   let description = [{
 This operation provides a declaration of how to implement the
 [first]privatization of a variable. The dialect users should provide
-information about how to create an instance of the type in the alloc 
region,
-how to initialize the copy from the original item in the copy region, and 
if
-needed, how to deallocate allocated memory in the dealloc region.
+which type should be allocated for this variable. The allocated (usually by
+alloca) variable is passed to the initialization region which does 
everything
+else (e.g. initialization of Fortran runtime descriptors). Information 
about
+how to initialize the copy from the original item should be given in the
+copy region, and if needed, how to deallocate memory (allocated by the
+initialization region) in the dealloc region.

ergawy wrote:

```suggestion
copy region, and if needed, how to deallocate memory (allocated implicitly 
by the
operation) in the dealloc region.
```

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -468,15 +505,49 @@ void DataSharingProcessor::doPrivatize(const 
semantics::Symbol *sym,
   lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
   assert(hsb && "Host symbol box not found");
 
-  mlir::Type symType = hsb.getAddr().getType();
+  mlir::Value privVal = hsb.getAddr();
+  mlir::Type allocType;
+  if (mlir::isa(privVal.getType()))
+allocType = privVal.getType();
+  else
+allocType = fir::unwrapRefType(privVal.getType());

ergawy wrote:

Just a bit easier to read.
```suggestion
  mlir::Type allocType = privVal.getType();
  if (!mlir::isa(allocType))
allocType = fir::unwrapRefType(allocType);
```

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -96,17 +149,118 @@ fir::ShapeShiftOp 
Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder,
   return shapeShift;
 }
 
+// Initialize box newBox using moldBox. These should both have the same type 
and
+// be boxes containing derived types e.g.
+// fir.box>
+// fir.box>
+// fir.box>>
+// fir.class<...>>
+// If the type doesn't match , this does nothing
+static void initializeIfDerivedTypeBox(fir::FirOpBuilder &builder,
+   mlir::Location loc, mlir::Value newBox,
+   mlir::Value moldBox, bool 
hasInitializer,
+   bool isFirstPrivate) {
+  fir::BoxType boxTy = mlir::dyn_cast(newBox.getType());
+  fir::ClassType classTy = mlir::dyn_cast(newBox.getType());
+  if (!boxTy && !classTy)
+return;
+
+  // remove pointer and array types in the middle
+  mlir::Type eleTy;
+  if (boxTy)
+eleTy = boxTy.getElementType();
+  if (classTy)
+eleTy = classTy.getEleTy();
+  mlir::Type derivedTy = fir::unwrapRefType(eleTy);
+  if (auto array = mlir::dyn_cast(derivedTy))
+derivedTy = array.getElementType();
+
+  if (!fir::isa_derived(derivedTy))
+return;
+  assert(moldBox.getType() == newBox.getType());
+
+  if (hasInitializer)
+fir::runtime::genDerivedTypeInitialize(builder, loc, newBox);
+
+  if (hlfir::mayHaveAllocatableComponent(derivedTy) && !isFirstPrivate)
+fir::runtime::genDerivedTypeInitializeClone(builder, loc, newBox, moldBox);
+}
+
+static void getLengthParameters(fir::FirOpBuilder &builder, mlir::Location loc,
+mlir::Value moldArg,
+llvm::SmallVectorImpl &lenParams) 
{
+  // We pass derived types unboxed and so are not self-contained entities.
+  // Assume that unboxed derived types won't need length paramters.
+  if (!hlfir::isFortranEntity(moldArg))
+return;
+
+  hlfir::genLengthParameters(loc, builder, hlfir::Entity{moldArg}, lenParams);
+  if (lenParams.empty())
+return;
+
+  // The verifier for EmboxOp doesn't allow length parameters when the the
+  // character already has static LEN. genLengthParameters may still return 
them
+  // in this case.
+  mlir::Type unwrappedType =
+  fir::unwrapRefType(fir::unwrapSeqOrBoxedSeqType(moldArg.getType()));
+  if (auto strTy = mlir::dyn_cast(unwrappedType)) {
+if (strTy.hasConstantLen())
+  lenParams.resize(0);
+  }
+}
+
+static bool
+isDerivedTypeNeedingInitialization(const Fortran::semantics::Symbol &sym) {
+  // Fortran::lower::hasDefaultInitialization returns false for ALLOCATABLE, so
+  // re-implement here.
+  // ignorePointer=true because either the pointer points to the same target as
+  // the original variable, or it is uninitialized.
+  if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType())
+if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec =
+declTypeSpec->AsDerived())
+  if (derivedTypeSpec->HasDefaultInitialization(
+  /*ignoreAllocatable=*/false, /*ignorePointer=*/true))
+return true;
+  return false;
+}
+
+static mlir::Value generateZeroShapeForRank(fir::FirOpBuilder &builder,
+mlir::Location loc,
+mlir::Value moldArg) {
+  mlir::Type moldVal = fir::unwrapRefType(moldArg.getType());

ergawy wrote:

```suggestion
  mlir::Type moldType = fir::unwrapRefType(moldArg.getType());
```

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -96,17 +149,118 @@ fir::ShapeShiftOp 
Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder,
   return shapeShift;
 }
 
+// Initialize box newBox using moldBox. These should both have the same type 
and
+// be boxes containing derived types e.g.
+// fir.box>
+// fir.box>
+// fir.box>>
+// fir.class<...>>
+// If the type doesn't match, this does nothing
+static void initializeIfDerivedTypeBox(fir::FirOpBuilder &builder,
+   mlir::Location loc, mlir::Value newBox,
+   mlir::Value moldBox, bool 
hasInitializer,
+   bool isFirstPrivate) {
+  fir::BoxType boxTy = mlir::dyn_cast(newBox.getType());
+  fir::ClassType classTy = mlir::dyn_cast(newBox.getType());
+  if (!boxTy && !classTy)
+return;
+
+  // remove pointer and array types in the middle
+  mlir::Type eleTy;
+  if (boxTy)
+eleTy = boxTy.getElementType();
+  if (classTy)
+eleTy = classTy.getEleTy();
+  mlir::Type derivedTy = fir::unwrapRefType(eleTy);
+  if (auto array = mlir::dyn_cast(derivedTy))
+derivedTy = array.getElementType();
+
+  if (!fir::isa_derived(derivedTy))
+return;
+  assert(moldBox.getType() == newBox.getType());

ergawy wrote:

nit: move to the function start to document the pre-conditions expected by it.

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits


@@ -148,41 +309,113 @@ void 
Fortran::lower::omp::populateByRefInitAndCleanupRegions(
 
 builder.setInsertionPointToEnd(initBlock);
 mlir::Value boxAlloca = allocatedPrivVarArg;
+
+moldArg = builder.loadIfRef(loc, moldArg);
+getLengthParameters(builder, loc, moldArg, lenParams);
+
+// The initial state of a private pointer is undefined so we don't need to
+// match the mold argument (OpenMP 5.2 end of page 106).
+if (isPrivatization(kind) &&
+mlir::isa(boxTy.getEleTy())) {
+  // we need a shape with the right rank so that the embox op is lowered
+  // to an llvm struct of the right type. This returns nullptr if the types
+  // aren't right.
+  mlir::Value shape = generateZeroShapeForRank(builder, loc, moldArg);
+  // Just in case, do initialize the box with a null value
+  mlir::Value null = builder.createNullConstant(loc, boxTy.getEleTy());
+  mlir::Value nullBox;
+  if (shape)
+nullBox = builder.create(
+loc, boxTy, null, shape, /*slice=*/mlir::Value{}, lenParams);

ergawy wrote:

I think we can get rid of the `else`, right? If `shape` is an empty value, it 
is the same as passing `Value{}` I think.

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankLegalize (PR #112864)

2025-01-23 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle approved this pull request.


https://github.com/llvm/llvm-project/pull/112864
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)

2025-01-23 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle approved this pull request.


https://github.com/llvm/llvm-project/pull/112866
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] X86: Fix convertToThreeAddress losing subregister indexes (PR #124098)

2025-01-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: Matt Arsenault (arsenm)


Changes

This avoids dozens of regressions in a future patch. These
primarily manifested as assertions where we had copies of 64-bit
registers to 32-bit registers.

This is testable in principle with hand written MIR, but that's
a bit too much x86 for me.

---
Full diff: https://github.com/llvm/llvm-project/pull/124098.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+44-33) 
- (modified) llvm/lib/Target/X86/X86InstrInfo.h (+3-2) 


``diff
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp 
b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 1baac05827c47c..ec9e8ca4ee1447 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1158,8 +1158,9 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
 
 bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
   unsigned Opc, bool AllowSP, Register &NewSrc,
-  bool &isKill, MachineOperand &ImplicitOp,
-  LiveVariables *LV, LiveIntervals *LIS) const 
{
+  unsigned &NewSrcSubReg, bool &isKill,
+  MachineOperand &ImplicitOp, LiveVariables 
*LV,
+  LiveIntervals *LIS) const {
   MachineFunction &MF = *MI.getParent()->getParent();
   const TargetRegisterClass *RC;
   if (AllowSP) {
@@ -1168,12 +1169,14 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, 
const MachineOperand &Src,
 RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : 
&X86::GR32_NOSPRegClass;
   }
   Register SrcReg = Src.getReg();
+  unsigned SubReg = Src.getSubReg();
   isKill = MI.killsRegister(SrcReg, /*TRI=*/nullptr);
 
   // For both LEA64 and LEA32 the register already has essentially the right
   // type (32-bit or 64-bit) we may just need to forbid SP.
   if (Opc != X86::LEA64_32r) {
 NewSrc = SrcReg;
+NewSrcSubReg = SubReg;
 assert(!Src.isUndef() && "Undef op doesn't need optimization");
 
 if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
@@ -1189,6 +1192,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const 
MachineOperand &Src,
 ImplicitOp.setImplicit();
 
 NewSrc = getX86SubSuperRegister(SrcReg, 64);
+assert(!SubReg);
 assert(NewSrc.isValid() && "Invalid Operand");
 assert(!Src.isUndef() && "Undef op doesn't need optimization");
   } else {
@@ -1198,7 +1202,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const 
MachineOperand &Src,
 MachineInstr *Copy =
 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
 .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
-.addReg(SrcReg, getKillRegState(isKill));
+.addReg(SrcReg, getKillRegState(isKill), SubReg);
 
 // Which is obviously going to be dead after we're done with it.
 isKill = true;
@@ -1258,7 +1262,9 @@ MachineInstr 
*X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   MachineBasicBlock::iterator MBBI = MI.getIterator();
   Register Dest = MI.getOperand(0).getReg();
   Register Src = MI.getOperand(1).getReg();
+  unsigned SrcSubReg = MI.getOperand(1).getSubReg();
   Register Src2;
+  unsigned Src2SubReg;
   bool IsDead = MI.getOperand(0).isDead();
   bool IsKill = MI.getOperand(1).isKill();
   unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
@@ -1268,7 +1274,7 @@ MachineInstr 
*X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   MachineInstr *InsMI =
   BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
   .addReg(InRegLEA, RegState::Define, SubReg)
-  .addReg(Src, getKillRegState(IsKill));
+  .addReg(Src, getKillRegState(IsKill), SrcSubReg);
   MachineInstr *ImpDef2 = nullptr;
   MachineInstr *InsMI2 = nullptr;
 
@@ -1306,6 +1312,7 @@ MachineInstr 
*X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   case X86::ADD16rr:
   case X86::ADD16rr_DB: {
 Src2 = MI.getOperand(2).getReg();
+Src2SubReg = MI.getOperand(2).getSubReg();
 bool IsKill2 = MI.getOperand(2).isKill();
 assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need 
optimization");
 if (Src == Src2) {
@@ -1323,7 +1330,7 @@ MachineInstr 
*X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
 InRegLEA2);
   InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegLEA2, RegState::Define, SubReg)
-   .addReg(Src2, getKillRegState(IsKill2));
+   .addReg(Src2, getKillRegState(IsKill2), Src2SubReg);
   addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
 }
 if (LV && IsKill2 && InsMI2)
@@ -1428,6 +1435,7 @@ MachineInstr 
*X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
 
   MachineInstr *NewMI = nullptr;
   Regi

[llvm-branch-commits] [llvm] X86: Fix convertToThreeAddress losing subregister indexes (PR #124098)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/124098

This avoids dozens of regressions in a future patch. These
primarily manifested as assertions where we had copies of 64-bit
registers to 32-bit registers.

This is testable in principle with hand written MIR, but that's
a bit too much x86 for me.

>From 84222b6a4872e768cf951df27d867c59de4cc49a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 23 Jan 2025 16:34:03 +0700
Subject: [PATCH] X86: Fix convertToThreeAddress losing subregister indexes

This avoids dozens of regressions in a future patch. These
primarily manifested as assertions where we had copies of 64-bit
registers to 32-bit registers.

This is testable in principle with hand written MIR, but that's
a bit too much x86 for me.
---
 llvm/lib/Target/X86/X86InstrInfo.cpp | 77 
 llvm/lib/Target/X86/X86InstrInfo.h   |  5 +-
 2 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp 
b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 1baac05827c47c..ec9e8ca4ee1447 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1158,8 +1158,9 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
 
 bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
   unsigned Opc, bool AllowSP, Register &NewSrc,
-  bool &isKill, MachineOperand &ImplicitOp,
-  LiveVariables *LV, LiveIntervals *LIS) const 
{
+  unsigned &NewSrcSubReg, bool &isKill,
+  MachineOperand &ImplicitOp, LiveVariables 
*LV,
+  LiveIntervals *LIS) const {
   MachineFunction &MF = *MI.getParent()->getParent();
   const TargetRegisterClass *RC;
   if (AllowSP) {
@@ -1168,12 +1169,14 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, 
const MachineOperand &Src,
 RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : 
&X86::GR32_NOSPRegClass;
   }
   Register SrcReg = Src.getReg();
+  unsigned SubReg = Src.getSubReg();
   isKill = MI.killsRegister(SrcReg, /*TRI=*/nullptr);
 
   // For both LEA64 and LEA32 the register already has essentially the right
   // type (32-bit or 64-bit) we may just need to forbid SP.
   if (Opc != X86::LEA64_32r) {
 NewSrc = SrcReg;
+NewSrcSubReg = SubReg;
 assert(!Src.isUndef() && "Undef op doesn't need optimization");
 
 if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
@@ -1189,6 +1192,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const 
MachineOperand &Src,
 ImplicitOp.setImplicit();
 
 NewSrc = getX86SubSuperRegister(SrcReg, 64);
+assert(!SubReg);
 assert(NewSrc.isValid() && "Invalid Operand");
 assert(!Src.isUndef() && "Undef op doesn't need optimization");
   } else {
@@ -1198,7 +1202,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const 
MachineOperand &Src,
 MachineInstr *Copy =
 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
 .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
-.addReg(SrcReg, getKillRegState(isKill));
+.addReg(SrcReg, getKillRegState(isKill), SubReg);
 
 // Which is obviously going to be dead after we're done with it.
 isKill = true;
@@ -1258,7 +1262,9 @@ MachineInstr 
*X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   MachineBasicBlock::iterator MBBI = MI.getIterator();
   Register Dest = MI.getOperand(0).getReg();
   Register Src = MI.getOperand(1).getReg();
+  unsigned SrcSubReg = MI.getOperand(1).getSubReg();
   Register Src2;
+  unsigned Src2SubReg;
   bool IsDead = MI.getOperand(0).isDead();
   bool IsKill = MI.getOperand(1).isKill();
   unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
@@ -1268,7 +1274,7 @@ MachineInstr 
*X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   MachineInstr *InsMI =
   BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
   .addReg(InRegLEA, RegState::Define, SubReg)
-  .addReg(Src, getKillRegState(IsKill));
+  .addReg(Src, getKillRegState(IsKill), SrcSubReg);
   MachineInstr *ImpDef2 = nullptr;
   MachineInstr *InsMI2 = nullptr;
 
@@ -1306,6 +1312,7 @@ MachineInstr 
*X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   case X86::ADD16rr:
   case X86::ADD16rr_DB: {
 Src2 = MI.getOperand(2).getReg();
+Src2SubReg = MI.getOperand(2).getSubReg();
 bool IsKill2 = MI.getOperand(2).isKill();
 assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need 
optimization");
 if (Src == Src2) {
@@ -1323,7 +1330,7 @@ MachineInstr 
*X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
 InRegLEA2);
   InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegL

[llvm-branch-commits] [llvm] X86: Fix convertToThreeAddress losing subregister indexes (PR #124098)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/124098
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] X86: Fix convertToThreeAddress losing subregister indexes (PR #124098)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124098?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#124098** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124098?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124098?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#124095** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124095?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/124098
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PassBuilder][CodeGen] Add callback style pass buider (PR #116913)

2025-01-23 Thread Akshat Oke via llvm-branch-commits

optimisan wrote:

Will new codegen support disabling individual passes? Instead of having 
separate arguments like `-disable-machine-sink` we could do 
`-disable-passes=machine-sink`. 

https://github.com/llvm/llvm-project/pull/116913
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)

2025-01-23 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan edited 
https://github.com/llvm/llvm-project/pull/119540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)

2025-01-23 Thread Akshat Oke via llvm-branch-commits


@@ -146,11 +149,137 @@ static cl::opt SplitThresholdForRegWithHint(
 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
 
-char RAGreedy::ID = 0;
-char &llvm::RAGreedyID = RAGreedy::ID;
+namespace {
+class RAGreedyLegacy : public MachineFunctionPass {
+  RegAllocFilterFunc F;
 
-INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
-"Greedy Register Allocator", false, false)
+public:
+  RAGreedyLegacy(const RegAllocFilterFunc F = nullptr);
+
+  static char ID;
+  /// Return the pass name.
+  StringRef getPassName() const override { return "Greedy Register Allocator"; 
}
+
+  /// RAGreedy analysis usage.
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  /// Perform register allocation.
+  bool runOnMachineFunction(MachineFunction &mf) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::NoPHIs);
+  }
+
+  MachineFunctionProperties getClearedProperties() const override {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::IsSSA);
+  }
+};
+
+} // end anonymous namespace
+
+RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F)
+: MachineFunctionPass(ID), F(F) {
+  initializeRAGreedyLegacyPass(*PassRegistry::getPassRegistry());
+}
+
+RAGreedy::RAGreedy(const RegAllocFilterFunc F) : RegAllocBase(F) {}
+
+void RAGreedy::setAnalyses(RequiredAnalyses &Analyses) {
+  VRM = Analyses.VRM;
+  LIS = Analyses.LIS;
+  Matrix = Analyses.LRM;
+  Indexes = Analyses.Indexes;
+  MBFI = Analyses.MBFI;
+  DomTree = Analyses.DomTree;
+  Loops = Analyses.Loops;
+  ORE = Analyses.ORE;
+  Bundles = Analyses.Bundles;
+  SpillPlacer = Analyses.SpillPlacer;
+  DebugVars = Analyses.DebugVars;
+  LSS = Analyses.LSS;
+  EvictProvider = Analyses.EvictProvider;
+  PriorityProvider = Analyses.PriorityProvider;
+}
+
+void RAGreedyPass::printPipeline(raw_ostream &OS, 
function_ref MapClassName2PassName) const {
+  StringRef FilterName = Opts.FilterName.empty() ? "all" : Opts.FilterName;
+  OS << "regallocgreedy<" << FilterName << ">";
+}
+
+PreservedAnalyses RAGreedyPass::run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM) {
+  MFPropsModifier _(*this, MF);
+
+  RAGreedy Impl(Opts.Filter);
+  RAGreedy::RequiredAnalyses Analyses;
+
+  Analyses.LIS = &MFAM.getResult(MF);
+  Analyses.LRM = &MFAM.getResult(MF);
+  Analyses.LSS = &MFAM.getResult(MF);
+  Analyses.Indexes = &MFAM.getResult(MF);
+  Analyses.MBFI = &MFAM.getResult(MF);
+  Analyses.DomTree = &MFAM.getResult(MF);
+  Analyses.ORE = &MFAM.getResult(MF);
+  Analyses.Loops = &MFAM.getResult(MF);
+  Analyses.Bundles = &MFAM.getResult(MF);
+  Analyses.SpillPlacer = &MFAM.getResult(MF);
+  Analyses.DebugVars = &MFAM.getResult(MF);
+  Analyses.EvictProvider =
+  MFAM.getResult(MF).Provider;
+  Analyses.PriorityProvider =
+  MFAM.getResult(MF).Provider;
+  Analyses.VRM = &MFAM.getResult(MF);
+
+  Impl.setAnalyses(Analyses);

optimisan wrote:

Oh, I changed the evictionadvisor provider api so forgot to simplify this out.

https://github.com/llvm/llvm-project/pull/119540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)

2025-01-23 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/119540

>From 74007cb20fbf8508ca457fc30d693ffa4526b432 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 11 Dec 2024 08:51:55 +
Subject: [PATCH 1/5] [CodeGen][NewPM] Port RegAllocGreedy to NPM

---
 llvm/include/llvm/CodeGen/MachineFunction.h   |   1 +
 llvm/include/llvm/CodeGen/Passes.h|   2 +-
 llvm/include/llvm/InitializePasses.h  |   2 +-
 .../llvm/Passes/MachinePassRegistry.def   |   9 +
 llvm/lib/CodeGen/CodeGen.cpp  |   2 +-
 llvm/lib/CodeGen/RegAllocGreedy.cpp   | 185 ++
 llvm/lib/CodeGen/RegAllocGreedy.h |  57 +++---
 llvm/lib/Passes/PassBuilder.cpp   |   1 +
 8 files changed, 196 insertions(+), 63 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h 
b/llvm/include/llvm/CodeGen/MachineFunction.h
index d517b5e6647291..c2a82888c65211 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -911,6 +911,7 @@ class LLVM_ABI MachineFunction {
 
   /// Run the current MachineFunction through the machine code verifier, useful
   /// for debugger use.
+  /// TODO: Add the param LiveStks
   /// \returns true if no problems were found.
   bool verify(LiveIntervals *LiveInts, SlotIndexes *Indexes,
   const char *Banner = nullptr, raw_ostream *OS = nullptr,
diff --git a/llvm/include/llvm/CodeGen/Passes.h 
b/llvm/include/llvm/CodeGen/Passes.h
index d1fac4a304cffe..1096c34b307f9b 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -167,7 +167,7 @@ namespace llvm {
   extern char &LiveRangeShrinkID;
 
   /// Greedy register allocator.
-  extern char &RAGreedyID;
+  extern char &RAGreedyLegacyID;
 
   /// Basic register allocator.
   extern char &RABasicID;
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index e74b85c0de886f..afe0aa6113dd21 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -250,7 +250,7 @@ void 
initializeProfileSummaryInfoWrapperPassPass(PassRegistry &);
 void initializePromoteLegacyPassPass(PassRegistry &);
 void initializeRABasicPass(PassRegistry &);
 void initializePseudoProbeInserterPass(PassRegistry &);
-void initializeRAGreedyPass(PassRegistry &);
+void initializeRAGreedyLegacyPass(PassRegistry &);
 void initializeReachingDefAnalysisPass(PassRegistry &);
 void initializeReassociateLegacyPassPass(PassRegistry &);
 void initializeRegAllocEvictionAdvisorAnalysisLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index d512c6459b5a4e..00aae5a4c30b97 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -187,6 +187,15 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
   return parseRegAllocFastPassOptions(*PB, Params);
 },
 "filter=reg-filter;no-clear-vregs")
+
+MACHINE_FUNCTION_PASS_WITH_PARAMS(
+"regallocgreedy", "RAGreedy",
+[](RegAllocFilterFunc F) { return RAGreedyPass(F); },
+[PB = this](StringRef Params) {
+  // TODO: parseRegAllocFilter(*PB, Params);
+  return Expected(nullptr);
+}, ""
+)
 #undef MACHINE_FUNCTION_PASS_WITH_PARAMS
 
 // After a pass is converted to new pass manager, its entry should be moved 
from
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 925d9af7d0e06d..0f76024bc24e15 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -112,7 +112,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
   initializeProcessImplicitDefsPass(Registry);
   initializeRABasicPass(Registry);
-  initializeRAGreedyPass(Registry);
+  initializeRAGreedyLegacyPass(Registry);
   initializeRegAllocFastPass(Registry);
   initializeRegUsageInfoCollectorLegacyPass(Registry);
   initializeRegUsageInfoPropagationLegacyPass(Registry);
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp 
b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 72c38ab8c7d07b..49d251b4fe4d57 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -43,8 +43,10 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegAllocEvictionAdvisor.h"
+#include "llvm/CodeGen/RegAllocGreedyPass.h"
 #include "llvm/CodeGen/RegAllocPriorityAdvisor.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
@@ -55,6 +57,7 @@
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Analysis.h"
 #include "l

[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy edited 
https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy commented:

Thanks for the great work @tblah! I partially reviewed since this is a huge PR 
(understandably). I will come back and continue reviewing the rest later.

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Tom Eccles via llvm-branch-commits


@@ -34,52 +34,48 @@ def PrivateClauseOp : OpenMP_Op<"private", 
[IsolatedFromAbove, RecipeInterface]>
   let description = [{
 This operation provides a declaration of how to implement the
 [first]privatization of a variable. The dialect users should provide
-information about how to create an instance of the type in the alloc 
region,
-how to initialize the copy from the original item in the copy region, and 
if
-needed, how to deallocate allocated memory in the dealloc region.
+which type should be allocated for this variable. The allocated (usually by
+alloca) variable is passed to the initialization region which does 
everything
+else (e.g. initialization of Fortran runtime descriptors). Information 
about
+how to initialize the copy from the original item should be given in the
+copy region, and if needed, how to deallocate memory (allocated by the
+initialization region) in the dealloc region.

tblah wrote:

Thanks for reviewing the PR. These readability comments are very helpful.

The LLVM lowering is responsible for managing the implicitly allocated memory 
(it is usually just a stack allocation).

The `dealloc` region undoes what is done in the `init` region. So for example 
for a `!fir.box>>`, the box is allocated implicitly 
on the stack and then the `init` region performs the heap allocation for the 
array and the dealloc region frees that heap allocation.

I will add an example in the omp.private documentation to make it clearer.

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Tom Eccles via llvm-branch-commits


@@ -488,44 +559,34 @@ void DataSharingProcessor::doPrivatize(const 
semantics::Symbol *sym,
 mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
 firOpBuilder.setInsertionPointToStart(moduleOp.getBody());
 auto result = firOpBuilder.create(
-symLoc, uniquePrivatizerName, symType,
+symLoc, uniquePrivatizerName, allocType,
 isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
: mlir::omp::DataSharingClauseType::Private);
 fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
 lower::SymMapScope outerScope(symTable);
 
-// Populate the `alloc` region.
-{
-  mlir::Region &allocRegion = result.getAllocRegion();
-  mlir::Block *allocEntryBlock = firOpBuilder.createBlock(
-  &allocRegion, /*insertPt=*/{}, symType, symLoc);
-
-  firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
-
-  fir::ExtendedValue localExV =
-  hlfir::translateToExtendedValue(
-  symLoc, firOpBuilder, hlfir::Entity{allocRegion.getArgument(0)},
-  /*contiguousHint=*/
-  evaluate::IsSimplyContiguous(*sym, 
converter.getFoldingContext()))
-  .first;
-
-  symTable.addSymbol(*sym, localExV);
-  lower::SymMapScope innerScope(symTable);
-  cloneSymbol(sym);
-  mlir::Value cloneAddr = symTable.shallowLookupSymbol(*sym).getAddr();
-  mlir::Type cloneType = cloneAddr.getType();
-
-  // A `convert` op is required for variables that are storage associated
-  // via `equivalence`. The problem is that these variables are declared as
-  // `fir.ptr`s while their privatized storage is declared as `fir.ref`,
-  // therefore we convert to proper symbol type.
-  mlir::Value yieldedValue =
-  (symType == cloneType) ? cloneAddr
- : firOpBuilder.createConvert(
-   cloneAddr.getLoc(), symType, cloneAddr);
-
-  firOpBuilder.create(hsb.getAddr().getLoc(),
-  yieldedValue);
+// Populate the `init` region.
+const bool needsInitialization =
+(Fortran::lower::hasDefaultInitialization(sym->GetUltimate()) &&
+ (!isFirstPrivate || hlfir::mayHaveAllocatableComponent(allocType))) ||
+mlir::isa(allocType) ||
+mlir::isa(allocType);
+if (needsInitialization) {
+  mlir::Region &initRegion = result.getInitRegion();
+  mlir::Block *initBlock = firOpBuilder.createBlock(
+  &initRegion, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc});
+
+  populateByRefInitAndCleanupRegions(
+  converter, symLoc, argType, /*scalarInitValue=*/nullptr, initBlock,
+  result.getInitPrivateArg(), result.getInitMoldArg(),
+  result.getDeallocRegion(),
+  isFirstPrivate ? DeclOperationKind::FirstPrivate
+ : DeclOperationKind::Private,
+  sym);
+  // TODO: currently there are false positives from dead uses of the mold
+  // arg
+  if (!result.getInitMoldArg().getUses().empty())
+mightHaveReadMoldArg = true;

tblah wrote:

Quite a lot of cases actually. Some examples:
- Derived types that need a runtime initialization call but which have no 
allocatable components (the allocatable component initialization does read from 
the mold argument)
- Pointers only need to be set to NULL
- Arrays with compile-time known sizes. In this case the box is allocated 
implicitly but memory for the actual array needs to be allocated (on the heap) 
in the init region and the box has to be set up to point to that allocated 
array and contain the correct shape.
- Characters with compile-time known sizes

Currently there are some cases where there are dead loads from the mold 
argument or maybe even reading character length parameters which never get 
used. These are eventually removed by DCE but would lead to a false positive 
here. I hope to improve the init region generation in a future patch to remove 
these cases.

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)

2025-01-23 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/112866

>From d3f746c12fc0e741dfe754f19f394d770a22e84c Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 31 Oct 2024 14:10:57 +0100
Subject: [PATCH] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi

Change existing code for G_PHI to match what LLVM-IR version is doing
via PHINode::hasConstantOrUndefValue. This is not safe for regular PHI
since it may appear with an undef operand and getVRegDef can fail.
Most notably this improves number of values that can be allocated
to sgpr register bank in AMDGPURegBankSelect.
Common case here are phis that appear in structurize-cfg lowering
for cycles with multiple exits:
Undef incoming value is coming from block that reached cycle exit
condition, if other incoming is uniform keep the phi uniform despite
the fact it is joining values from pair of blocks that are entered
via divergent condition branch.
---
 llvm/lib/CodeGen/MachineSSAContext.cpp| 27 +-
 .../AMDGPU/MIR/hidden-diverge-gmir.mir| 28 +++
 .../AMDGPU/MIR/hidden-loop-diverge.mir|  4 +-
 .../AMDGPU/MIR/uses-value-from-cycle.mir  |  8 +-
 .../GlobalISel/divergence-structurizer.mir| 80 --
 .../regbankselect-mui-regbanklegalize.mir | 69 ---
 .../regbankselect-mui-regbankselect.mir   | 18 ++--
 .../AMDGPU/GlobalISel/regbankselect-mui.ll| 84 ++-
 .../AMDGPU/GlobalISel/regbankselect-mui.mir   | 51 ++-
 9 files changed, 191 insertions(+), 178 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp 
b/llvm/lib/CodeGen/MachineSSAContext.cpp
index e384187b6e8593..8e13c0916dd9e1 100644
--- a/llvm/lib/CodeGen/MachineSSAContext.cpp
+++ b/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -54,9 +54,34 @@ const MachineBasicBlock 
*MachineSSAContext::getDefBlock(Register value) const {
   return F->getRegInfo().getVRegDef(value)->getParent();
 }
 
+static bool isUndef(const MachineInstr &MI) {
+  return MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
+ MI.getOpcode() == TargetOpcode::IMPLICIT_DEF;
+}
+
+/// MachineInstr equivalent of PHINode::hasConstantOrUndefValue() for G_PHI.
 template <>
 bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
-  return Phi.isConstantValuePHI();
+  if (!Phi.isPHI())
+return false;
+
+  // In later passes PHI may appear with an undef operand, getVRegDef can fail.
+  if (Phi.getOpcode() == TargetOpcode::PHI)
+return Phi.isConstantValuePHI();
+
+  // For G_PHI we do equivalent of PHINode::hasConstantOrUndefValue().
+  const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo();
+  Register This = Phi.getOperand(0).getReg();
+  Register ConstantValue;
+  for (unsigned i = 1, e = Phi.getNumOperands(); i < e; i += 2) {
+Register Incoming = Phi.getOperand(i).getReg();
+if (Incoming != This && !isUndef(*MRI.getVRegDef(Incoming))) {
+  if (ConstantValue && ConstantValue != Incoming)
+return false;
+  ConstantValue = Incoming;
+}
+  }
+  return true;
 }
 
 template <>
diff --git 
a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir 
b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
index ce00edf3363f77..9694a340b5e906 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
@@ -1,24 +1,24 @@
 # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | 
FileCheck %s
 # CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge
 # CHECK-LABEL: BLOCK bb.0
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.workitem.id.x)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, 
%{{[0-9]*}}:_
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
-# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
-# CHECK: DIVERGENT: G_BR %bb.2
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.workitem.id.x)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, 
%{{[0-9]*}}:_
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
+# CHECK: DIVERGENT: G_BR %bb.2
 # CHECK-LABEL: BLOCK bb.1
 # CHECK-LABEL: BLOCK bb.2
-# CHECK: D

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RegBankLegalize rules for load (PR #112882)

2025-01-23 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/112882

>From 0adced1346e563e75aab408c2a948cdbd8c449c3 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 23 Jan 2025 13:35:07 +0100
Subject: [PATCH] AMDGPU/GlobalISel: RegBankLegalize rules for load

Add IDs for bit widths that cover multiple LLTs: B32, B64, etc.
Add a "Predicate" wrapper class for bool predicate functions, used to
write pretty rules. Predicates can be combined using &&, || and !.
Add lowering for splitting and widening loads.
Write the rules for loads so that existing MIR tests from the old
regbankselect do not change.
---
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 288 +++-
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h  |   5 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 278 ++-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h   |  65 +++-
 .../AMDGPU/GlobalISel/regbankselect-load.mir  | 320 +++---
 .../GlobalISel/regbankselect-zextload.mir |   9 +-
 6 files changed, 900 insertions(+), 65 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index d27fa1f62538b6..3c007987b84947 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -50,6 +50,83 @@ void 
RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
   lower(MI, Mapping, WaterfallSgprs);
 }
 
+void RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
+  ArrayRef LLTBreakdown, LLT MergeTy) 
{
+  MachineFunction &MF = B.getMF();
+  assert(MI.getNumMemOperands() == 1);
+  MachineMemOperand &BaseMMO = **MI.memoperands_begin();
+  Register Dst = MI.getOperand(0).getReg();
+  const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
+  Register Base = MI.getOperand(1).getReg();
+  LLT PtrTy = MRI.getType(Base);
+  const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base);
+  LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
+  SmallVector LoadPartRegs;
+
+  unsigned ByteOffset = 0;
+  for (LLT PartTy : LLTBreakdown) {
+Register BasePlusOffset;
+if (ByteOffset == 0) {
+  BasePlusOffset = Base;
+} else {
+  auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
+  BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0);
+}
+auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
+auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
+LoadPartRegs.push_back(LoadPart.getReg(0));
+ByteOffset += PartTy.getSizeInBytes();
+  }
+
+  if (!MergeTy.isValid()) {
+// Loads are of same size, concat or merge them together.
+B.buildMergeLikeInstr(Dst, LoadPartRegs);
+  } else {
+// Loads are not all of same size, need to unmerge them to smaller pieces
+// of MergeTy type, then merge pieces to Dst.
+SmallVector MergeTyParts;
+for (Register Reg : LoadPartRegs) {
+  if (MRI.getType(Reg) == MergeTy) {
+MergeTyParts.push_back(Reg);
+  } else {
+auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg);
+for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i)
+  MergeTyParts.push_back(Unmerge.getReg(i));
+  }
+}
+B.buildMergeLikeInstr(Dst, MergeTyParts);
+  }
+  MI.eraseFromParent();
+}
+
+void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
+  LLT MergeTy) {
+  MachineFunction &MF = B.getMF();
+  assert(MI.getNumMemOperands() == 1);
+  MachineMemOperand &BaseMMO = **MI.memoperands_begin();
+  Register Dst = MI.getOperand(0).getReg();
+  const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
+  Register Base = MI.getOperand(1).getReg();
+
+  MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy);
+  auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO);
+
+  if (WideTy.isScalar()) {
+B.buildTrunc(Dst, WideLoad);
+  } else {
+SmallVector MergeTyParts;
+auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad);
+
+LLT DstTy = MRI.getType(Dst);
+unsigned NumElts = DstTy.getSizeInBits() / MergeTy.getSizeInBits();
+for (unsigned i = 0; i < NumElts; ++i) {
+  MergeTyParts.push_back(Unmerge.getReg(i));
+}
+B.buildMergeLikeInstr(Dst, MergeTyParts);
+  }
+  MI.eraseFromParent();
+}
+
 void RegBankLegalizeHelper::lower(MachineInstr &MI,
   const RegBankLLTMapping &Mapping,
   SmallSet &WaterfallSgprs) {
@@ -128,6 +205,54 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
 MI.eraseFromParent();
 break;
   }
+  case SplitLoad: {
+LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+unsigned Size = DstTy.getSizeInBits();
+// Even split to 128-bit loads
+if (Size > 128) {
+  LLT B128;
+  if (DstTy.isVector()) {
+LLT EltTy = DstTy.getElementType();
+B128 = LLT::f

[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)

2025-01-23 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/112866

>From d3f746c12fc0e741dfe754f19f394d770a22e84c Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 31 Oct 2024 14:10:57 +0100
Subject: [PATCH] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi

Change the existing code for G_PHI to match what the LLVM-IR version does
via PHINode::hasConstantOrUndefValue. This is not safe for a regular PHI,
since it may appear with an undef operand, for which getVRegDef can fail.
Most notably, this increases the number of values that can be allocated
to the sgpr register bank in AMDGPURegBankSelect.
The common case here is phis that appear in structurize-cfg lowering
for cycles with multiple exits:
the undef incoming value comes from the block that reached the cycle exit
condition. If the other incoming value is uniform, keep the phi uniform even
though it joins values from a pair of blocks that are entered
via a divergent conditional branch.
---
 llvm/lib/CodeGen/MachineSSAContext.cpp| 27 +-
 .../AMDGPU/MIR/hidden-diverge-gmir.mir| 28 +++
 .../AMDGPU/MIR/hidden-loop-diverge.mir|  4 +-
 .../AMDGPU/MIR/uses-value-from-cycle.mir  |  8 +-
 .../GlobalISel/divergence-structurizer.mir| 80 --
 .../regbankselect-mui-regbanklegalize.mir | 69 ---
 .../regbankselect-mui-regbankselect.mir   | 18 ++--
 .../AMDGPU/GlobalISel/regbankselect-mui.ll| 84 ++-
 .../AMDGPU/GlobalISel/regbankselect-mui.mir   | 51 ++-
 9 files changed, 191 insertions(+), 178 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp 
b/llvm/lib/CodeGen/MachineSSAContext.cpp
index e384187b6e8593..8e13c0916dd9e1 100644
--- a/llvm/lib/CodeGen/MachineSSAContext.cpp
+++ b/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -54,9 +54,34 @@ const MachineBasicBlock 
*MachineSSAContext::getDefBlock(Register value) const {
   return F->getRegInfo().getVRegDef(value)->getParent();
 }
 
+static bool isUndef(const MachineInstr &MI) {
+  return MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
+ MI.getOpcode() == TargetOpcode::IMPLICIT_DEF;
+}
+
+/// MachineInstr equivalent of PHINode::hasConstantOrUndefValue() for G_PHI.
 template <>
 bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
-  return Phi.isConstantValuePHI();
+  if (!Phi.isPHI())
+return false;
+
+  // In later passes PHI may appear with an undef operand, getVRegDef can fail.
+  if (Phi.getOpcode() == TargetOpcode::PHI)
+return Phi.isConstantValuePHI();
+
+  // For G_PHI we do equivalent of PHINode::hasConstantOrUndefValue().
+  const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo();
+  Register This = Phi.getOperand(0).getReg();
+  Register ConstantValue;
+  for (unsigned i = 1, e = Phi.getNumOperands(); i < e; i += 2) {
+Register Incoming = Phi.getOperand(i).getReg();
+if (Incoming != This && !isUndef(*MRI.getVRegDef(Incoming))) {
+  if (ConstantValue && ConstantValue != Incoming)
+return false;
+  ConstantValue = Incoming;
+}
+  }
+  return true;
 }
 
 template <>
diff --git 
a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir 
b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
index ce00edf3363f77..9694a340b5e906 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
@@ -1,24 +1,24 @@
 # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | 
FileCheck %s
 # CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge
 # CHECK-LABEL: BLOCK bb.0
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.workitem.id.x)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, 
%{{[0-9]*}}:_
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
-# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
-# CHECK: DIVERGENT: G_BR %bb.2
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.workitem.id.x)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, 
%{{[0-9]*}}:_
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
+# CHECK: DIVERGENT: G_BR %bb.2
 # CHECK-LABEL: BLOCK bb.1
 # CHECK-LABEL: BLOCK bb.2
-# CHECK: D

[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)

2025-01-23 Thread Nathan Gauër via llvm-branch-commits


@@ -1,16 +1,21 @@
 // RUN: %clang_cc1 -finclude-default-header -triple 
dxil-pc-shadermodel6.3-library %s \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-// CHECK-DAG: @[[CB:.+]] = external constant { float }
+// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s 
\
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
 cbuffer A {
-float a;
-  // CHECK-DAG:@_ZL1b = internal global float 3.00e+00, align 4
+  // CHECK: @a = external addrspace(2) externally_initialized global float, 
align 4
+  float a;
+  // CHECK: @_ZL1b = internal global float 3.00e+00, align 4
   static float b = 3;

Keenuts wrote:

If the end goal is not to support this, but we "do" because we just inherit the 
behavior for now, I'd be in favor of checking that this is disallowed and marking 
the test as XFAIL with this issue referenced.

https://github.com/llvm/llvm-project/pull/123411
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] Rename libFortranRuntime.a to libflang_rt.a (PR #122341)

2025-01-23 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur updated 
https://github.com/llvm/llvm-project/pull/122341

>From c77098f90a5c20bdbce078a0ee3aec1fe53772e3 Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Wed, 8 Jan 2025 11:23:02 +0100
Subject: [PATCH 1/5] clang-format to sort headers

---
 flang/tools/f18-parse-demo/f18-parse-demo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/tools/f18-parse-demo/f18-parse-demo.cpp 
b/flang/tools/f18-parse-demo/f18-parse-demo.cpp
index 90bbce246e3f16..a50c88dc840643 100644
--- a/flang/tools/f18-parse-demo/f18-parse-demo.cpp
+++ b/flang/tools/f18-parse-demo/f18-parse-demo.cpp
@@ -21,7 +21,6 @@
 // scaffolding compiler driver that can test some semantic passes of the
 // F18 compiler under development.
 
-#include "flang/Support/Fortran-features.h"
 #include "flang/Parser/characters.h"
 #include "flang/Parser/dump-parse-tree.h"
 #include "flang/Parser/message.h"
@@ -30,6 +29,7 @@
 #include "flang/Parser/parsing.h"
 #include "flang/Parser/provenance.h"
 #include "flang/Parser/unparse.h"
+#include "flang/Support/Fortran-features.h"
 #include "flang/Support/default-kinds.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/FileSystem.h"

>From 22ed7ebde19d4003fa3036039f75977b1e6b9f60 Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Wed, 8 Jan 2025 14:15:45 +0100
Subject: [PATCH 2/5] FortranRuntime -> flang_rt

---
 clang/lib/Driver/ToolChains/CommonArgs.cpp|  4 +-
 clang/lib/Driver/ToolChains/Flang.cpp |  8 ++--
 flang-rt/unittests/Evaluate/CMakeLists.txt| 21 ++
 flang/CMakeLists.txt  |  2 +-
 flang/docs/FlangDriver.md |  8 ++--
 flang/docs/GettingStarted.md  |  6 +--
 flang/docs/OpenACC-descriptor-management.md   |  2 +-
 flang/docs/ReleaseNotes.md|  2 +
 .../ExternalHelloWorld/CMakeLists.txt |  2 +-
 flang/runtime/CMakeLists.txt  | 40 +++
 flang/runtime/CUDA/CMakeLists.txt |  2 +-
 flang/runtime/Float128Math/CMakeLists.txt |  2 +-
 flang/runtime/time-intrinsic.cpp  |  2 +-
 flang/test/CMakeLists.txt |  6 ++-
 .../test/Driver/gcc-toolchain-install-dir.f90 |  2 +-
 flang/test/Driver/linker-flags.f90|  8 ++--
 .../test/Driver/msvc-dependent-lib-flags.f90  |  8 ++--
 flang/test/Driver/nostdlib.f90|  2 +-
 flang/test/Runtime/no-cpp-dep.c   |  2 +-
 flang/test/lit.cfg.py |  2 +-
 flang/tools/f18/CMakeLists.txt|  8 ++--
 flang/unittests/CMakeLists.txt|  2 +-
 flang/unittests/Evaluate/CMakeLists.txt   |  9 +++--
 flang/unittests/Frontend/CMakeLists.txt   |  1 +
 flang/unittests/Runtime/CMakeLists.txt|  2 +-
 flang/unittests/Runtime/CUDA/CMakeLists.txt   |  2 +-
 lld/COFF/MinGW.cpp|  2 +-
 27 files changed, 97 insertions(+), 60 deletions(-)
 create mode 100644 flang-rt/unittests/Evaluate/CMakeLists.txt

diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp 
b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 9e9872975de9c2..4c6b9f29f362ca 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1317,7 +1317,7 @@ void tools::addOpenMPHostOffloadingArgs(const Compilation 
&C,
 /// Add Fortran runtime libs
 void tools::addFortranRuntimeLibs(const ToolChain &TC, const ArgList &Args,
   llvm::opt::ArgStringList &CmdArgs) {
-  // Link FortranRuntime
+  // Link flang_rt
   // These are handled earlier on Windows by telling the frontend driver to
   // add the correct libraries to link against as dependents in the object
   // file.
@@ -1333,7 +1333,7 @@ void tools::addFortranRuntimeLibs(const ToolChain &TC, 
const ArgList &Args,
   if (AsNeeded)
 addAsNeededOption(TC, Args, CmdArgs, /*as_needed=*/false);
 }
-CmdArgs.push_back("-lFortranRuntime");
+CmdArgs.push_back("-lflang_rt");
 addArchSpecificRPath(TC, Args, CmdArgs);
   }
 
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 749af4ada9a696..2cf1108b28dab3 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -356,26 +356,26 @@ static void processVSRuntimeLibrary(const ToolChain &TC, 
const ArgList &Args,
   case options::OPT__SLASH_MT:
 CmdArgs.push_back("-D_MT");
 CmdArgs.push_back("--dependent-lib=libcmt");
-CmdArgs.push_back("--dependent-lib=FortranRuntime.static.lib");
+CmdArgs.push_back("--dependent-lib=flang_rt.static.lib");
 break;
   case options::OPT__SLASH_MTd:
 CmdArgs.push_back("-D_MT");
 CmdArgs.push_back("-D_DEBUG");
 CmdArgs.push_back("--dependent-lib=libcmtd");
-CmdArgs.push_back("--dependent-lib=FortranRuntime.static_dbg.lib");
+CmdArgs.push_back("--dependent-lib=flang_rt.static_dbg.lib");
 break;
   case options::OPT__SLASH_M

[llvm-branch-commits] [flang] [Flang] Promote FortranEvaluateTesting library (PR #122334)

2025-01-23 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur updated 
https://github.com/llvm/llvm-project/pull/122334

>From 74432e2d5d4916f09ee6f60a4d80f3f5a96f1b12 Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Thu, 9 Jan 2025 11:31:23 +0100
Subject: [PATCH 1/3] Promote Testing lib

---
 .../flang/Testing}/fp-testing.h| 14 +++---
 .../Evaluate => include/flang/Testing}/testing.h   | 14 +++---
 .../Evaluate => lib/Testing}/fp-testing.cpp| 10 +-
 .../Evaluate => lib/Testing}/testing.cpp   | 10 +-
 flang/unittests/Evaluate/CMakeLists.txt|  4 ++--
 flang/unittests/Evaluate/ISO-Fortran-binding.cpp   |  2 +-
 flang/unittests/Evaluate/bit-population-count.cpp  |  2 +-
 flang/unittests/Evaluate/expression.cpp|  2 +-
 flang/unittests/Evaluate/folding.cpp   |  2 +-
 flang/unittests/Evaluate/integer.cpp   |  2 +-
 flang/unittests/Evaluate/intrinsics.cpp|  2 +-
 .../unittests/Evaluate/leading-zero-bit-count.cpp  |  2 +-
 flang/unittests/Evaluate/logical.cpp   |  2 +-
 flang/unittests/Evaluate/real.cpp  |  4 ++--
 flang/unittests/Evaluate/reshape.cpp   |  2 +-
 flang/unittests/Evaluate/uint128.cpp   |  2 +-
 16 files changed, 54 insertions(+), 22 deletions(-)
 rename flang/{unittests/Evaluate => include/flang/Testing}/fp-testing.h (54%)
 rename flang/{unittests/Evaluate => include/flang/Testing}/testing.h (74%)
 rename flang/{unittests/Evaluate => lib/Testing}/fp-testing.cpp (87%)
 rename flang/{unittests/Evaluate => lib/Testing}/testing.cpp (88%)

diff --git a/flang/unittests/Evaluate/fp-testing.h 
b/flang/include/flang/Testing/fp-testing.h
similarity index 54%
rename from flang/unittests/Evaluate/fp-testing.h
rename to flang/include/flang/Testing/fp-testing.h
index 9091963a99b32d..e223d2ef7d1b8b 100644
--- a/flang/unittests/Evaluate/fp-testing.h
+++ b/flang/include/flang/Testing/fp-testing.h
@@ -1,5 +1,13 @@
-#ifndef FORTRAN_TEST_EVALUATE_FP_TESTING_H_
-#define FORTRAN_TEST_EVALUATE_FP_TESTING_H_
+//===-- include/flang/Testing/fp-testing.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_TESTING_FP_TESTING_H_
+#define FORTRAN_TESTING_FP_TESTING_H_
 
 #include "flang/Common/target-rounding.h"
 #include 
@@ -24,4 +32,4 @@ class ScopedHostFloatingPointEnvironment {
 #endif
 };
 
-#endif // FORTRAN_TEST_EVALUATE_FP_TESTING_H_
+#endif /* FORTRAN_TESTING_FP_TESTING_H_ */
diff --git a/flang/unittests/Evaluate/testing.h 
b/flang/include/flang/Testing/testing.h
similarity index 74%
rename from flang/unittests/Evaluate/testing.h
rename to flang/include/flang/Testing/testing.h
index 422e2853c05bc6..404650c9a89f2c 100644
--- a/flang/unittests/Evaluate/testing.h
+++ b/flang/include/flang/Testing/testing.h
@@ -1,5 +1,13 @@
-#ifndef FORTRAN_EVALUATE_TESTING_H_
-#define FORTRAN_EVALUATE_TESTING_H_
+//===-- include/flang/Testing/testing.h -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_TESTING_TESTING_H_
+#define FORTRAN_TESTING_TESTING_H_
 
 #include 
 #include 
@@ -33,4 +41,4 @@ FailureDetailPrinter Match(const char *file, int line, const 
std::string &want,
 FailureDetailPrinter Compare(const char *file, int line, const char *xs,
 const char *rel, const char *ys, std::uint64_t x, std::uint64_t y);
 } // namespace testing
-#endif // FORTRAN_EVALUATE_TESTING_H_
+#endif /* FORTRAN_TESTING_TESTING_H_ */
diff --git a/flang/unittests/Evaluate/fp-testing.cpp 
b/flang/lib/Testing/fp-testing.cpp
similarity index 87%
rename from flang/unittests/Evaluate/fp-testing.cpp
rename to flang/lib/Testing/fp-testing.cpp
index 1a1d7425d58249..5e1728e8df5e4b 100644
--- a/flang/unittests/Evaluate/fp-testing.cpp
+++ b/flang/lib/Testing/fp-testing.cpp
@@ -1,4 +1,12 @@
-#include "fp-testing.h"
+//===-- lib/Testing/fp-testing.cpp --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "flang/Testing/fp-testing.h"
 #include "llvm/Support/Errno.h"
 #include 
 #include 
diff --git a/flang/unittests/Evaluate/testing.cpp 
b/flang/lib/Testing/testing.cpp
similarity index 88%
rename from flang/unittests/Evaluate/

[llvm-branch-commits] [flang] [Flang] Optionally do not compile the runtime in-tree (PR #122336)

2025-01-23 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur updated 
https://github.com/llvm/llvm-project/pull/122336

>From dd3ac2e6d8d8d57cd639c25bea3b8d5c99a2f81e Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Thu, 9 Jan 2025 15:58:48 +0100
Subject: [PATCH 1/9] Introduce FLANG_INCLUDE_RUNTIME

---
 flang/CMakeLists.txt|  7 +++-
 flang/test/CMakeLists.txt   |  6 +++-
 flang/test/Driver/ctofortran.f90|  1 +
 flang/test/Driver/exec.f90  |  1 +
 flang/test/Runtime/no-cpp-dep.c |  2 +-
 flang/test/lit.cfg.py   |  5 ++-
 flang/test/lit.site.cfg.py.in   |  1 +
 flang/tools/f18/CMakeLists.txt  |  4 +--
 flang/unittests/CMakeLists.txt  |  6 ++--
 flang/unittests/Evaluate/CMakeLists.txt | 46 ++---
 10 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 68947eaa9c9bd7..69e963a43d0b97 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -233,6 +233,9 @@ else()
   include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR})
 endif()
 
+option(FLANG_INCLUDE_RUNTIME "Build the runtime in-tree (deprecated; to be 
replaced with LLVM_ENABLE_RUNTIMES=flang-rt)" ON)
+pythonize_bool(FLANG_INCLUDE_RUNTIME)
+
 set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH
 "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')")
 mark_as_advanced(FLANG_TOOLS_INSTALL_DIR)
@@ -473,7 +476,9 @@ if (FLANG_CUF_RUNTIME)
   find_package(CUDAToolkit REQUIRED)
 endif()
 
-add_subdirectory(runtime)
+if (FLANG_INCLUDE_RUNTIME)
+  add_subdirectory(runtime)
+endif ()
 
 if (LLVM_INCLUDE_EXAMPLES)
   add_subdirectory(examples)
diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt
index cab214c2ef4c8c..e398e0786147aa 100644
--- a/flang/test/CMakeLists.txt
+++ b/flang/test/CMakeLists.txt
@@ -71,9 +71,13 @@ set(FLANG_TEST_DEPENDS
   llvm-objdump
   llvm-readobj
   split-file
-  FortranRuntime
   FortranDecimal
 )
+
+if (FLANG_INCLUDE_RUNTIME)
+  list(APPEND FLANG_TEST_DEPENDS FortranRuntime)
+endif ()
+
 if (LLVM_ENABLE_PLUGINS AND NOT WIN32)
   list(APPEND FLANG_TEST_DEPENDS Bye)
 endif()
diff --git a/flang/test/Driver/ctofortran.f90 b/flang/test/Driver/ctofortran.f90
index 78eac32133b18e..10c7adaccc9588 100644
--- a/flang/test/Driver/ctofortran.f90
+++ b/flang/test/Driver/ctofortran.f90
@@ -1,4 +1,5 @@
 ! UNSUPPORTED: system-windows
+! REQUIRES: flang-rt
 ! RUN: split-file %s %t
 ! RUN: chmod +x %t/runtest.sh
 ! RUN: %t/runtest.sh %t %t/ffile.f90 %t/cfile.c %flang | FileCheck %s
diff --git a/flang/test/Driver/exec.f90 b/flang/test/Driver/exec.f90
index fd174005ddf62a..9ca91ee24011c9 100644
--- a/flang/test/Driver/exec.f90
+++ b/flang/test/Driver/exec.f90
@@ -1,4 +1,5 @@
 ! UNSUPPORTED: system-windows
+! REQUIRES: flang-rt
 ! Verify that flang can correctly build executables.
 
 ! RUN: %flang %s -o %t
diff --git a/flang/test/Runtime/no-cpp-dep.c b/flang/test/Runtime/no-cpp-dep.c
index b1a5fa004014cc..7303ce63fdec41 100644
--- a/flang/test/Runtime/no-cpp-dep.c
+++ b/flang/test/Runtime/no-cpp-dep.c
@@ -3,7 +3,7 @@ This test makes sure that flang's runtime does not depend on 
the C++ runtime
 library. It tries to link this simple file against libFortranRuntime.a with
 a C compiler.
 
-REQUIRES: c-compiler
+REQUIRES: c-compiler, flang-rt
 
 RUN: %if system-aix %{ export OBJECT_MODE=64 %}
 RUN: %cc -std=c99 %s -I%include %libruntime -lm  \
diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py
index c452b6d231c89f..78378bf5f413e8 100644
--- a/flang/test/lit.cfg.py
+++ b/flang/test/lit.cfg.py
@@ -163,10 +163,13 @@
 ToolSubst("%not_todo_abort_cmd", command=FindTool("not"), 
unresolved="fatal")
 )
 
+if config.flang_include_runtime:
+  config.available_features.add("flang-rt")
+
 # Define some variables to help us test that the flang runtime doesn't depend 
on
 # the C++ runtime libraries. For this we need a C compiler. If for some reason
 # we don't have one, we can just disable the test.
-if config.cc:
+if config.flang_include_runtime and config.cc:
 libruntime = os.path.join(config.flang_lib_dir, "libFortranRuntime.a")
 include = os.path.join(config.flang_src_dir, "include")
 
diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in
index d1a0ac763cf8a0..19f9330f93ae14 100644
--- a/flang/test/lit.site.cfg.py.in
+++ b/flang/test/lit.site.cfg.py.in
@@ -32,6 +32,7 @@ else:
 config.openmp_module_dir = None
 config.flang_runtime_f128_math_lib = "@FLANG_RUNTIME_F128_MATH_LIB@"
 config.have_ldbl_mant_dig_113 = "@HAVE_LDBL_MANT_DIG_113@"
+config.flang_include_runtime = @FLANG_INCLUDE_RUNTIME@
 
 import lit.llvm
 lit.llvm.initialize(lit_config, config)
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 4362fcf0537616..022c346aabdbde 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -72,7 +72,7 @@ if (NOT CMAKE_CROSSCOMPILING)
   set(depends ${FLANG_

[llvm-branch-commits] [flang] [Flang] Remove FLANG_INCLUDE_RUNTIME (PR #124126)

2025-01-23 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur created 
https://github.com/llvm/llvm-project/pull/124126

Remove the FLANG_INCLUDE_RUNTIME option which was replaced by 
LLVM_ENABLE_RUNTIMES=flang-rt.

This PR does not (yet) include adding `LLVM_ENABLE_RUNTIMES=flang-rt` 
implicitly. CMake command lines must be updated to get a working Fortran 
toolchain.

>From bd152c56900698f727c8ece9889a03ec693c13ad Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Wed, 22 Jan 2025 20:45:26 +0100
Subject: [PATCH] [Flang] Remove FLANG_INCLUDE_RUNTIME

---
 flang/CMakeLists.txt  |  25 +-
 .../modules/AddFlangOffloadRuntime.cmake  | 146 
 flang/runtime/CMakeLists.txt  | 350 --
 flang/runtime/CUDA/CMakeLists.txt |  41 --
 flang/runtime/Float128Math/CMakeLists.txt | 133 ---
 flang/test/CMakeLists.txt |  10 -
 flang/test/lit.cfg.py |   3 -
 flang/test/lit.site.cfg.py.in |   1 -
 flang/tools/f18/CMakeLists.txt|  17 +-
 flang/unittests/CMakeLists.txt|  43 +--
 flang/unittests/Evaluate/CMakeLists.txt   |  16 -
 11 files changed, 5 insertions(+), 780 deletions(-)
 delete mode 100644 flang/cmake/modules/AddFlangOffloadRuntime.cmake
 delete mode 100644 flang/runtime/CMakeLists.txt
 delete mode 100644 flang/runtime/CUDA/CMakeLists.txt
 delete mode 100644 flang/runtime/Float128Math/CMakeLists.txt

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 38004c149b7835..aceb2d09c54388 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -23,7 +23,6 @@ if (LLVM_ENABLE_EH)
 endif()
 
 set(FLANG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang-rt")
 
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE)
   message(FATAL_ERROR "In-source builds are not allowed. \
@@ -237,24 +236,8 @@ else()
   include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR})
 endif()
 
-set(FLANG_INCLUDE_RUNTIME_default ON)
-if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES)
-  set(FLANG_INCLUDE_RUNTIME_default OFF)
-endif ()
-option(FLANG_INCLUDE_RUNTIME "Build the runtime in-tree (deprecated; to be 
replaced with LLVM_ENABLE_RUNTIMES=flang-rt)" FLANG_INCLUDE_RUNTIME_default)
-if (FLANG_INCLUDE_RUNTIME)
-  if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES)
-message(WARNING "Building Flang-RT using LLVM_ENABLE_RUNTIMES. 
FLANG_INCLUDE_RUNTIME=${FLANG_INCLUDE_RUNTIME} ignored.")
-set(FLANG_INCLUDE_RUNTIME OFF)
-  else ()
- message(STATUS "Building flang_rt in-tree")
-  endif ()
-else ()
-  if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES)
-message(STATUS "Building Flang-RT using LLVM_ENABLE_RUNTIMES.")
-  else ()
-message(STATUS "Not building Flang-RT. For a usable Fortran toolchain, 
compile a standalone Flang-RT")
-  endif ()
+if (NOT "flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES)
+  message(STATUS "Not building Flang-RT. For a usable Fortran toolchain, 
compile a standalone Flang-RT")
 endif ()
 
 set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH
@@ -484,10 +467,6 @@ if (FLANG_CUF_RUNTIME)
   find_package(CUDAToolkit REQUIRED)
 endif()
 
-if (FLANG_INCLUDE_RUNTIME)
-  add_subdirectory(runtime)
-endif ()
-
 if (LLVM_INCLUDE_EXAMPLES)
   add_subdirectory(examples)
 endif()
diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake 
b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
deleted file mode 100644
index 8e4f47d18535dc..00
--- a/flang/cmake/modules/AddFlangOffloadRuntime.cmake
+++ /dev/null
@@ -1,146 +0,0 @@
-option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
-  "Compile Fortran runtime as CUDA sources (experimental)" OFF
-  )
-
-option(FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS
-  "Do not compile global variables' definitions when producing PTX library" OFF
-  )
-
-set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")
-
-set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
-  "Compile Fortran runtime as OpenMP target offload sources (experimental). 
Valid options are 'off', 'host_device', 'nohost'")
-
-set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
-  "List of OpenMP device architectures to be used to compile the Fortran 
runtime (e.g. 'gfx1103;sm_90')")
-
-macro(enable_cuda_compilation name files)
-  if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
-if (BUILD_SHARED_LIBS)
-  message(FATAL_ERROR
-"BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
-)
-endif()
-
-enable_language(CUDA)
-
-# TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
-# work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
-set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
-
-# Treat all supported sources as CUDA files.
-set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA)
-set(CUDA_COMPILE_OPTIONS)
-if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
-  # Allow varargs.
-  set(CUDA_COMP

[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Tom Eccles via llvm-branch-commits


@@ -55,15 +55,19 @@ class MapsForPrivatizedSymbolsPass
 std::underlying_type_t>(
 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO);
 Operation *definingOp = var.getDefiningOp();
-auto declOp = llvm::dyn_cast_or_null(definingOp);
-assert(declOp &&
-   "Expected defining Op of privatized var to be hlfir.declare");
+assert(definingOp &&
+   "Privatizing a block argument without any hlfir.declare");

tblah wrote:

MLIR values can come from two places:
1. the result of an operation
2. a block argument

We can't assume that getting the defining operation produces a non-null result 
because the value might be a block argument.

For example,
```
func.func @func(%arg0 : !type0, %arg1 : !type1) {
  %0 = hlfir.declare %arg0
  omp.private(%0, %arg1)
}
```
Here we can get a defining operation for `%0` because it is the `hlfir.declare` 
for that function argument (which is the normal way flang would lower a 
function argument). `%arg1` has no defining operation because it is a block 
argument.

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah edited https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 8573475 - Revert "[InstCombine] Teach foldSelectOpOp about samesign (#122723)"

2025-01-23 Thread via llvm-branch-commits

Author: Alexander Kornienko
Date: 2025-01-23T15:40:05+01:00
New Revision: 8573475d5cc1c40679a86d6992a42e5c564a1f23

URL: 
https://github.com/llvm/llvm-project/commit/8573475d5cc1c40679a86d6992a42e5c564a1f23
DIFF: 
https://github.com/llvm/llvm-project/commit/8573475d5cc1c40679a86d6992a42e5c564a1f23.diff

LOG: Revert "[InstCombine] Teach foldSelectOpOp about samesign (#122723)"

This reverts commit 48757e02ba2c1651c268351d062f80923baceda4.

Added: 


Modified: 
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
llvm/test/Transforms/InstCombine/select-cmp.ll

Removed: 




diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index d5d9a829c3068a..f66a976ccb47fe 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -428,10 +428,10 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst 
&SI, Instruction *TI,
 CmpPredicate TPred, FPred;
 if (match(TI, m_ICmp(TPred, m_Value(), m_Value())) &&
 match(FI, m_ICmp(FPred, m_Value(), m_Value( {
-  bool Swapped = ICmpInst::isRelational(FPred) &&
- CmpPredicate::getMatching(
- TPred, ICmpInst::getSwappedCmpPredicate(FPred));
-  if (CmpPredicate::getMatching(TPred, FPred) || Swapped) {
+  // FIXME: Use CmpPredicate::getMatching here.
+  CmpInst::Predicate T = TPred, F = FPred;
+  if (T == F || T == ICmpInst::getSwappedCmpPredicate(F)) {
+bool Swapped = T != F;
 if (Value *MatchOp =
 getCommonOp(TI, FI, ICmpInst::isEquality(TPred), Swapped)) {
   Value *NewSel = Builder.CreateSelect(Cond, OtherOpT, OtherOpF,

diff  --git a/llvm/test/Transforms/InstCombine/select-cmp.ll 
b/llvm/test/Transforms/InstCombine/select-cmp.ll
index 7e5d5821d9f6a7..f7505bd85f89eb 100644
--- a/llvm/test/Transforms/InstCombine/select-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-cmp.ll
@@ -23,18 +23,6 @@ define i1 @icmp_ne_common_op00(i1 %c, i6 %x, i6 %y, i6 %z) {
   ret i1 %r
 }
 
-define i1 @icmp_ne_samesign_common(i1 %c, i6 %x, i6 %y, i6 %z) {
-; CHECK-LABEL: @icmp_ne_samesign_common(
-; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]]
-; CHECK-NEXT:[[R:%.*]] = icmp ne i6 [[X:%.*]], [[R_V]]
-; CHECK-NEXT:ret i1 [[R]]
-;
-  %cmp1 = icmp samesign ne i6 %x, %y
-  %cmp2 = icmp ne i6 %x, %z
-  %r = select i1 %c, i1 %cmp1, i1 %cmp2
-  ret i1 %r
-}
-
 define i1 @icmp_ne_common_op01(i1 %c, i3 %x, i3 %y, i3 %z) {
 ; CHECK-LABEL: @icmp_ne_common_op01(
 ; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i3 [[Y:%.*]], i3 [[Z:%.*]]
@@ -83,18 +71,6 @@ define i1 @icmp_eq_common_op00(i1 %c, i5 %x, i5 %y, i5 %z) {
   ret i1 %r
 }
 
-define i1 @icmp_eq_samesign_common(i1 %c, i5 %x, i5 %y, i5 %z) {
-; CHECK-LABEL: @icmp_eq_samesign_common(
-; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i5 [[Y:%.*]], i5 [[Z:%.*]]
-; CHECK-NEXT:[[R:%.*]] = icmp eq i5 [[X:%.*]], [[R_V]]
-; CHECK-NEXT:ret i1 [[R]]
-;
-  %cmp1 = icmp eq i5 %x, %y
-  %cmp2 = icmp samesign eq i5 %x, %z
-  %r = select i1 %c, i1 %cmp1, i1 %cmp2
-  ret i1 %r
-}
-
 define <5 x i1> @icmp_eq_common_op01(<5 x i1> %c, <5 x i7> %x, <5 x i7> %y, <5 
x i7> %z) {
 ; CHECK-LABEL: @icmp_eq_common_op01(
 ; CHECK-NEXT:[[R_V:%.*]] = select <5 x i1> [[C:%.*]], <5 x i7> [[Y:%.*]], 
<5 x i7> [[Z:%.*]]
@@ -158,18 +134,6 @@ define i1 @icmp_slt_common(i1 %c, i6 %x, i6 %y, i6 %z) {
   ret i1 %r
 }
 
-define i1 @icmp_slt_samesign_common(i1 %c, i6 %x, i6 %y, i6 %z) {
-; CHECK-LABEL: @icmp_slt_samesign_common(
-; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]]
-; CHECK-NEXT:[[R:%.*]] = icmp ult i6 [[X:%.*]], [[R_V]]
-; CHECK-NEXT:ret i1 [[R]]
-;
-  %cmp1 = icmp samesign ult i6 %x, %y
-  %cmp2 = icmp slt i6 %x, %z
-  %r = select i1 %c, i1 %cmp1, i1 %cmp2
-  ret i1 %r
-}
-
 define i1 @icmp_sgt_common(i1 %c, i6 %x, i6 %y, i6 %z) {
 ; CHECK-LABEL: @icmp_sgt_common(
 ; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]]
@@ -182,18 +146,6 @@ define i1 @icmp_sgt_common(i1 %c, i6 %x, i6 %y, i6 %z) {
   ret i1 %r
 }
 
-define i1 @icmp_sgt_samesign_common(i1 %c, i6 %x, i6 %y, i6 %z) {
-; CHECK-LABEL: @icmp_sgt_samesign_common(
-; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]]
-; CHECK-NEXT:[[R:%.*]] = icmp ugt i6 [[X:%.*]], [[R_V]]
-; CHECK-NEXT:ret i1 [[R]]
-;
-  %cmp1 = icmp samesign ugt i6 %x, %y
-  %cmp2 = icmp sgt i6 %x, %z
-  %r = select i1 %c, i1 %cmp1, i1 %cmp2
-  ret i1 %r
-}
-
 define i1 @icmp_sle_common(i1 %c, i6 %x, i6 %y, i6 %z) {
 ; CHECK-LABEL: @icmp_sle_common(
 ; CHECK-NEXT:[[R_V:%.*]] = select i1 [[C:%.*]], i6 [[Y:%.*]], i6 [[Z:%.*]]
@@ -206,18 +158,6 @@ define i1 @icmp_sle_common(i1 %c, i6 %x, i6 %y, i6 %z) {
   ret i1 %r
 }
 
-define i1 @icm

[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)

2025-01-23 Thread Nathan Gauër via llvm-branch-commits


@@ -1,7 +1,14 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s \
+// RUN: %clang_cc1 -finclude-default-header -triple 
dxil-pc-shadermodel6.3-library %s \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
+// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s 
\
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK: @a = external addrspace(2) externally_initialized global float, 
align 4
+// CHECK: @b = external addrspace(2) externally_initialized global double, 
align 8
+// CHECK: @c = external addrspace(2) externally_initialized global float, 
align 4
+// CHECK: @d = external addrspace(2) externally_initialized global double, 
align 8
+
 // CHECK: @[[CB:.+]] = external constant { float, double }

Keenuts wrote:

Ok, so this `CB` will be replaced by the CBV, and the actual globals will be 
deleted in favor of the intrinsic, if I understand correctly. Thanks!

https://github.com/llvm/llvm-project/pull/123411
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Rework update of waves per eu (PR #123995)

2025-01-23 Thread Shilei Tian via llvm-branch-commits


@@ -1109,74 +1109,38 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute 
{
 Function *F = getAssociatedFunction();
 auto &InfoCache = static_cast(A.getInfoCache());
 
-auto TakeRange = [&](std::pair R) {
-  auto [Min, Max] = R;
-  ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
-  IntegerRangeState RangeState(Range);
-  clampStateAndIndicateChange(this->getState(), RangeState);
-  indicateOptimisticFixpoint();
-};
-
-std::pair MaxWavesPerEURange{
-1U, InfoCache.getMaxWavesPerEU(*F)};
-
 // If the attribute exists, we will honor it if it is not the default.
 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
+  std::pair MaxWavesPerEURange{
+  1U, InfoCache.getMaxWavesPerEU(*F)};
   if (*Attr != MaxWavesPerEURange) {
-TakeRange(*Attr);
+auto [Min, Max] = *Attr;
+ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
+IntegerRangeState RangeState(Range);
+clampStateAndIndicateChange(this->getState(), RangeState);
+indicateOptimisticFixpoint();
 return;
   }
 }
 
-// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the
-// calculation of waves per EU involves flat work group size, we can't
-// simply use an assumed flat work group size as a start point, because the
-// update of flat work group size is in an inverse direction of waves per
-// EU. However, we can still do something if it is an entry function. Since
-// an entry function is a terminal node, and flat work group size either
-// from attribute or default will be used anyway, we can take that value 
and
-// calculate the waves per EU based on it. This result can't be updated by
-// no means, but that could still allow us to propagate it.
-if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
-  std::pair FlatWorkGroupSize;
-  if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F))
-FlatWorkGroupSize = *Attr;
-  else
-FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F);
-  TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange,
- FlatWorkGroupSize));
-}
+if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
+  indicatePessimisticFixpoint();
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
-auto &InfoCache = static_cast(A.getInfoCache());
 ChangeStatus Change = ChangeStatus::UNCHANGED;
 
 auto CheckCallSite = [&](AbstractCallSite CS) {
   Function *Caller = CS.getInstruction()->getFunction();
-  Function *Func = getAssociatedFunction();
-  LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
-<< "->" << Func->getName() << '\n');
-
   const auto *CallerInfo = A.getAAFor(
   *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
-  const auto *AssumedGroupSize = A.getAAFor(
-  *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
-  if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() ||
-  !AssumedGroupSize->isValidState())
+  if (!CallerInfo || !CallerInfo->isValidState())
 return false;
-
-  unsigned Min, Max;
-  std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
-  *Caller,
-  {CallerInfo->getAssumed().getLower().getZExtValue(),
-   CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
-  {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
-   AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
-  ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
+  unsigned Min = CallerInfo->getAssumed().getLower().getZExtValue();
+  unsigned Max = CallerInfo->getAssumed().getUpper().getZExtValue();
+  ConstantRange CallerRange(APInt(32, Min), APInt(32, Max));
   IntegerRangeState CallerRangeState(CallerRange);
   Change |= clampStateAndIndicateChange(this->getState(), 
CallerRangeState);

shiltian wrote:

@arsenm did I get this part right?

https://github.com/llvm/llvm-project/pull/123995
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Rework update of waves per eu (PR #123995)

2025-01-23 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/123995
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [Flang-RT] Build libflang_rt.so (PR #121782)

2025-01-23 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur updated 
https://github.com/llvm/llvm-project/pull/121782

>From a3037ab5557dcc4a4deb5bb40f801ca9770e3854 Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Mon, 6 Jan 2025 16:44:08 +0100
Subject: [PATCH 1/7] Add FLANG_RT_ENABLE_STATIC and FLANG_RT_ENABLE_SHARED

---
 flang-rt/CMakeLists.txt   |  30 ++
 flang-rt/cmake/modules/AddFlangRT.cmake   | 291 --
 .../cmake/modules/AddFlangRTOffload.cmake |   8 +-
 flang-rt/cmake/modules/GetToolchainDirs.cmake | 254 +++
 flang-rt/lib/flang_rt/CMakeLists.txt  |  20 +-
 flang-rt/test/CMakeLists.txt  |   2 +-
 flang-rt/test/lit.cfg.py  |   2 +-
 7 files changed, 366 insertions(+), 241 deletions(-)

diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt
index 7b3d22e454a108..7effa6012a078f 100644
--- a/flang-rt/CMakeLists.txt
+++ b/flang-rt/CMakeLists.txt
@@ -113,6 +113,15 @@ cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR)
 cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH)
 
 # Determine subdirectories for build output and install destinations.
+# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good
+#destination because it is not a ld.so default search path.
+#The machine where the executable is eventually executed may not be the
+#machine where the Flang compiler and its resource dir is installed, so
+#setting RPath by the driver is not an solution. It should belong into
+#/usr/lib//libflang_rt.so, like e.g. libgcc_s.so.
+#But the linker as invoked by the Flang driver also requires
+#libflang_rt.so to be found when linking and the resource lib dir is
+#the only reliable location.
 get_toolchain_library_subdir(toolchain_lib_subdir)
 extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" 
"${toolchain_lib_subdir}")
 extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH 
"${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}")
@@ -130,6 +139,27 @@ cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH)
 option(FLANG_RT_INCLUDE_TESTS "Generate build targets for the flang-rt unit 
and regression-tests." "${LLVM_INCLUDE_TESTS}")
 
 
+option(FLANG_RT_ENABLE_STATIC "Build Flang-RT as a static library." ON)
+if (WIN32)
+  # Windows DLL currently not implemented.
+  set(FLANG_RT_ENABLE_SHARED OFF)
+else ()
+  # TODO: Enable by default to increase test coverage, and which version of the
+  #   library should be the user's choice anyway.
+  #   Currently, the Flang driver adds `-L"libdir" -lflang_rt` as linker
+  #   argument, which leaves the choice which library to use to the linker.
+  #   Since most linkers prefer the shared library, this would constitute a
+  #   breaking change unless the driver is changed.
+  option(FLANG_RT_ENABLE_SHARED "Build Flang-RT as a shared library." OFF)
+endif ()
+if (NOT FLANG_RT_ENABLE_STATIC AND NOT FLANG_RT_ENABLE_SHARED)
+  message(FATAL_ERROR "
+  Must build at least one type of library
+  (FLANG_RT_ENABLE_STATIC=ON, FLANG_RT_ENABLE_SHARED=ON, or both)
+")
+endif ()
+
+
 set(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT "" CACHE STRING "Compile Flang-RT 
with GPU support (CUDA or OpenMP)")
 set_property(CACHE FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT PROPERTY STRINGS
 ""
diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake 
b/flang-rt/cmake/modules/AddFlangRT.cmake
index 1f8b5111433825..5f493a80c35f20 100644
--- a/flang-rt/cmake/modules/AddFlangRT.cmake
+++ b/flang-rt/cmake/modules/AddFlangRT.cmake
@@ -16,7 +16,8 @@
 #   STATIC
 # Build a static (.a/.lib) library
 #   OBJECT
-# Create only object files without static/dynamic library
+# Always create an object library.
+# Without SHARED/STATIC, build only the object library.
 #   INSTALL_WITH_TOOLCHAIN
 # Install library into Clang's resource directory so it can be found by the
 # Flang driver during compilation, including tests
@@ -44,17 +45,73 @@ function (add_flangrt_library name)
   ")
   endif ()
 
-  # Forward libtype to add_library
-  set(extra_args "")
-  if (ARG_SHARED)
-list(APPEND extra_args SHARED)
+  # Internal names of libraries. If called with just single type option, use
+  # the default name for it. Name of targets must only depend on function
+  # arguments to be predictable for callers.
+  set(name_static "${name}.static")
+  set(name_shared "${name}.shared")
+  set(name_object "obj.${name}")
+  if (ARG_STATIC AND NOT ARG_SHARED)
+set(name_static "${name}")
+  elseif (NOT ARG_STATIC AND ARG_SHARED)
+set(name_shared "${name}")
+  elseif (NOT ARG_STATIC AND NOT ARG_SHARED AND ARG_OBJECT)
+set(name_object "${name}")
+  elseif (NOT ARG_STATIC AND NOT ARG_SHARED AND NOT ARG_OBJECT)
+# Only one of them will actually be built.
+set(name_static "${name}")
+set(name_shared "${name}")
+  endif ()
+
+  # Determine what to build. If not explicitly

[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Tom Eccles via llvm-branch-commits


@@ -468,15 +505,49 @@ void DataSharingProcessor::doPrivatize(const 
semantics::Symbol *sym,
   lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
   assert(hsb && "Host symbol box not found");
 
-  mlir::Type symType = hsb.getAddr().getType();
+  mlir::Value privVal = hsb.getAddr();
+  mlir::Type allocType;
+  if (mlir::isa(privVal.getType()))
+allocType = privVal.getType();
+  else
+allocType = fir::unwrapRefType(privVal.getType());
+
   mlir::Location symLoc = hsb.getAddr().getLoc();
   std::string privatizerName = sym->name().ToString() + ".privatizer";
   bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
 
+  if (auto poly = mlir::dyn_cast(allocType)) {
+if (!mlir::isa(poly.getEleTy()) && isFirstPrivate)
+  TODO(symLoc, "create polymorphic host associated copy");
+  }

tblah wrote:

Done in 
[7cb83e3](https://github.com/llvm/llvm-project/pull/124019/commits/7cb83e335cfd25ab750e8c15c951dfb9198ac7a1)

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP][flang] make private variable allocation implicit in omp.private (PR #124019)

2025-01-23 Thread Tom Eccles via llvm-branch-commits


@@ -488,44 +559,34 @@ void DataSharingProcessor::doPrivatize(const 
semantics::Symbol *sym,
 mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
 firOpBuilder.setInsertionPointToStart(moduleOp.getBody());
 auto result = firOpBuilder.create(
-symLoc, uniquePrivatizerName, symType,
+symLoc, uniquePrivatizerName, allocType,
 isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
: mlir::omp::DataSharingClauseType::Private);
 fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
 lower::SymMapScope outerScope(symTable);
 
-// Populate the `alloc` region.
-{
-  mlir::Region &allocRegion = result.getAllocRegion();
-  mlir::Block *allocEntryBlock = firOpBuilder.createBlock(
-  &allocRegion, /*insertPt=*/{}, symType, symLoc);
-
-  firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
-
-  fir::ExtendedValue localExV =
-  hlfir::translateToExtendedValue(
-  symLoc, firOpBuilder, hlfir::Entity{allocRegion.getArgument(0)},
-  /*contiguousHint=*/
-  evaluate::IsSimplyContiguous(*sym, 
converter.getFoldingContext()))
-  .first;
-
-  symTable.addSymbol(*sym, localExV);
-  lower::SymMapScope innerScope(symTable);
-  cloneSymbol(sym);
-  mlir::Value cloneAddr = symTable.shallowLookupSymbol(*sym).getAddr();
-  mlir::Type cloneType = cloneAddr.getType();
-
-  // A `convert` op is required for variables that are storage associated
-  // via `equivalence`. The problem is that these variables are declared as
-  // `fir.ptr`s while their privatized storage is declared as `fir.ref`,
-  // therefore we convert to proper symbol type.
-  mlir::Value yieldedValue =
-  (symType == cloneType) ? cloneAddr
- : firOpBuilder.createConvert(
-   cloneAddr.getLoc(), symType, cloneAddr);
-
-  firOpBuilder.create(hsb.getAddr().getLoc(),
-  yieldedValue);
+// Populate the `init` region.
+const bool needsInitialization =

tblah wrote:

Done in 
[7cb83e3](https://github.com/llvm/llvm-project/pull/124019/commits/7cb83e335cfd25ab750e8c15c951dfb9198ac7a1)

https://github.com/llvm/llvm-project/pull/124019
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Parse METADIRECTIVE in specification part (PR #123397)

2025-01-23 Thread Kiran Chandramohan via llvm-branch-commits

https://github.com/kiranchandramohan approved this pull request.

LG.

The specification part has to be emitted in module files. But this is not 
necessary for producing the TODOs.

https://github.com/llvm/llvm-project/pull/123397
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PassBuilder][CodeGen] Add callback style pass buider (PR #116913)

2025-01-23 Thread via llvm-branch-commits

paperchalice wrote:

> Will new codegen support disabling individual passes? Instead of having 
> separate arguments like `-disable-machine-sink` we could do 
> `-disable-passes=machine-sink`.

I created #76714, but disabling arbitrary passes is not what we expect. Maybe we 
could add an allowlist as a compromise...

https://github.com/llvm/llvm-project/pull/116913
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 396698f - Revert "Revert "[LoopVectorizer] Add support for chaining partial reductions …"

2025-01-23 Thread via llvm-branch-commits

Author: Vitaly Buka
Date: 2025-01-23T14:00:43-08:00
New Revision: 396698f10d6c3b5433320a28c314d33c6356ff03

URL: 
https://github.com/llvm/llvm-project/commit/396698f10d6c3b5433320a28c314d33c6356ff03
DIFF: 
https://github.com/llvm/llvm-project/commit/396698f10d6c3b5433320a28c314d33c6356ff03.diff

LOG: Revert "Revert "[LoopVectorizer] Add support for chaining partial 
reductions …"

This reverts commit 0e213834df114484ca9525c0e60522b40ecf24e8.

Added: 
llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll

Modified: 
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
llvm/lib/Transforms/Vectorize/VPlan.h

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7167e2179af535..dec7a87ba9c50b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8682,12 +8682,12 @@ VPReplicateRecipe 
*VPRecipeBuilder::handleReplication(Instruction *I,
 /// are valid so recipes can be formed later.
 void VPRecipeBuilder::collectScaledReductions(VFRange &Range) {
   // Find all possible partial reductions.
-  SmallVector, 1>
+  SmallVector>
   PartialReductionChains;
-  for (const auto &[Phi, RdxDesc] : Legal->getReductionVars())
-if (std::optional> Pair =
-getScaledReduction(Phi, RdxDesc, Range))
-  PartialReductionChains.push_back(*Pair);
+  for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) {
+if (auto SR = getScaledReduction(Phi, RdxDesc.getLoopExitInstr(), Range))
+  PartialReductionChains.append(*SR);
+  }
 
   // A partial reduction is invalid if any of its extends are used by
   // something that isn't another partial reduction. This is because the
@@ -8715,26 +8715,44 @@ void VPRecipeBuilder::collectScaledReductions(VFRange 
&Range) {
   }
 }
 
-std::optional>
-VPRecipeBuilder::getScaledReduction(PHINode *PHI,
-const RecurrenceDescriptor &Rdx,
+std::optional>>
+VPRecipeBuilder::getScaledReduction(Instruction *PHI, Instruction 
*RdxExitInstr,
 VFRange &Range) {
+
+  if (!CM.TheLoop->contains(RdxExitInstr))
+return std::nullopt;
+
   // TODO: Allow scaling reductions when predicating. The select at
   // the end of the loop chooses between the phi value and most recent
   // reduction result, both of which have 
different VFs to the active lane
   // mask when scaling.
-  if 
(CM.blockNeedsPredicationForAnyReason(Rdx.getLoopExitInstr()->getParent()))
+  if (CM.blockNeedsPredicationForAnyReason(RdxExitInstr->getParent()))
 return std::nullopt;
 
-  auto *Update = dyn_cast(Rdx.getLoopExitInstr());
+  auto *Update = dyn_cast(RdxExitInstr);
   if (!Update)
 return std::nullopt;
 
   Value *Op = Update->getOperand(0);
   Value *PhiOp = Update->getOperand(1);
-  if (Op == PHI) {
-Op = Update->getOperand(1);
-PhiOp = Update->getOperand(0);
+  if (Op == PHI)
+std::swap(Op, PhiOp);
+
+  SmallVector> Chains;
+
+  // Try and get a scaled reduction from the first non-phi operand.
+  // If one is found, we use the discovered reduction instruction in
+  // place of the accumulator for costing.
+  if (auto *OpInst = dyn_cast(Op)) {
+if (auto SR0 = getScaledReduction(PHI, OpInst, Range)) {
+  Chains.append(*SR0);
+  PHI = SR0->rbegin()->first.Reduction;
+
+  Op = Update->getOperand(0);
+  PhiOp = Update->getOperand(1);
+  if (Op == PHI)
+std::swap(Op, PhiOp);
+}
   }
   if (PhiOp != PHI)
 return std::nullopt;
@@ -8757,7 +8775,7 @@ VPRecipeBuilder::getScaledReduction(PHINode *PHI,
   TTI::PartialReductionExtendKind OpBExtend =
   TargetTransformInfo::getPartialReductionExtendKind(ExtB);
 
-  PartialReductionChain Chain(Rdx.getLoopExitInstr(), ExtA, ExtB, BinOp);
+  PartialReductionChain Chain(RdxExitInstr, ExtA, ExtB, BinOp);
 
   unsigned TargetScaleFactor =
   PHI->getType()->getPrimitiveSizeInBits().getKnownScalarFactor(
@@ -8772,9 +8790,9 @@ VPRecipeBuilder::getScaledReduction(PHINode *PHI,
 return Cost.isValid();
   },
   Range))
-return std::make_pair(Chain, TargetScaleFactor);
+Chains.push_back(std::make_pair(Chain, TargetScaleFactor));
 
-  return std::nullopt;
+  return Chains;
 }
 
 VPRecipeBase *
@@ -8869,12 +8887,14 @@ 
VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
  "Unexpected number of operands for partial reduction");
 
   VPValue *BinOp = Operands[0];
-  VPValue *Phi = Operands[1];
-  if (isa(BinOp->getDefiningRecipe()))
-std::swap(BinOp, Phi);
-
-  return new VPPartialReductionRecipe(Reduction->getOpcode(), BinOp, Phi,
-  Reduction);
+  VPValue *Accumulator = Operands[1];
+  VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe();
+  if

[llvm-branch-commits] [llvm] [Analysis] Add DebugInfoCache analysis (PR #118629)

2025-01-23 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118629
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass (PR #118630)

2025-01-23 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118630
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)

2025-01-23 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118628
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Analysis] Add DebugInfoCache analysis (PR #118629)

2025-01-23 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118629
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] PeepholeOpt: Simplify tracking of current op for copy and reg_sequence (PR #124224)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/124224

Set the starting index in the constructor instead of treating
0 as a special case. There should also be no need for bounds
checking in the rewrite.

>From 5092973f8640de1323594a63338e20aee0a3fe89 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 23 Jan 2025 11:49:01 +0700
Subject: [PATCH] PeepholeOpt: Simplify tracking of current op for copy and
 reg_sequence

Set the starting index in the constructor instead of treating
0 as a special case. There should also be no need for bounds
checking in the rewrite.
---
 llvm/lib/CodeGen/PeepholeOptimizer.cpp | 31 +++---
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp 
b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index af4f2dc49b690b..2fc48209126acd 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -153,7 +153,7 @@ class RecurrenceInstr;
 class Rewriter {
 protected:
   MachineInstr &CopyLike;
-  unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
+  int CurrentSrcIdx = 0; ///< The index of the source being rewritten.
 public:
   Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
   virtual ~Rewriter() = default;
@@ -201,12 +201,9 @@ class CopyRewriter : public Rewriter {
 
   bool getNextRewritableSource(RegSubRegPair &Src,
RegSubRegPair &Dst) override {
-// CurrentSrcIdx > 0 means this function has already been called.
-if (CurrentSrcIdx > 0)
+if (CurrentSrcIdx++ > 1)
   return false;
-// This is the first call to getNextRewritableSource.
-// Move the CurrentSrcIdx to remember that we made that call.
-CurrentSrcIdx = 1;
+
 // The rewritable source is the argument.
 const MachineOperand &MOSrc = CopyLike.getOperand(1);
 Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
@@ -217,8 +214,6 @@ class CopyRewriter : public Rewriter {
   }
 
   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
-if (CurrentSrcIdx != 1)
-  return false;
 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
 MOSrc.setReg(NewReg);
 MOSrc.setSubReg(NewSubReg);
@@ -229,7 +224,7 @@ class CopyRewriter : public Rewriter {
 /// Helper class to rewrite uncoalescable copy like instructions
 /// into new COPY (coalescable friendly) instructions.
 class UncoalescableRewriter : public Rewriter {
-  unsigned NumDefs; ///< Number of defs in the bitcast.
+  int NumDefs; ///< Number of defs in the bitcast.
 
 public:
   UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
@@ -383,6 +378,7 @@ class RegSequenceRewriter : public Rewriter {
 public:
   RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
 assert(MI.isRegSequence() && "Invalid instruction");
+CurrentSrcIdx = -1;
   }
 
   /// \see Rewriter::getNextRewritableSource()
@@ -404,16 +400,10 @@ class RegSequenceRewriter : public Rewriter {
   bool getNextRewritableSource(RegSubRegPair &Src,
RegSubRegPair &Dst) override {
 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
+CurrentSrcIdx += 2;
+if (static_cast(CurrentSrcIdx) >= CopyLike.getNumOperands())
+  return false;
 
-// If this is the first call, move to the first argument.
-if (CurrentSrcIdx == 0) {
-  CurrentSrcIdx = 1;
-} else {
-  // Otherwise, move to the next argument and check that it is valid.
-  CurrentSrcIdx += 2;
-  if (CurrentSrcIdx >= CopyLike.getNumOperands())
-return false;
-}
 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
 Src.Reg = MOInsertedReg.getReg();
 // If we have to compose sub-register indices, bail out.
@@ -431,11 +421,6 @@ class RegSequenceRewriter : public Rewriter {
   }
 
   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
-// We cannot rewrite out of bound operands.
-// Moreover, rewritable sources are at odd positions.
-if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
-  return false;
-
 // Do not introduce new subregister uses in a reg_sequence. Until composing
 // subregister indices is supported while folding, we're just blocking
 // folding of subregister copies later in the function.

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] PeepholeOpt: Simplify tracking of current op for copy and reg_sequence (PR #124224)

2025-01-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

Set the starting index in the constructor instead of treating
0 as a special case. There should also be no need for bounds
checking in the rewrite.

---
Full diff: https://github.com/llvm/llvm-project/pull/124224.diff


1 Files Affected:

- (modified) llvm/lib/CodeGen/PeepholeOptimizer.cpp (+8-23) 


``diff
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp 
b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index af4f2dc49b690b..2fc48209126acd 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -153,7 +153,7 @@ class RecurrenceInstr;
 class Rewriter {
 protected:
   MachineInstr &CopyLike;
-  unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
+  int CurrentSrcIdx = 0; ///< The index of the source being rewritten.
 public:
   Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
   virtual ~Rewriter() = default;
@@ -201,12 +201,9 @@ class CopyRewriter : public Rewriter {
 
   bool getNextRewritableSource(RegSubRegPair &Src,
RegSubRegPair &Dst) override {
-// CurrentSrcIdx > 0 means this function has already been called.
-if (CurrentSrcIdx > 0)
+if (CurrentSrcIdx++ > 1)
   return false;
-// This is the first call to getNextRewritableSource.
-// Move the CurrentSrcIdx to remember that we made that call.
-CurrentSrcIdx = 1;
+
 // The rewritable source is the argument.
 const MachineOperand &MOSrc = CopyLike.getOperand(1);
 Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
@@ -217,8 +214,6 @@ class CopyRewriter : public Rewriter {
   }
 
   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
-if (CurrentSrcIdx != 1)
-  return false;
 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
 MOSrc.setReg(NewReg);
 MOSrc.setSubReg(NewSubReg);
@@ -229,7 +224,7 @@ class CopyRewriter : public Rewriter {
 /// Helper class to rewrite uncoalescable copy like instructions
 /// into new COPY (coalescable friendly) instructions.
 class UncoalescableRewriter : public Rewriter {
-  unsigned NumDefs; ///< Number of defs in the bitcast.
+  int NumDefs; ///< Number of defs in the bitcast.
 
 public:
   UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
@@ -383,6 +378,7 @@ class RegSequenceRewriter : public Rewriter {
 public:
   RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
 assert(MI.isRegSequence() && "Invalid instruction");
+CurrentSrcIdx = -1;
   }
 
   /// \see Rewriter::getNextRewritableSource()
@@ -404,16 +400,10 @@ class RegSequenceRewriter : public Rewriter {
   bool getNextRewritableSource(RegSubRegPair &Src,
RegSubRegPair &Dst) override {
 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
+CurrentSrcIdx += 2;
+if (static_cast(CurrentSrcIdx) >= CopyLike.getNumOperands())
+  return false;
 
-// If this is the first call, move to the first argument.
-if (CurrentSrcIdx == 0) {
-  CurrentSrcIdx = 1;
-} else {
-  // Otherwise, move to the next argument and check that it is valid.
-  CurrentSrcIdx += 2;
-  if (CurrentSrcIdx >= CopyLike.getNumOperands())
-return false;
-}
 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
 Src.Reg = MOInsertedReg.getReg();
 // If we have to compose sub-register indices, bail out.
@@ -431,11 +421,6 @@ class RegSequenceRewriter : public Rewriter {
   }
 
   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
-// We cannot rewrite out of bound operands.
-// Moreover, rewritable sources are at odd positions.
-if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
-  return false;
-
 // Do not introduce new subregister uses in a reg_sequence. Until composing
 // subregister indices is supported while folding, we're just blocking
 // folding of subregister copies later in the function.

``




https://github.com/llvm/llvm-project/pull/124224
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] PeepholeOpt: Simplify tracking of current op for copy and reg_sequence (PR #124224)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/124224
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] PeepholeOpt: Simplify tracking of current op for copy and reg_sequence (PR #124224)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124224?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#124224** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124224?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124224?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#124111** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/124111?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn 
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/124224
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -20,107 +20,172 @@
 
 using namespace llvm;
 
-static cl::opt Mode(
+static cl::opt Mode(
 "regalloc-enable-priority-advisor", cl::Hidden,
-cl::init(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default),
+cl::init(RegAllocPriorityAdvisorProvider::AdvisorMode::Default),
 cl::desc("Enable regalloc advisor mode"),
 cl::values(
-clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default,
+clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Default,
"default", "Default"),
-clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release,
+clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Release,
"release", "precompiled"),
-clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development,
+clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Development,
"development", "for training"),
 clEnumValN(
-RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy, "dummy",
+RegAllocPriorityAdvisorProvider::AdvisorMode::Dummy, "dummy",
 "prioritize low virtual register numbers for test and debug")));
 
-char RegAllocPriorityAdvisorAnalysis::ID = 0;
-INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority",
+char RegAllocPriorityAdvisorAnalysisLegacy::ID = 0;
+INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysisLegacy, "regalloc-priority",
 "Regalloc priority policy", false, true)
 
 namespace {
-class DefaultPriorityAdvisorAnalysis final
-: public RegAllocPriorityAdvisorAnalysis {
+
+class DefaultPriorityAdvisorProvider final
+: public RegAllocPriorityAdvisorProvider {
+public:
+  DefaultPriorityAdvisorProvider(bool NotAsRequested, LLVMContext &Ctx)
+  : RegAllocPriorityAdvisorProvider(AdvisorMode::Default) {
+if (NotAsRequested)
+  Ctx.emitError("Requested regalloc priority advisor analysis "
+"could be created. Using default");
+  }
+
+  // support for isa<> and dyn_cast.
+  static bool classof(const RegAllocPriorityAdvisorProvider *R) {
+return R->getAdvisorMode() == AdvisorMode::Default;
+  }
+
+  std::unique_ptr
+  getAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *SI) override {
+assert(SI && "SlotIndexes result must be set");
+return std::make_unique(MF, RA, SI);
+  }
+};
+
+class DummyPriorityAdvisorProvider final
+: public RegAllocPriorityAdvisorProvider {
+public:
+  DummyPriorityAdvisorProvider()
+  : RegAllocPriorityAdvisorProvider(AdvisorMode::Dummy) {}
+
+  static bool classof(const RegAllocPriorityAdvisorProvider *R) {
+return R->getAdvisorMode() == AdvisorMode::Dummy;
+  }
+
+  std::unique_ptr
+  getAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *SI) override {
+assert(SI && "SlotIndexes result must be set");
+return std::make_unique(MF, RA, SI);
+  }
+};
+
+class DefaultPriorityAdvisorAnalysisLegacy final
+: public RegAllocPriorityAdvisorAnalysisLegacy {
 public:
-  DefaultPriorityAdvisorAnalysis(bool NotAsRequested)
-  : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Default),
+  DefaultPriorityAdvisorAnalysisLegacy(bool NotAsRequested)
+  : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Default),
 NotAsRequested(NotAsRequested) {}
 
   // support for isa<> and dyn_cast.
-  static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+  static bool classof(const RegAllocPriorityAdvisorAnalysisLegacy *R) {
 return R->getAdvisorMode() == AdvisorMode::Default;
   }
 
 private:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
 AU.addRequired();
-RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
-  }
-  std::unique_ptr
-  getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
-return std::make_unique(
-MF, RA, &getAnalysis().getSI());
+RegAllocPriorityAdvisorAnalysisLegacy::getAnalysisUsage(AU);
   }
+
   bool doInitialization(Module &M) override {
-if (NotAsRequested)
-  M.getContext().emitError("Requested regalloc priority advisor analysis "
-   "could be created. Using default");
-return RegAllocPriorityAdvisorAnalysis::doInitialization(M);
+Provider.reset(
+new DefaultPriorityAdvisorProvider(NotAsRequested, M.getContext()));
+return false;
   }
+
   const bool NotAsRequested;
 };
 
 class DummyPriorityAdvisorAnalysis final
-: public RegAllocPriorityAdvisorAnalysis {
+: public RegAllocPriorityAdvisorAnalysisLegacy {
 public:
+  using RegAllocPriorityAdvisorAnalysisLegacy::AdvisorMode;
   DummyPriorityAdvisorAnalysis()
-  : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Dummy) {}
+  : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Dummy) {}
 
   // support for isa<> and dyn_cast.
-  static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+  static bool classof(const RegAl

[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -357,3 +391,21 @@ DevelopmentModePriorityAdvisor::getPriority(const 
LiveInterval &LI) const {
 }
 
 #endif // #ifdef LLVM_HAVE_TFLITE
+
+void RegAllocPriorityAdvisorAnalysis::initializeMLProvider(
+RegAllocPriorityAdvisorProvider::AdvisorMode Mode, LLVMContext &Ctx) {
+  if (Provider)
+return;
+  switch (Mode) {
+  case RegAllocPriorityAdvisorProvider::AdvisorMode::Development:
+#if defined(LLVM_HAVE_TFLITE)
+Provider.reset(new DevelopmentModePriorityAdvisorProvider(Ctx));
+#endif
+break;
+  case RegAllocPriorityAdvisorProvider::AdvisorMode::Release:
+Provider.reset(new ReleaseModePriorityAdvisorProvider());
+break;
+  default:
+break;

arsenm wrote:

llvm_unreachable? This should be a fully covered switch 

https://github.com/llvm/llvm-project/pull/118462
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -68,20 +70,72 @@ class DummyPriorityAdvisor : public RegAllocPriorityAdvisor 
{
   unsigned getPriority(const LiveInterval &LI) const override;
 };
 
-class RegAllocPriorityAdvisorAnalysis : public ImmutablePass {
+/// Common provider for getting the priority advisor and logging rewards.
+/// Legacy analysis forwards all calls to this provider.
+/// New analysis serves the provider as the analysis result.
+/// Expensive setup is done in the constructor, so that the advisor can be
+/// created quickly for every machine function.
+/// TODO: Remove once legacy PM support is dropped.
+class RegAllocPriorityAdvisorProvider {
 public:
   enum class AdvisorMode : int { Default, Release, Development, Dummy };
 
-  RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode)
-  : ImmutablePass(ID), Mode(Mode){};
+  RegAllocPriorityAdvisorProvider(AdvisorMode Mode) : Mode(Mode) {}
+
+  virtual ~RegAllocPriorityAdvisorProvider() = default;
+
+  virtual void logRewardIfNeeded(const MachineFunction &MF,
+ function_ref GetReward) {};
+
+  virtual std::unique_ptr
+  getAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *SI) = 0;

arsenm wrote:

Make SI a reference, it is required 

https://github.com/llvm/llvm-project/pull/118462
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -20,107 +20,172 @@
 
 using namespace llvm;
 
-static cl::opt Mode(
+static cl::opt Mode(
 "regalloc-enable-priority-advisor", cl::Hidden,
-cl::init(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default),
+cl::init(RegAllocPriorityAdvisorProvider::AdvisorMode::Default),
 cl::desc("Enable regalloc advisor mode"),
 cl::values(
-clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default,
+clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Default,
"default", "Default"),
-clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release,
+clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Release,
"release", "precompiled"),
-clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development,
+clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Development,
"development", "for training"),
 clEnumValN(
-RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy, "dummy",
+RegAllocPriorityAdvisorProvider::AdvisorMode::Dummy, "dummy",
 "prioritize low virtual register numbers for test and debug")));
 
-char RegAllocPriorityAdvisorAnalysis::ID = 0;
-INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority",
+char RegAllocPriorityAdvisorAnalysisLegacy::ID = 0;
+INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysisLegacy, "regalloc-priority",
 "Regalloc priority policy", false, true)
 
 namespace {
-class DefaultPriorityAdvisorAnalysis final
-: public RegAllocPriorityAdvisorAnalysis {
+
+class DefaultPriorityAdvisorProvider final
+: public RegAllocPriorityAdvisorProvider {
+public:
+  DefaultPriorityAdvisorProvider(bool NotAsRequested, LLVMContext &Ctx)
+  : RegAllocPriorityAdvisorProvider(AdvisorMode::Default) {
+if (NotAsRequested)
+  Ctx.emitError("Requested regalloc priority advisor analysis "
+"could be created. Using default");
+  }
+
+  // support for isa<> and dyn_cast.
+  static bool classof(const RegAllocPriorityAdvisorProvider *R) {
+return R->getAdvisorMode() == AdvisorMode::Default;
+  }
+
+  std::unique_ptr
+  getAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *SI) override {
+assert(SI && "SlotIndexes result must be set");
+return std::make_unique(MF, RA, SI);
+  }
+};
+
+class DummyPriorityAdvisorProvider final
+: public RegAllocPriorityAdvisorProvider {
+public:
+  DummyPriorityAdvisorProvider()
+  : RegAllocPriorityAdvisorProvider(AdvisorMode::Dummy) {}
+
+  static bool classof(const RegAllocPriorityAdvisorProvider *R) {
+return R->getAdvisorMode() == AdvisorMode::Dummy;
+  }
+
+  std::unique_ptr
+  getAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *SI) override {
+assert(SI && "SlotIndexes result must be set");
+return std::make_unique(MF, RA, SI);
+  }
+};
+
+class DefaultPriorityAdvisorAnalysisLegacy final
+: public RegAllocPriorityAdvisorAnalysisLegacy {
 public:
-  DefaultPriorityAdvisorAnalysis(bool NotAsRequested)
-  : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Default),
+  DefaultPriorityAdvisorAnalysisLegacy(bool NotAsRequested)
+  : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Default),
 NotAsRequested(NotAsRequested) {}
 
   // support for isa<> and dyn_cast.
-  static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+  static bool classof(const RegAllocPriorityAdvisorAnalysisLegacy *R) {
 return R->getAdvisorMode() == AdvisorMode::Default;
   }
 
 private:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
 AU.addRequired();
-RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
-  }
-  std::unique_ptr
-  getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
-return std::make_unique(
-MF, RA, &getAnalysis().getSI());
+RegAllocPriorityAdvisorAnalysisLegacy::getAnalysisUsage(AU);
   }
+
   bool doInitialization(Module &M) override {
-if (NotAsRequested)
-  M.getContext().emitError("Requested regalloc priority advisor analysis "
-   "could be created. Using default");
-return RegAllocPriorityAdvisorAnalysis::doInitialization(M);
+Provider.reset(
+new DefaultPriorityAdvisorProvider(NotAsRequested, M.getContext()));
+return false;
   }
+
   const bool NotAsRequested;
 };
 
 class DummyPriorityAdvisorAnalysis final
-: public RegAllocPriorityAdvisorAnalysis {
+: public RegAllocPriorityAdvisorAnalysisLegacy {
 public:
+  using RegAllocPriorityAdvisorAnalysisLegacy::AdvisorMode;
   DummyPriorityAdvisorAnalysis()
-  : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Dummy) {}
+  : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Dummy) {}
 
   // support for isa<> and dyn_cast.
-  static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+  static bool classof(const RegAl

[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -150,12 +139,37 @@ class ReleaseModePriorityAdvisorAnalysis final
 InteractiveChannelBaseName + ".out",
 InteractiveChannelBaseName + ".in");
 }
-return std::make_unique(
-MF, RA, &getAnalysis().getSI(), Runner.get());
+assert(SI && "SlotIndexes result must be set");

arsenm wrote:

Make a reference and drop the assert 

https://github.com/llvm/llvm-project/pull/118462
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -146,11 +149,138 @@ static cl::opt SplitThresholdForRegWithHint(
 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
 
-char RAGreedy::ID = 0;
-char &llvm::RAGreedyID = RAGreedy::ID;
+namespace {
+class RAGreedyLegacy : public MachineFunctionPass {
+  RegAllocFilterFunc F;
 
-INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
-"Greedy Register Allocator", false, false)
+public:
+  RAGreedyLegacy(const RegAllocFilterFunc F = nullptr);
+
+  static char ID;
+  /// Return the pass name.
+  StringRef getPassName() const override { return "Greedy Register Allocator"; 
}
+
+  /// RAGreedy analysis usage.
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  /// Perform register allocation.
+  bool runOnMachineFunction(MachineFunction &mf) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::NoPHIs);
+  }
+
+  MachineFunctionProperties getClearedProperties() const override {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::IsSSA);
+  }
+};
+
+} // end anonymous namespace
+
+RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F)
+: MachineFunctionPass(ID), F(F) {
+  initializeRAGreedyLegacyPass(*PassRegistry::getPassRegistry());
+}
+
+RAGreedy::RAGreedy(RequiredAnalyses &Analyses, const RegAllocFilterFunc F) : 
RegAllocBase(F) {
+  setAnalyses(Analyses);
+}
+
+void RAGreedy::setAnalyses(RequiredAnalyses &Analyses) {

arsenm wrote:

Just do this directly in the constructor, there's no other user of setAnalyses 

https://github.com/llvm/llvm-project/pull/119540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm commented:

Missing test

https://github.com/llvm/llvm-project/pull/119540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/119540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocGreedy to NPM (PR #119540)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -146,11 +149,138 @@ static cl::opt SplitThresholdForRegWithHint(
 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
 
-char RAGreedy::ID = 0;
-char &llvm::RAGreedyID = RAGreedy::ID;
+namespace {
+class RAGreedyLegacy : public MachineFunctionPass {
+  RegAllocFilterFunc F;
 
-INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
-"Greedy Register Allocator", false, false)
+public:
+  RAGreedyLegacy(const RegAllocFilterFunc F = nullptr);
+
+  static char ID;
+  /// Return the pass name.
+  StringRef getPassName() const override { return "Greedy Register Allocator"; 
}
+
+  /// RAGreedy analysis usage.
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  /// Perform register allocation.
+  bool runOnMachineFunction(MachineFunction &mf) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::NoPHIs);
+  }
+
+  MachineFunctionProperties getClearedProperties() const override {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::IsSSA);
+  }
+};
+
+} // end anonymous namespace
+
+RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F)
+: MachineFunctionPass(ID), F(F) {
+  initializeRAGreedyLegacyPass(*PassRegistry::getPassRegistry());
+}
+
+RAGreedy::RAGreedy(RequiredAnalyses &Analyses, const RegAllocFilterFunc F) : 
RegAllocBase(F) {
+  setAnalyses(Analyses);
+}
+
+void RAGreedy::setAnalyses(RequiredAnalyses &Analyses) {
+  VRM = Analyses.VRM;
+  LIS = Analyses.LIS;
+  Matrix = Analyses.LRM;
+  Indexes = Analyses.Indexes;
+  MBFI = Analyses.MBFI;
+  DomTree = Analyses.DomTree;
+  Loops = Analyses.Loops;
+  ORE = Analyses.ORE;
+  Bundles = Analyses.Bundles;
+  SpillPlacer = Analyses.SpillPlacer;
+  DebugVars = Analyses.DebugVars;
+  LSS = Analyses.LSS;
+  EvictProvider = Analyses.EvictProvider;
+  PriorityProvider = Analyses.PriorityProvider;
+}
+
+void RAGreedyPass::printPipeline(raw_ostream &OS, 
function_ref MapClassName2PassName) const {
+  StringRef FilterName = Opts.FilterName.empty() ? "all" : Opts.FilterName;
+  OS << "regallocgreedy<" << FilterName << '>';

arsenm wrote:

```suggestion
  OS << "regalloc-greedy<" << FilterName << '>';
```

https://github.com/llvm/llvm-project/pull/119540
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] Introduce address space `hlsl_constant(2)` for constant buffer declarations (PR #123411)

2025-01-23 Thread Helena Kotas via llvm-branch-commits


@@ -1,16 +1,21 @@
 // RUN: %clang_cc1 -finclude-default-header -triple 
dxil-pc-shadermodel6.3-library %s \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-// CHECK-DAG: @[[CB:.+]] = external constant { float }
+// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s 
\
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
 cbuffer A {
-float a;
-  // CHECK-DAG:@_ZL1b = internal global float 3.00e+00, align 4
+  // CHECK: @a = external addrspace(2) externally_initialized global float, 
align 4
+  float a;
+  // CHECK: @_ZL1b = internal global float 3.00e+00, align 4
   static float b = 3;

hekota wrote:

At this point it is not clear what the end goal is. The static decl here tests 
that it does not get added to the cbuffer layout struct or the new address 
space. When/if we prohibit static decls in cbuffers, this test will surely flare 
up and will be fixed up.

https://github.com/llvm/llvm-project/pull/123411
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Finn Plummer via llvm-branch-commits


@@ -148,6 +148,333 @@ bool RootSignatureLexer::LexToken(RootSignatureToken 
&Result) {
   return false;
 }
 
+// Parser Definitions
+
+RootSignatureParser::RootSignatureParser(
+SmallVector &Elements,
+const SmallVector &Tokens)
+: Elements(Elements) {
+  CurTok = Tokens.begin();
+  LastTok = Tokens.end();
+}
+
+bool RootSignatureParser::ReportError() { return true; }
+
+bool RootSignatureParser::Parse() {
+  CurTok--; // Decrement once here so we can use the ...ExpectedToken api
+
+  // Iterate as many RootElements as possible
+  bool HasComma = true;
+  while (HasComma &&
+ !TryConsumeExpectedToken(ArrayRef{TokenKind::kw_DescriptorTable})) {
+if (ParseRootElement())
+  return true;
+HasComma = !TryConsumeExpectedToken(TokenKind::pu_comma);
+  }
+  if (HasComma)
+return ReportError(); // report 'comma' denotes a required extra item
+
+  // Ensure that we are at the end of the tokens
+  CurTok++;
+  if (CurTok != LastTok)
+return ReportError(); // report expected end of input but got more
+  return false;
+}
+
+bool RootSignatureParser::ParseRootElement() {
+  // Dispatch onto the correct parse method
+  switch (CurTok->Kind) {
+  case TokenKind::kw_DescriptorTable:
+return ParseDescriptorTable();
+  default:
+llvm_unreachable("Switch for an expected token was not provided");
+return true;
+  }
+}
+
+bool RootSignatureParser::ParseDescriptorTable() {
+  DescriptorTable Table;
+
+  if (ConsumeExpectedToken(TokenKind::pu_l_paren))
+return true;
+
+  // Iterate as many DescriptorTableClaues as possible
+  bool HasComma = true;
+  while (!TryConsumeExpectedToken({TokenKind::kw_CBV, TokenKind::kw_SRV,
+   TokenKind::kw_UAV, TokenKind::kw_Sampler})) 
{
+if (ParseDescriptorTableClause())
+  return true;
+Table.NumClauses++;
+HasComma = !TryConsumeExpectedToken(TokenKind::pu_comma);
+  }
+
+  // Consume optional 'visibility' paramater
+  if (HasComma && !TryConsumeExpectedToken(TokenKind::kw_visibility)) {
+if (ConsumeExpectedToken(TokenKind::pu_equal))
+  return true;
+
+if (ParseShaderVisibility(Table.Visibility))
+  return true;
+
+HasComma = !TryConsumeExpectedToken(TokenKind::pu_comma);
+  }
+
+  if (HasComma && Table.NumClauses != 0)
+return ReportError(); // report 'comma' denotes a required extra item
+
+  if (ConsumeExpectedToken(TokenKind::pu_r_paren))
+return true;
+
+  Elements.push_back(RootElement(Table));
+  return false;
+}
+
+bool RootSignatureParser::ParseDescriptorTableClause() {
+  // Determine the type of Clause first so we can initialize the struct with
+  // the correct default flags
+  ClauseType CT;
+  switch (CurTok->Kind) {
+  case TokenKind::kw_CBV:
+CT = ClauseType::CBV;
+break;
+  case TokenKind::kw_SRV:
+CT = ClauseType::SRV;
+break;
+  case TokenKind::kw_UAV:
+CT = ClauseType::UAV;
+break;
+  case TokenKind::kw_Sampler:
+CT = ClauseType::Sampler;
+break;
+  default:
+llvm_unreachable("Switch for an expected token was not provided");
+return true;
+  }
+  DescriptorTableClause Clause(CT);
+
+  if (ConsumeExpectedToken(TokenKind::pu_l_paren))
+return true;
+
+  // Consume mandatory Register paramater
+  if (ConsumeExpectedToken(
+  {TokenKind::bReg, TokenKind::tReg, TokenKind::uReg, 
TokenKind::sReg}))
+return true;
+  if (ParseRegister(Clause.Register))
+return true;
+
+  // Start parsing the optional parameters

inbelic wrote:

Confirmed that we should accept the parameters in any order. So will need to 
refactor this.

https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Chris B via llvm-branch-commits

https://github.com/llvm-beanz commented:

I think that the way you're breaking up this change is sub-optimal from a 
review perspective. You've added a lot of code that partially handles parsing a 
very complex root signature. The problem is that to complete this 
implementation you're going to go back over this code over and over again 
fleshing it out, and from a reviewer's perspective we're going to need to keep 
paging back in extra context.

If instead you started with a much simpler root signature (even just an empty 
one), but implement more complete handling for it, we can review that and 
incrementally build up without revisiting the same code over and over again in 
each subsequent patch.

https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Finn Plummer via llvm-branch-commits

inbelic wrote:

Sounds good, and I appreciate the feedback. I will restructure the changes to 
be of smaller granularity, which will be better self-contained and directly 
include their diagnostics testing.

https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic converted_to_draft 
https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Clang][CWG2369] Implement GCC's heuristic for DR 2369 (PR #124231)

2025-01-23 Thread Younan Zhang via llvm-branch-commits

https://github.com/zyn0217 updated 
https://github.com/llvm/llvm-project/pull/124231

>From c36dd4fcac367b206072b36ccc9be4106a22ec3b Mon Sep 17 00:00:00 2001
From: Younan Zhang 
Date: Fri, 24 Jan 2025 13:52:37 +0800
Subject: [PATCH 1/2] Implement GCC's CWG 2369 heuristic

---
 clang/include/clang/Sema/Sema.h   |   7 +-
 clang/lib/Sema/SemaOverload.cpp   |  70 +++-
 clang/lib/Sema/SemaTemplateDeduction.cpp  |  13 +-
 .../SemaTemplate/concepts-recursive-inst.cpp  | 169 ++
 4 files changed, 246 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 87d9a335763e31..fd4d1f7e0d8f9c 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10236,7 +10236,8 @@ class Sema final : public SemaBase {
   FunctionTemplateDecl *FunctionTemplate, ArrayRef ParamTypes,
   ArrayRef Args, OverloadCandidateSet &CandidateSet,
   ConversionSequenceList &Conversions, bool SuppressUserConversions,
-  CXXRecordDecl *ActingContext = nullptr, QualType ObjectType = QualType(),
+  bool NonInstOnly, CXXRecordDecl *ActingContext = nullptr,
+  QualType ObjectType = QualType(),
   Expr::Classification ObjectClassification = {},
   OverloadCandidateParamOrder PO = {});
 
@@ -12272,7 +12273,7 @@ class Sema final : public SemaBase {
   sema::TemplateDeductionInfo &Info,
   SmallVectorImpl const *OriginalCallArgs = nullptr,
   bool PartialOverloading = false,
-  llvm::function_ref CheckNonDependent = [] { return false; });
+  llvm::function_ref CheckNonDependent = [](bool) { return 
false; });
 
   /// Perform template argument deduction from a function call
   /// (C++ [temp.deduct.call]).
@@ -12306,7 +12307,7 @@ class Sema final : public SemaBase {
   FunctionDecl *&Specialization, sema::TemplateDeductionInfo &Info,
   bool PartialOverloading, bool AggregateDeductionCandidate,
   QualType ObjectType, Expr::Classification ObjectClassification,
-  llvm::function_ref)> CheckNonDependent);
+  llvm::function_ref, bool)> CheckNonDependent);
 
   /// Deduce template arguments when taking the address of a function
   /// template (C++ [temp.deduct.funcaddr]) or matching a specialization to
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 3be9ade80f1d94..aded8abe5b4f7b 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -7733,10 +7733,10 @@ void Sema::AddMethodTemplateCandidate(
   MethodTmpl, ExplicitTemplateArgs, Args, Specialization, Info,
   PartialOverloading, /*AggregateDeductionCandidate=*/false, 
ObjectType,
   ObjectClassification,
-  [&](ArrayRef ParamTypes) {
+  [&](ArrayRef ParamTypes, bool NonInstOnly) {
 return CheckNonDependentConversions(
 MethodTmpl, ParamTypes, Args, CandidateSet, Conversions,
-SuppressUserConversions, ActingContext, ObjectType,
+SuppressUserConversions, NonInstOnly, ActingContext, 
ObjectType,
 ObjectClassification, PO);
   });
   Result != TemplateDeductionResult::Success) {
@@ -7818,10 +7818,11 @@ void Sema::AddTemplateOverloadCandidate(
   PartialOverloading, AggregateCandidateDeduction,
   /*ObjectType=*/QualType(),
   /*ObjectClassification=*/Expr::Classification(),
-  [&](ArrayRef ParamTypes) {
+  [&](ArrayRef ParamTypes, bool NonInstOnly) {
 return CheckNonDependentConversions(
 FunctionTemplate, ParamTypes, Args, CandidateSet, Conversions,
-SuppressUserConversions, nullptr, QualType(), {}, PO);
+SuppressUserConversions, NonInstOnly, nullptr, QualType(), {},
+PO);
   });
   Result != TemplateDeductionResult::Success) {
 OverloadCandidate &Candidate =
@@ -7863,7 +7864,7 @@ bool Sema::CheckNonDependentConversions(
 FunctionTemplateDecl *FunctionTemplate, ArrayRef ParamTypes,
 ArrayRef Args, OverloadCandidateSet &CandidateSet,
 ConversionSequenceList &Conversions, bool SuppressUserConversions,
-CXXRecordDecl *ActingContext, QualType ObjectType,
+bool NonInstOnly, CXXRecordDecl *ActingContext, QualType ObjectType,
 Expr::Classification ObjectClassification, OverloadCandidateParamOrder PO) 
{
   // FIXME: The cases in which we allow explicit conversions for constructor
   // arguments never consider calling a constructor template. It's not clear
@@ -7900,6 +7901,63 @@ bool Sema::CheckNonDependentConversions(
 }
   }
 
+  // 
https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=2154bcd6d43cfd821ca70e1583880c4ed955355d
+  auto ConversionMightInduceInstantiation = [&](QualType ParmType,
+QualType ArgType) {
+ParmType = ParmType.getNonReferenceType();
+ArgType = ArgType.getNonReferenceType();
+bool Pointe

[llvm-branch-commits] [llvm] [JITLink][LoongArch] Add label addition and subtraction relocations (PR #122262)

2025-01-23 Thread via llvm-branch-commits

https://github.com/wangleiat approved this pull request.

LGTM, thanks.

https://github.com/llvm/llvm-project/pull/122262
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Damyan Pepper via llvm-branch-commits


@@ -89,6 +91,75 @@ class RootSignatureLexer {
   }
 };
 
+class RootSignatureParser {
+public:
+  RootSignatureParser(SmallVector &Elements,
+  const SmallVector &Tokens);
+
+  // Iterates over the provided tokens and constructs the in-memory
+  // representations of the RootElements.
+  //
+  // The return value denotes if there was a failure and the method will
+  // return on the first encountered failure, or, return false if it
+  // can sucessfully reach the end of the tokens.
+  bool Parse();
+
+private:
+  bool ReportError(); // TODO: Implement this to report error through Diags
+
+  // Root Element helpers
+  bool ParseRootElement();
+  bool ParseDescriptorTable();
+  bool ParseDescriptorTableClause();
+
+  // Common parsing helpers
+  bool ParseRegister(Register &Register);
+
+  // Various flags/enum parsing helpers
+  bool ParseDescriptorRangeFlags(DescriptorRangeFlags &Flags);
+  bool ParseShaderVisibility(ShaderVisibility &Flag);
+
+  // Increment the token iterator if we have not reached the end.
+  // Return value denotes if we were already at the last token.
+  bool ConsumeNextToken();
+
+  // Attempt to retrieve the next token, if TokenKind is invalid then there was
+  // no next token.
+  RootSignatureToken PeekNextToken();
+
+  // Is the current token one of the expected kinds
+  bool IsCurExpectedToken(ArrayRef AnyExpected);

damyanp wrote:

Any reason this doesn't have an overload that takes a single `Expected` like 
all the ones below?

Alternatively, if this one doesn't need the overload then do we need the other 
ones?

https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)

2025-01-23 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118628
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass (PR #118630)

2025-01-23 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118630
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Damyan Pepper via llvm-branch-commits


@@ -0,0 +1,140 @@
+//===- HLSLRootSignature.h - HLSL Root Signature helper objects 
---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+///
+/// \file This file contains helper objects for working with HLSL Root
+/// Signatures.
+///
+//===--===//
+
+#ifndef LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H
+#define LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H
+
+#include 
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace hlsl {
+namespace root_signature {
+
+// This is a copy from DebugInfo/CodeView/CodeView.h
+#define RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(Class)
\
+  inline Class operator|(Class a, Class b) {   
\
+return static_cast(llvm::to_underlying(a) | 
\
+  llvm::to_underlying(b)); 
\
+  }
\
+  inline Class operator&(Class a, Class b) {   
\
+return static_cast(llvm::to_underlying(a) & 
\
+  llvm::to_underlying(b)); 
\
+  }
\
+  inline Class operator~(Class a) {
\
+return static_cast(~llvm::to_underlying(a));
\
+  }
\
+  inline Class &operator|=(Class &a, Class b) {
\
+a = a | b; 
\
+return a;  
\
+  }
\
+  inline Class &operator&=(Class &a, Class b) {
\
+a = a & b; 
\
+return a;  
\
+  }
+
+// Definition of the various enumerations and flags
+enum class DescriptorRangeFlags : unsigned {
+  None = 0,
+  DescriptorsVolatile = 0x1,
+  DataVolatile = 0x2,
+  DataStaticWhileSetAtExecute = 0x4,
+  DataStatic = 0x8,
+  DescriptorsStaticKeepingBufferBoundsChecks = 0x1,
+  ValidFlags = 0x1000f,
+  ValidSamplerFlags = DescriptorsVolatile,
+};
+RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(DescriptorRangeFlags)
+
+enum class ShaderVisibility {
+  All = 0,
+  Vertex = 1,
+  Hull = 2,
+  Domain = 3,
+  Geometry = 4,
+  Pixel = 5,
+  Amplification = 6,
+  Mesh = 7,
+};
+
+// Definitions of the in-memory data layout structures
+
+// Models the different registers: bReg | tReg | uReg | sReg
+enum class RegisterType { BReg, TReg, UReg, SReg };
+struct Register {
+  RegisterType ViewType;
+  uint32_t Number;
+};
+
+static const uint32_t DescriptorTableOffsetAppend = 0x;
+// Models DTClause : CBV | SRV | UAV | Sampler by collecting like parameters
+enum class ClauseType { CBV, SRV, UAV, Sampler };
+struct DescriptorTableClause {
+  ClauseType Type;
+  Register Register;
+  uint32_t NumDescriptors = 1;
+  uint32_t Space = 0;
+  uint32_t Offset = DescriptorTableOffsetAppend;
+  DescriptorRangeFlags Flags;
+
+  DescriptorTableClause(ClauseType Type) : Type(Type) {
+switch (Type) {
+case ClauseType::CBV:
+  Flags = DescriptorRangeFlags::DataStaticWhileSetAtExecute;
+  break;
+case ClauseType::SRV:
+  Flags = DescriptorRangeFlags::DataStaticWhileSetAtExecute;
+  break;
+case ClauseType::UAV:
+  Flags = DescriptorRangeFlags::DataVolatile;
+  break;
+case ClauseType::Sampler:
+  Flags = DescriptorRangeFlags::None;
+  break;
+}
+  }
+};
+
+// Models the end of a descriptor table and stores its visibility
+struct DescriptorTable {
+  ShaderVisibility Visibility = ShaderVisibility::All;
+  uint32_t NumClauses = 0; // The number of clauses in the table
+};
+
+// Models RootElement : DescriptorTable | DescriptorTableClause
+struct RootElement {

damyanp wrote:

Is there a reason not to use `std::variant`?

https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Damyan Pepper via llvm-branch-commits


@@ -148,6 +148,347 @@ bool RootSignatureLexer::LexToken(RootSignatureToken 
&Result) {
   return false;
 }
 
+// Parser Definitions
+
+RootSignatureParser::RootSignatureParser(
+SmallVector &Elements,
+const SmallVector &Tokens)
+: Elements(Elements) {
+  CurTok = Tokens.begin();
+  LastTok = Tokens.end();
+}
+
+bool RootSignatureParser::ReportError() { return true; }
+
+bool RootSignatureParser::Parse() {
+  // Handle edge-case of empty RootSignature()
+  if (CurTok == LastTok)
+return false;
+
+  // Iterate as many RootElements as possible
+  bool HasComma = true;
+  while (HasComma &&
+ IsCurExpectedToken(ArrayRef{TokenKind::kw_DescriptorTable})) {
+if (ParseRootElement())
+  return true;
+HasComma = !TryConsumeExpectedToken(TokenKind::pu_comma);
+if (HasComma)
+  ConsumeNextToken();
+  }
+
+  if (HasComma)
+return ReportError(); // report 'comma' denotes a required extra item
+
+  // Ensure that we are at the end of the tokens
+  CurTok++;
+  if (CurTok != LastTok)
+return ReportError(); // report expected end of input but got more
+  return false;
+}
+
+bool RootSignatureParser::ParseRootElement() {
+  // Dispatch onto the correct parse method
+  switch (CurTok->Kind) {
+  case TokenKind::kw_DescriptorTable:
+return ParseDescriptorTable();
+  default:
+llvm_unreachable("Switch for an expected token was not provided");
+return true;

damyanp wrote:

Is the `return true` after `llvm_unreachable` the right thing to do here?  

https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Damyan Pepper via llvm-branch-commits


@@ -0,0 +1,140 @@
+//===- HLSLRootSignature.h - HLSL Root Signature helper objects 
---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+///
+/// \file This file contains helper objects for working with HLSL Root
+/// Signatures.
+///
+//===--===//
+
+#ifndef LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H
+#define LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H
+
+#include 
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace hlsl {
+namespace root_signature {
+
+// This is a copy from DebugInfo/CodeView/CodeView.h
+#define RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(Class)
\
+  inline Class operator|(Class a, Class b) {   
\
+return static_cast(llvm::to_underlying(a) | 
\
+  llvm::to_underlying(b)); 
\
+  }
\
+  inline Class operator&(Class a, Class b) {   
\
+return static_cast(llvm::to_underlying(a) & 
\
+  llvm::to_underlying(b)); 
\
+  }
\
+  inline Class operator~(Class a) {
\
+return static_cast(~llvm::to_underlying(a));
\
+  }
\
+  inline Class &operator|=(Class &a, Class b) {
\
+a = a | b; 
\
+return a;  
\
+  }
\
+  inline Class &operator&=(Class &a, Class b) {
\
+a = a & b; 
\
+return a;  
\
+  }
+
+// Definition of the various enumerations and flags
+enum class DescriptorRangeFlags : unsigned {
+  None = 0,
+  DescriptorsVolatile = 0x1,
+  DataVolatile = 0x2,
+  DataStaticWhileSetAtExecute = 0x4,
+  DataStatic = 0x8,
+  DescriptorsStaticKeepingBufferBoundsChecks = 0x1,
+  ValidFlags = 0x1000f,
+  ValidSamplerFlags = DescriptorsVolatile,
+};
+RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(DescriptorRangeFlags)
+
+enum class ShaderVisibility {
+  All = 0,
+  Vertex = 1,
+  Hull = 2,
+  Domain = 3,
+  Geometry = 4,
+  Pixel = 5,
+  Amplification = 6,
+  Mesh = 7,
+};
+
+// Definitions of the in-memory data layout structures
+
+// Models the different registers: bReg | tReg | uReg | sReg
+enum class RegisterType { BReg, TReg, UReg, SReg };
+struct Register {
+  RegisterType ViewType;
+  uint32_t Number;
+};
+
+static const uint32_t DescriptorTableOffsetAppend = 0x;
+// Models DTClause : CBV | SRV | UAV | Sampler by collecting like parameters
+enum class ClauseType { CBV, SRV, UAV, Sampler };
+struct DescriptorTableClause {
+  ClauseType Type;
+  Register Register;

damyanp wrote:

It looks like there's nothing enforcing Register's initialization?  Since this 
struct has a constructor, I'd expect to get a fully initialized object back 
when I construct it. 

https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Chris B via llvm-branch-commits


@@ -0,0 +1,140 @@
+//===- HLSLRootSignature.h - HLSL Root Signature helper objects 
---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+///
+/// \file This file contains helper objects for working with HLSL Root
+/// Signatures.
+///
+//===--===//
+
+#ifndef LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H
+#define LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H
+
+#include 
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace hlsl {
+namespace root_signature {
+
+// This is a copy from DebugInfo/CodeView/CodeView.h
+#define RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(Class)
\
+  inline Class operator|(Class a, Class b) {   
\
+return static_cast(llvm::to_underlying(a) | 
\
+  llvm::to_underlying(b)); 
\
+  }
\
+  inline Class operator&(Class a, Class b) {   
\
+return static_cast(llvm::to_underlying(a) & 
\
+  llvm::to_underlying(b)); 
\
+  }
\
+  inline Class operator~(Class a) {
\
+return static_cast(~llvm::to_underlying(a));
\
+  }
\
+  inline Class &operator|=(Class &a, Class b) {
\
+a = a | b; 
\
+return a;  
\
+  }
\
+  inline Class &operator&=(Class &a, Class b) {
\
+a = a & b; 
\
+return a;  
\
+  }
+
+// Definition of the various enumerations and flags
+enum class DescriptorRangeFlags : unsigned {
+  None = 0,
+  DescriptorsVolatile = 0x1,
+  DataVolatile = 0x2,
+  DataStaticWhileSetAtExecute = 0x4,
+  DataStatic = 0x8,
+  DescriptorsStaticKeepingBufferBoundsChecks = 0x1,
+  ValidFlags = 0x1000f,
+  ValidSamplerFlags = DescriptorsVolatile,
+};
+RS_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(DescriptorRangeFlags)
+
+enum class ShaderVisibility {
+  All = 0,
+  Vertex = 1,
+  Hull = 2,
+  Domain = 3,
+  Geometry = 4,
+  Pixel = 5,
+  Amplification = 6,
+  Mesh = 7,
+};
+
+// Definitions of the in-memory data layout structures
+
+// Models the different registers: bReg | tReg | uReg | sReg
+enum class RegisterType { BReg, TReg, UReg, SReg };
+struct Register {
+  RegisterType ViewType;
+  uint32_t Number;
+};
+
+static const uint32_t DescriptorTableOffsetAppend = 0x;
+// Models DTClause : CBV | SRV | UAV | Sampler by collecting like parameters
+enum class ClauseType { CBV, SRV, UAV, Sampler };

llvm-beanz wrote:

Can we do this instead?
```suggestion
using ClauseType = llvm::dxil::ResourceClass;
```

This will change `CBV` to `CBuffer`, but otherwise those enums need to be the 
same, right?

https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Chris B via llvm-branch-commits


@@ -89,6 +91,72 @@ class RootSignatureLexer {
   }
 };
 
+class RootSignatureParser {
+public:
+  RootSignatureParser(SmallVector &Elements,
+  const SmallVector &Tokens);
+
+  // Iterates over the provided tokens and constructs the in-memory
+  // representations of the RootElements.
+  //
+  // The return value denotes if there was a failure and the method will
+  // return on the first encountered failure, or, return false if it
+  // can sucessfully reach the end of the tokens.
+  bool Parse();
+
+private:
+  bool ReportError(); // TODO: Implement this to report error through Diags

llvm-beanz wrote:

I don't think this should be separate. It's going to be really hard to ensure 
that any follow-up that adds error reporting properly covers all the cases.

https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement Parsing of Descriptor Tables (PR #122982)

2025-01-23 Thread Chris B via llvm-branch-commits

https://github.com/llvm-beanz edited 
https://github.com/llvm/llvm-project/pull/122982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -5907,6 +5910,82 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+static std::optional findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT = 
TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  EVT LdVT= LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+  // Allow wider loads if they are sufficiently aligned to avoid memory faults
+  // and if the original load is simple.
+  unsigned LdAlign =
+  (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : 
LD->getAlign().value();
+
+  // Find the vector type that can load from.
+  std::optional FirstVT =
+  findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign,
+  WidthDiff.getKnownMinValue());
+
+  if (!FirstVT)
+return SDValue();
+
+  SmallVector MemVTs;
+  TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+  SDValue LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
+ BasePtr, LD->getMemOperand());
+
+  // Load the element with one instruction.
+  SDValue Result;
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  if (!FirstVT->isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+Result = DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  }
+  else if (FirstVT == WidenVT)
+Result = LdOp;
+  else {
+// TODO: We don't currently have any tests that exercise this code path.
+assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
+unsigned NumConcat =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+SmallVector ConcatOps(NumConcat);
+SDValue UndefVal = DAG.getUNDEF(*FirstVT);
+ConcatOps[0] = LdOp;
+for (unsigned i = 1; i != NumConcat; ++i)
+  ConcatOps[i] = UndefVal;
+Result = DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+  }
+

arsenm wrote:

The type coercion code is the sharable part that could be extracted into a 
helper function 

https://github.com/llvm/llvm-project/pull/120598
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -194,8 +194,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
   return false;
 }
 
-/// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
+template 
+static BaseIndexOffset matchSDNode(const T *N,

arsenm wrote:

That's fine, the optimization can be another PR 

https://github.com/llvm/llvm-project/pull/120640
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)

2025-01-23 Thread Matt Arsenault via llvm-branch-commits


@@ -1395,6 +1398,34 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SetSplitVector(SDValue(N, ResNo), Lo, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  SDLoc dl(LD);
+
+  EVT MemoryVT = LD->getMemoryVT();
+  unsigned NumElts = MemoryVT.getVectorMinNumElements();
+
+  EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
+  EVT ElemVT = EVT::getVectorVT(*DAG.getContext(),
+MemoryVT.getVectorElementType(), 1);
+
+  // Create a single atomic to load all the elements at once.
+  SDValue Atomic = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntMemoryVT, 
IntMemoryVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getMemOperand());
+
+  // Instead of splitting, put all the elements back into a vector.
+  SmallVector Ops;
+  for (unsigned i = 0; i < NumElts; ++i) {
+SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Atomic,
+  DAG.getVectorIdxConstant(i, dl));
+Elt = DAG.getBitcast(ElemVT, Elt);
+Ops.push_back(Elt);
+  }
+  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops);

arsenm wrote:

I'd consider this a bug; I would expect this to assert in getNode.

https://github.com/llvm/llvm-project/pull/120640
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Clang][CWG2369] Implement GCC's heuristic for DR 2369 (PR #124231)

2025-01-23 Thread Younan Zhang via llvm-branch-commits

https://github.com/zyn0217 created 
https://github.com/llvm/llvm-project/pull/124231

None

>From c36dd4fcac367b206072b36ccc9be4106a22ec3b Mon Sep 17 00:00:00 2001
From: Younan Zhang 
Date: Fri, 24 Jan 2025 13:52:37 +0800
Subject: [PATCH] Implement GCC's CWG 2369 heuristic

---
 clang/include/clang/Sema/Sema.h   |   7 +-
 clang/lib/Sema/SemaOverload.cpp   |  70 +++-
 clang/lib/Sema/SemaTemplateDeduction.cpp  |  13 +-
 .../SemaTemplate/concepts-recursive-inst.cpp  | 169 ++
 4 files changed, 246 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 87d9a335763e31..fd4d1f7e0d8f9c 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10236,7 +10236,8 @@ class Sema final : public SemaBase {
   FunctionTemplateDecl *FunctionTemplate, ArrayRef ParamTypes,
   ArrayRef Args, OverloadCandidateSet &CandidateSet,
   ConversionSequenceList &Conversions, bool SuppressUserConversions,
-  CXXRecordDecl *ActingContext = nullptr, QualType ObjectType = QualType(),
+  bool NonInstOnly, CXXRecordDecl *ActingContext = nullptr,
+  QualType ObjectType = QualType(),
   Expr::Classification ObjectClassification = {},
   OverloadCandidateParamOrder PO = {});
 
@@ -12272,7 +12273,7 @@ class Sema final : public SemaBase {
   sema::TemplateDeductionInfo &Info,
   SmallVectorImpl const *OriginalCallArgs = nullptr,
   bool PartialOverloading = false,
-  llvm::function_ref CheckNonDependent = [] { return false; });
+  llvm::function_ref CheckNonDependent = [](bool) { return 
false; });
 
   /// Perform template argument deduction from a function call
   /// (C++ [temp.deduct.call]).
@@ -12306,7 +12307,7 @@ class Sema final : public SemaBase {
   FunctionDecl *&Specialization, sema::TemplateDeductionInfo &Info,
   bool PartialOverloading, bool AggregateDeductionCandidate,
   QualType ObjectType, Expr::Classification ObjectClassification,
-  llvm::function_ref)> CheckNonDependent);
+  llvm::function_ref, bool)> CheckNonDependent);
 
   /// Deduce template arguments when taking the address of a function
   /// template (C++ [temp.deduct.funcaddr]) or matching a specialization to
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 3be9ade80f1d94..aded8abe5b4f7b 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -7733,10 +7733,10 @@ void Sema::AddMethodTemplateCandidate(
   MethodTmpl, ExplicitTemplateArgs, Args, Specialization, Info,
   PartialOverloading, /*AggregateDeductionCandidate=*/false, 
ObjectType,
   ObjectClassification,
-  [&](ArrayRef ParamTypes) {
+  [&](ArrayRef ParamTypes, bool NonInstOnly) {
 return CheckNonDependentConversions(
 MethodTmpl, ParamTypes, Args, CandidateSet, Conversions,
-SuppressUserConversions, ActingContext, ObjectType,
+SuppressUserConversions, NonInstOnly, ActingContext, 
ObjectType,
 ObjectClassification, PO);
   });
   Result != TemplateDeductionResult::Success) {
@@ -7818,10 +7818,11 @@ void Sema::AddTemplateOverloadCandidate(
   PartialOverloading, AggregateCandidateDeduction,
   /*ObjectType=*/QualType(),
   /*ObjectClassification=*/Expr::Classification(),
-  [&](ArrayRef ParamTypes) {
+  [&](ArrayRef ParamTypes, bool NonInstOnly) {
 return CheckNonDependentConversions(
 FunctionTemplate, ParamTypes, Args, CandidateSet, Conversions,
-SuppressUserConversions, nullptr, QualType(), {}, PO);
+SuppressUserConversions, NonInstOnly, nullptr, QualType(), {},
+PO);
   });
   Result != TemplateDeductionResult::Success) {
 OverloadCandidate &Candidate =
@@ -7863,7 +7864,7 @@ bool Sema::CheckNonDependentConversions(
 FunctionTemplateDecl *FunctionTemplate, ArrayRef ParamTypes,
 ArrayRef Args, OverloadCandidateSet &CandidateSet,
 ConversionSequenceList &Conversions, bool SuppressUserConversions,
-CXXRecordDecl *ActingContext, QualType ObjectType,
+bool NonInstOnly, CXXRecordDecl *ActingContext, QualType ObjectType,
 Expr::Classification ObjectClassification, OverloadCandidateParamOrder PO) 
{
   // FIXME: The cases in which we allow explicit conversions for constructor
   // arguments never consider calling a constructor template. It's not clear
@@ -7900,6 +7901,63 @@ bool Sema::CheckNonDependentConversions(
 }
   }
 
+  // 
https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=2154bcd6d43cfd821ca70e1583880c4ed955355d
+  auto ConversionMightInduceInstantiation = [&](QualType ParmType,
+QualType ArgType) {
+ParmType = ParmType.getNonReferenceType();
+ArgType = ArgType.getNonReferenceType();
+bool Poin

[llvm-branch-commits] [clang] [Clang][CWG2369] Implement GCC's heuristic for DR 2369 (PR #124231)

2025-01-23 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff 816bda32a6e9c52d93a06e6e19a5ac3fcc66 
c36dd4fcac367b206072b36ccc9be4106a22ec3b --extensions cpp,h -- 
clang/include/clang/Sema/Sema.h clang/lib/Sema/SemaOverload.cpp 
clang/lib/Sema/SemaTemplateDeduction.cpp 
clang/test/SemaTemplate/concepts-recursive-inst.cpp
```





View the diff from clang-format here.


```diff
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index fd4d1f7e0d..99ca651591 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12273,7 +12273,9 @@ public:
   sema::TemplateDeductionInfo &Info,
   SmallVectorImpl const *OriginalCallArgs = nullptr,
   bool PartialOverloading = false,
-  llvm::function_ref CheckNonDependent = [](bool) { return 
false; });
+  llvm::function_ref CheckNonDependent = [](bool) {
+return false;
+  });
 
   /// Perform template argument deduction from a function call
   /// (C++ [temp.deduct.call]).
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index aded8abe5b..6f3400cf79 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -7978,7 +7978,8 @@ bool Sema::CheckNonDependentConversions(
 // For members, 'this' got ConvIdx = 0 previously.
 ConvIdx = ThisConversions + I;
   }
-  if (NonInstOnly && ConversionMightInduceInstantiation(ParamType, 
Args[I]->getType()))
+  if (NonInstOnly &&
+  ConversionMightInduceInstantiation(ParamType, Args[I]->getType()))
 continue;
   Conversions[ConvIdx]
 = TryCopyInitialization(*this, Args[I], ParamType,
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp 
b/clang/lib/Sema/SemaTemplateDeduction.cpp
index a44ad00d2c..dcaaed8613 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -4702,7 +4702,8 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
   runWithSufficientStackSpace(Info.getLocation(), [&] {
 Result = FinishTemplateArgumentDeduction(
 FunctionTemplate, Deduced, NumExplicitlySpecified, Specialization, 
Info,
-&OriginalCallArgs, PartialOverloading, [&, CallingCtx](bool 
NonInstOnly) {
+&OriginalCallArgs, PartialOverloading,
+[&, CallingCtx](bool NonInstOnly) {
   ContextRAII SavedContext(*this, CallingCtx);
   return CheckNonDependent(ParamTypesForArgChecking, NonInstOnly);
 });

```




https://github.com/llvm/llvm-project/pull/124231
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits