[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke updated
https://github.com/llvm/llvm-project/pull/142584
>From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001
From: Alexis Engelke
Date: Wed, 4 Jun 2025 09:21:02 +
Subject: [PATCH 1/2] Move one check to beginning of function
Created using spr 1.3.5-bogner
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +-
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e96f3f8193b09..2dbabfe345d5e 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
if (SuccChain.UnscheduledPredecessors == 0)
return false;
+ // Compile-time optimization: runtime is quadratic in the number of
+ // predecessors. For such uncommon cases, exit early.
+ if (Succ->pred_size() > PredecessorLimit)
+return false;
+
// There are two basic scenarios here:
// -
// Case 1: triangular shape CFG (if-then):
@@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
bool BadCFGConflict = false;
- // Compile-time optimization: runtime is quadratic in the number of
- // predecessors. For such uncommon cases, exit early.
- if (Succ->pred_size() > PredecessorLimit)
-return false;
-
for (MachineBasicBlock *Pred : Succ->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (Pred == Succ || PredChain == &SuccChain ||
>From e90cfcb5740fc7297e05a876172ad8c25f596a33 Mon Sep 17 00:00:00 2001
From: Alexis Engelke
Date: Fri, 13 Jun 2025 15:43:00 +
Subject: [PATCH 2/2] Test new command line flag
Created using spr 1.3.5-bogner
---
llvm/test/CodeGen/RISCV/branch.ll | 49 +++
1 file changed, 49 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/branch.ll
b/llvm/test/CodeGen/RISCV/branch.ll
index 578080cd3a240..ed86ca8ca4dd1 100644
--- a/llvm/test/CodeGen/RISCV/branch.ll
+++ b/llvm/test/CodeGen/RISCV/branch.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs
-block-placement-predecessor-limit=10 < %s \
+; RUN: | FileCheck -check-prefix=RV32I-MBPLIMIT %s
define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
; RV32I-LABEL: foo:
@@ -48,6 +50,53 @@ define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
; RV32I-NEXT:lw zero, 0(a1)
; RV32I-NEXT: .LBB0_14: # %end
; RV32I-NEXT:ret
+;
+; RV32I-MBPLIMIT-LABEL: foo:
+; RV32I-MBPLIMIT: # %bb.0:
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_2
+; RV32I-MBPLIMIT-NEXT: .LBB0_1: # %end
+; RV32I-MBPLIMIT-NEXT:ret
+; RV32I-MBPLIMIT-NEXT: .LBB0_2: # %test2
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.3: # %test3
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blt a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.4: # %test4
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bge a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.5: # %test5
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bltu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.6: # %test6
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgeu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.7: # %test7
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blt a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.8: # %test8
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bge a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.9: # %test9
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bltu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.10: # %test10
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgeu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.11: # %test11
+; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:andi a2, a2, 1
+; RV32I-MBPLIMIT-NEXT:bnez a2, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.12: # %test12
+; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.13: # %test13
+; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.14: # %test14
+; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:ret
%val1 = load volatile i32, ptr %b
%tst1 = icmp eq i32 %val1, %a
br i1 %tst1, label %end, label %test2
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/c
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke updated
https://github.com/llvm/llvm-project/pull/142584
>From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001
From: Alexis Engelke
Date: Wed, 4 Jun 2025 09:21:02 +
Subject: [PATCH 1/2] Move one check to beginning of function
Created using spr 1.3.5-bogner
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +-
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e96f3f8193b09..2dbabfe345d5e 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
if (SuccChain.UnscheduledPredecessors == 0)
return false;
+ // Compile-time optimization: runtime is quadratic in the number of
+ // predecessors. For such uncommon cases, exit early.
+ if (Succ->pred_size() > PredecessorLimit)
+return false;
+
// There are two basic scenarios here:
// -
// Case 1: triangular shape CFG (if-then):
@@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
bool BadCFGConflict = false;
- // Compile-time optimization: runtime is quadratic in the number of
- // predecessors. For such uncommon cases, exit early.
- if (Succ->pred_size() > PredecessorLimit)
-return false;
-
for (MachineBasicBlock *Pred : Succ->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (Pred == Succ || PredChain == &SuccChain ||
>From e90cfcb5740fc7297e05a876172ad8c25f596a33 Mon Sep 17 00:00:00 2001
From: Alexis Engelke
Date: Fri, 13 Jun 2025 15:43:00 +
Subject: [PATCH 2/2] Test new command line flag
Created using spr 1.3.5-bogner
---
llvm/test/CodeGen/RISCV/branch.ll | 49 +++
1 file changed, 49 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/branch.ll
b/llvm/test/CodeGen/RISCV/branch.ll
index 578080cd3a240..ed86ca8ca4dd1 100644
--- a/llvm/test/CodeGen/RISCV/branch.ll
+++ b/llvm/test/CodeGen/RISCV/branch.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs
-block-placement-predecessor-limit=10 < %s \
+; RUN: | FileCheck -check-prefix=RV32I-MBPLIMIT %s
define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
; RV32I-LABEL: foo:
@@ -48,6 +50,53 @@ define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
; RV32I-NEXT:lw zero, 0(a1)
; RV32I-NEXT: .LBB0_14: # %end
; RV32I-NEXT:ret
+;
+; RV32I-MBPLIMIT-LABEL: foo:
+; RV32I-MBPLIMIT: # %bb.0:
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_2
+; RV32I-MBPLIMIT-NEXT: .LBB0_1: # %end
+; RV32I-MBPLIMIT-NEXT:ret
+; RV32I-MBPLIMIT-NEXT: .LBB0_2: # %test2
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.3: # %test3
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blt a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.4: # %test4
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bge a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.5: # %test5
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bltu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.6: # %test6
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgeu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.7: # %test7
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blt a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.8: # %test8
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bge a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.9: # %test9
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bltu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.10: # %test10
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgeu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.11: # %test11
+; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:andi a2, a2, 1
+; RV32I-MBPLIMIT-NEXT:bnez a2, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.12: # %test12
+; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.13: # %test13
+; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.14: # %test14
+; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:ret
%val1 = load volatile i32, ptr %b
%tst1 = icmp eq i32 %val1, %a
br i1 %tst1, label %end, label %test2
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/c
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/kyulee-com approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
aengelke wrote: Reused an existing test case, this also shows the difference in the resulting block order. If preferred, I can also write a separate test case. https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke updated
https://github.com/llvm/llvm-project/pull/142584
>From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001
From: Alexis Engelke
Date: Wed, 4 Jun 2025 09:21:02 +
Subject: [PATCH 1/2] Move one check to beginning of function
Created using spr 1.3.5-bogner
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +-
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e96f3f8193b09..2dbabfe345d5e 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
if (SuccChain.UnscheduledPredecessors == 0)
return false;
+ // Compile-time optimization: runtime is quadratic in the number of
+ // predecessors. For such uncommon cases, exit early.
+ if (Succ->pred_size() > PredecessorLimit)
+return false;
+
// There are two basic scenarios here:
// -
// Case 1: triangular shape CFG (if-then):
@@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
bool BadCFGConflict = false;
- // Compile-time optimization: runtime is quadratic in the number of
- // predecessors. For such uncommon cases, exit early.
- if (Succ->pred_size() > PredecessorLimit)
-return false;
-
for (MachineBasicBlock *Pred : Succ->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (Pred == Succ || PredChain == &SuccChain ||
>From e90cfcb5740fc7297e05a876172ad8c25f596a33 Mon Sep 17 00:00:00 2001
From: Alexis Engelke
Date: Fri, 13 Jun 2025 15:43:00 +
Subject: [PATCH 2/2] Test new command line flag
Created using spr 1.3.5-bogner
---
llvm/test/CodeGen/RISCV/branch.ll | 49 +++
1 file changed, 49 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/branch.ll
b/llvm/test/CodeGen/RISCV/branch.ll
index 578080cd3a240..ed86ca8ca4dd1 100644
--- a/llvm/test/CodeGen/RISCV/branch.ll
+++ b/llvm/test/CodeGen/RISCV/branch.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs
-block-placement-predecessor-limit=10 < %s \
+; RUN: | FileCheck -check-prefix=RV32I-MBPLIMIT %s
define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
; RV32I-LABEL: foo:
@@ -48,6 +50,53 @@ define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
; RV32I-NEXT:lw zero, 0(a1)
; RV32I-NEXT: .LBB0_14: # %end
; RV32I-NEXT:ret
+;
+; RV32I-MBPLIMIT-LABEL: foo:
+; RV32I-MBPLIMIT: # %bb.0:
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_2
+; RV32I-MBPLIMIT-NEXT: .LBB0_1: # %end
+; RV32I-MBPLIMIT-NEXT:ret
+; RV32I-MBPLIMIT-NEXT: .LBB0_2: # %test2
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.3: # %test3
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blt a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.4: # %test4
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bge a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.5: # %test5
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bltu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.6: # %test6
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgeu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.7: # %test7
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blt a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.8: # %test8
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bge a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.9: # %test9
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bltu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.10: # %test10
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgeu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.11: # %test11
+; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:andi a2, a2, 1
+; RV32I-MBPLIMIT-NEXT:bnez a2, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.12: # %test12
+; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.13: # %test13
+; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT: # %bb.14: # %test14
+; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:ret
%val1 = load volatile i32, ptr %b
%tst1 = icmp eq i32 %val1, %a
br i1 %tst1, label %end, label %test2
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/c
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
kyulee-com wrote: Can we add a LIT test case using this flag? I think you could set it with a smaller number to create a test case. https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/spupyrev approved this pull request. I don't have objections but maybe wait what others say. My assumption is that this is really needed to speed up some artificial IRs which are not (or rarely) observed in practice; for such cases it doesn't matter too much what this pass produces. However, if that's not the case and we do care about the result, then we probably should just optimize the function, instead of adding arbitrary thresholds. https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
aengelke wrote: > Adding this threshold check within isTrellis() feels somewhat unnatural. If > compile time is a concern, could we simply check the size of functions (in > terms of the number of blocks, as opposed to predecessor only) early in this > pass and either skip it or switch to a faster, simpler algorithm? Is such an algorithm already implemented and readily available or would it require implementing a new algorithm? (I would prefer to keep changes locally and not abruptly degrade/change the output of the entire function, so I placed the checks as closely to the relevant points.) > Also 1000 size seems small, may be 1? Some measurement data, left without this patch, right with. 1000 is the region were it becomes somewhat noticeable and at 3000 blocks, can already end up with ~2x change of compile time... a threshold of 10k would be way too high in my opinion. ``` 500: 0.06 0.06 1000: 0.11 0.09 2000: 0.26 0.18 3000: 0.49 0.27 4000: 0.70 0.37 5000: 1.14 0.47 1: 3.86 1.09 ``` https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
kyulee-com wrote: Adding this threshold check within `isTrellis()` feels somewhat unnatural. If compile time is a concern, could we simply check the size of functions (in terms of the number of blocks, as opposed to predecessor only) early in this pass and either skip it or switch to a faster, simpler algorithm? Also 1000 size seems small, may be 1? https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
@@ -1592,6 +1603,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; bool BadCFGConflict = false; + // Compile-time optimization: runtime is quadratic in the number of aengelke wrote: Done https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke updated
https://github.com/llvm/llvm-project/pull/142584
>From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001
From: Alexis Engelke
Date: Wed, 4 Jun 2025 09:21:02 +
Subject: [PATCH] Move one check to beginning of function
Created using spr 1.3.5-bogner
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +-
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e96f3f8193b09..2dbabfe345d5e 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
if (SuccChain.UnscheduledPredecessors == 0)
return false;
+ // Compile-time optimization: runtime is quadratic in the number of
+ // predecessors. For such uncommon cases, exit early.
+ if (Succ->pred_size() > PredecessorLimit)
+return false;
+
// There are two basic scenarios here:
// -
// Case 1: triangular shape CFG (if-then):
@@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
bool BadCFGConflict = false;
- // Compile-time optimization: runtime is quadratic in the number of
- // predecessors. For such uncommon cases, exit early.
- if (Succ->pred_size() > PredecessorLimit)
-return false;
-
for (MachineBasicBlock *Pred : Succ->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (Pred == Succ || PredChain == &SuccChain ||
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
