Issue |
155402
|
Summary |
Why was the cold block placed inside the hot loop? It appears that the MachineBlockPlacement pass did not perform effective optimization.
|
Labels |
new issue
|
Assignees |
|
Reporter |
Ganwenzhao
|
```cpp
extern int verify();
extern int slowpath();
extern int fastpath();
extern void other_work();
// Reproducer: an outer loop of k iterations wrapping an inner verify() loop
// that has a cold early exit through slowpath().
// Returns res, the running sum of the slowpath()/fastpath() results.
int test(int k, int t) {
int res = 0;
int ret_no = 0;
// outer loop
for (int i = 0; i < k; i++) {
// inner loop: keep calling verify() while t (post-decremented) is >= 0.
// Note that t is not reset per outer iteration; it keeps counting down.
while (t-- >= 0) {
ret_no = verify();
// unlikely branch, execute slowpath
if (ret_no < 0) [[unlikely]] {
res += slowpath();
// leave the inner loop and skip fastpath(), going straight to the shared tail
goto do_work;
}
}
// inner loop ran out without a negative verify() result: execute fastpath
res += fastpath();
do_work:
// shared tail: reached once per outer iteration from both paths
other_work();
}
return res;
}
```
Compiling the example above with the following commands (llc version 19.1.7) shows that
the cold block %bb.5 (the `slowpath()` call) remains inside the loop body:
clang++ -c -O2 -target aarch64-linux-gnu -emit-llvm -S test.cc -o test.ll
llc -O3 -mtriple=aarch64-none-linux-gnu -filetype=asm test.ll -o test.s
```asm
// %bb.1: // %.preheader
mov w19, w0
mov w21, w1
mov w22, wzr
mov w20, wzr
.LBB0_2: // =>This Loop Header: Depth=1
// Child Loop BB0_3 Depth 2
cmn w21, #1
csinv w8, w21, wzr, lt
sub w23, w8, #1
.LBB0_3: // Parent Loop BB0_2 Depth=1
// => This Inner Loop Header: Depth=2
tbnz w21, #31, .LBB0_6
// %bb.4: // in Loop: Header=BB0_3 Depth=2
sub w21, w21, #1
bl _Z6verifyv
tbz w0, #31, .LBB0_3
// %bb.5: // in Loop: Header=BB0_2 Depth=1
bl _Z8slowpathv
add w20, w0, w20
bl _Z10other_workv
add w22, w22, #1
cmp w22, w19
b.ne .LBB0_2
b .LBB0_7
.LBB0_6: // in Loop: Header=BB0_2 Depth=1
bl _Z8fastpathv
mov w21, w23
add w20, w0, w20
bl _Z10other_workv
add w22, w22, #1
cmp w22, w19
b.ne .LBB0_2
```
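The following layout appears to be more efficient: the cold `slowpath()` block (.LBB0_9) is placed out of line after the return blocks and branches back to the shared tail at .LBB0_6.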
```asm
// %bb.1: // %.preheader
mov w19, w0
mov w21, w1
mov w22, wzr
mov w20, wzr
.LBB0_2: // =>This Loop Header: Depth=1
// Child Loop BB0_3 Depth 2
cmn w21, #1
csinv w8, w21, wzr, lt
sub w23, w8, #1
.LBB0_3: // Parent Loop BB0_2 Depth=1
// => This Inner Loop Header: Depth=2
tbnz w21, #31, .LBB0_5
// %bb.4: // in Loop: Header=BB0_3 Depth=2
sub w21, w21, #1
bl _Z6verifyv
tbz w0, #31, .LBB0_3
b .LBB0_9
.LBB0_5: // in Loop: Header=BB0_2 Depth=1
bl _Z8fastpathv
mov w21, w23
.LBB0_6: // in Loop: Header=BB0_2 Depth=1
add w20, w0, w20
bl _Z10other_workv
add w22, w22, #1
cmp w22, w19
b.ne .LBB0_2
// %bb.7:
....... // ignore some instructions
ret
.LBB0_8:
....... // ignore some instructions
ret
.LBB0_9: // in Loop: Header=BB0_2 Depth=1
.cfi_restore_state
bl _Z8slowpathv
b .LBB0_6
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs