Issue 155402
Summary Why was the cold block placed inside the hot loop? It appears that the MachineBlockPlacement pass did not perform effective optimization.
Labels new issue
Assignees
Reporter Ganwenzhao
    ```cpp
extern int verify();
extern int slowpath();
extern int fastpath();
extern void other_work();

// Reproducer for the block-placement issue: an outer loop of k iterations
// whose inner verification loop has an [[unlikely]] early exit to slowpath().
// Each outer iteration adds exactly one of slowpath() or fastpath() to the
// result and then calls other_work(). Returns the accumulated sum.
// Note: t is a parameter that is never reset, so its value carries over from
// one outer iteration to the next.
int test(int k, int t) {
    int res = 0;
    int ret_no = 0;
    // outer loop
    for (int i = 0; i < k; i++) {
        // inner loop: call verify() while t (post-decremented on every test,
        // including the failing one) is still non-negative
        while (t-- >= 0) {
            ret_no = verify();
            // unlikely branch: execute slowpath, then skip fastpath
            if (ret_no < 0) [[unlikely]] {
                res += slowpath();
                goto do_work;
            }
        }
        // verify ok (inner loop ran to completion), execute fastpath
 res += fastpath();

    do_work:
        other_work();
    }

 return res;
}

```

Compiling the example above with the following commands (llc version 19.1.7) shows that
the cold block %bb.5 (the [[unlikely]] slowpath call) is still placed inside the loop body.

clang++ -c -O2 -target aarch64-linux-gnu -emit-llvm -S test.cc -o test.ll
llc -O3  -mtriple=aarch64-none-linux-gnu -filetype=asm test.ll -o test.s

```asm
// Excerpt of the layout emitted by llc for test(k, t).
// Register roles visible in this excerpt: w19 = k, w21 = t, w22 = i, w20 = res.
// The cold slowpath block (%bb.5) sits between the inner loop and the
// fastpath block (.LBB0_6); the outer-loop tail is duplicated in both.
// %bb.1:                               // %.preheader
	mov	w19, w0
	mov	w21, w1
	mov	w22, wzr
	mov	w20, wzr
.LBB0_2:                                // =>This Loop Header: Depth=1
 //     Child Loop BB0_3 Depth 2
// w23 is computed once per outer iteration and copied into w21 (t) on the
// fastpath exit below.
	cmn	w21, #1
	csinv	w8, w21, wzr, lt
	sub	w23, w8, #1
.LBB0_3: //   Parent Loop BB0_2 Depth=1
 // =>  This Inner Loop Header: Depth=2
// Bit 31 of w21 set means t < 0: leave the inner loop and take the fastpath.
	tbnz	w21, #31, .LBB0_6
// %bb.4:                               //   in Loop: Header=BB0_3 Depth=2
	sub	w21, w21, #1
	bl	_Z6verifyv
// verify() returned a non-negative value: keep iterating the inner loop.
	tbz	w0, #31, .LBB0_3
// %bb.5:                               //   in Loop: Header=BB0_2 Depth=1
// Cold path (verify() < 0): slowpath() followed by its own copy of the
// outer-loop tail.
	bl	_Z8slowpathv
	add	w20, w0, w20
	bl	_Z10other_workv
	add	w22, w22, #1
	cmp	w22, w19
	b.ne	.LBB0_2
	b	.LBB0_7
.LBB0_6: //   in Loop: Header=BB0_2 Depth=1
// Hot path: fastpath() followed by a second copy of the outer-loop tail.
	bl	_Z8fastpathv
	mov	w21, w23
	add	w20, w0, w20
	bl	_Z10other_workv
	add	w22, w22, #1
	cmp	w22, w19
	b.ne	.LBB0_2
```

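The following layout appears to be more efficient: the cold slowpath block is moved out of the loop body to the end of the function (.LBB0_9), and the outer-loop tail (.LBB0_6) is shared by both paths instead of being duplicated.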

```asm
// Layout proposed by the reporter for test(k, t).
// Register roles visible in this excerpt: w19 = k, w21 = t, w22 = i, w20 = res.
// The cold slowpath block (.LBB0_9) is placed after the return blocks and
// branches back to the shared outer-loop tail at .LBB0_6.
// %bb.1:                               // %.preheader
	mov	w19, w0
	mov	w21, w1
	mov	w22, wzr
	mov	w20, wzr
.LBB0_2:                                // =>This Loop Header: Depth=1
 //     Child Loop BB0_3 Depth 2
// w23 is computed once per outer iteration and copied into w21 (t) on the
// fastpath exit below.
	cmn	w21, #1
	csinv	w8, w21, wzr, lt
	sub	w23, w8, #1
.LBB0_3: //   Parent Loop BB0_2 Depth=1
 // =>  This Inner Loop Header: Depth=2
// Bit 31 of w21 set means t < 0: leave the inner loop and take the fastpath.
	tbnz	w21, #31, .LBB0_5
// %bb.4:                               //   in Loop: Header=BB0_3 Depth=2
	sub	w21, w21, #1
	bl	_Z6verifyv
// verify() returned a non-negative value: keep iterating the inner loop.
	tbz	w0, #31, .LBB0_3
// verify() < 0: jump out of line to the cold slowpath block.
	b	.LBB0_9
.LBB0_5:                                //   in Loop: Header=BB0_2 Depth=1
// Hot path: fastpath(), then fall through into the shared loop tail.
	bl	_Z8fastpathv
	mov	w21, w23
.LBB0_6: //   in Loop: Header=BB0_2 Depth=1
// Shared outer-loop tail: accumulate the call result in w0, call
// other_work(), then advance and test i.
	add	w20, w0, w20
	bl	_Z10other_workv
	add	w22, w22, #1
	cmp	w22, w19
	b.ne	.LBB0_2
// %bb.7:
	.......                             // some instructions omitted
	ret
.LBB0_8:
	....... //   some instructions omitted
	ret
.LBB0_9: //   in Loop: Header=BB0_2 Depth=1
// Cold path, laid out after the returns: slowpath(), then rejoin the shared
// loop tail.
	.cfi_restore_state
	bl	_Z8slowpathv
	b	.LBB0_6
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to