| Issue |
58212
|
| Summary |
[ARM] divmod decomposition prevents __aeabi_idivmod
|
| Labels |
|
| Assignees |
|
| Reporter |
easyaspi314
|
LLVM will always decompose `div` + `rem` on ARM to `div` + `mul` + `sub` when optimizations are enabled.
This makes sense on targets with hardware division, but on targets without it the decomposition prevents the `div` + `rem` pair from being lowered to a single `__aeabi_[u]idivmod` call:
```llvm
define void @divmod(i32 %num, i32 %den, ptr %out0) {
%quo = udiv i32 %num, %den
%rem = urem i32 %num, %den
store i32 %quo, i32 * %out0, align 4
%out1 = getelementptr i32 *, ptr %out0, i32 1
store i32 %rem, i32 * %out1, align 4
ret void
}
```
With `--target=armv5te-none-eabi -O0`:
```asm
divmod:
push {r11, lr}
sub sp, sp, #8
str r2, [sp, #4]
bl __aeabi_uidivmod
ldr r2, [sp, #4]
str r0, [r2]
str r1, [r2, #4]
add sp, sp, #8
pop {r11, pc}
```
With `--target=armv5te-none-eabi -O3`:
```asm
divmod:
push {r4, r5, r6, lr}
mov r4, r2
mov r5, r1
mov r6, r0
bl __aeabi_uidiv
mul r1, r0, r5
sub r1, r6, r1
stm r4, {r0, r1}
pop {r4, r5, r6, pc}
```
This is because LLVM "decomposes" the `div` + `rem` pair into the following IR:
```llvm
define void @divmod(i32 %num, i32 %den, ptr nocapture writeonly %out0) local_unnamed_addr #0 {
%num.frozen = freeze i32 %num
%den.frozen = freeze i32 %den
%quo = udiv i32 %num.frozen, %den.frozen
%1 = mul i32 %quo, %den.frozen
%rem.decomposed = sub i32 %num.frozen, %1
store i32 %quo, ptr %out0, align 4
%out1 = getelementptr ptr, ptr %out0, i32 1
store i32 %rem.decomposed, ptr %out1, align 4
ret void
}
```
If this optimization didn't occur, it would emit something much cleaner, without having to save `r0` and `r1` in callee-saved registers across the call:
```asm
divmod:
push {r4, lr}
mov r4, r2
bl __aeabi_uidivmod
stm r4!, {r0, r1}
pop {r4, pc}
```
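Additionally, `udiv` + `urem` without optimizations already generates `udiv` + `mls` on targets with hardware integer division (idiv), so the decomposition is unnecessary on those targets as well as being detrimental on targets without hardware division.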
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs