| Issue |
164550
|
| Summary |
clang fails to optimize imul into add with constant propagation
|
| Labels |
clang
|
| Assignees |
|
| Reporter |
rockeet
|
```c++
#include <immintrin.h>
typedef unsigned long ulong;
// Extracts the `width`-bit field that starts at bit position `width * i`
// of the byte buffer `ptr` and returns it as an unsigned value.
//   ptr   - base of the packed buffer
//   width - field width in bits (assumed <= 64)
//   i     - index of the field
// NOTE(review): the load reads through a `long*` cast of a `char*` at an
// arbitrary byte offset — presumably acceptable for this reproducer.
// NOTE(review): only 8 bytes are loaded, so a field with offset + width > 64
// would not fit entirely in `u` — confirm callers never need that.
ulong getuint(const char* ptr, ulong width, ulong i) {
[[assume(width <= 64)]];
// Bit position of the field inside its first byte (0..7).
auto offset = (width * i) % 8u;
// Load one `long` starting at the byte that holds the field's first bit.
auto u = *(long*)(ptr + (width * i) / 8u);
// BEXTR with start = offset and length = width.
return _bextr_u64(u, offset, unsigned(width));
}
// Two extracted fields returned together: x is field i, y is field i + 1.
struct Pair { ulong x, y; };
// Returns the two consecutive `width`-bit fields at indices `i` and `i + 1`
// of the buffer `ptr`, each obtained via getuint().
Pair getuint_pair(const char* ptr, ulong width, ulong i) {
[[assume(width <= 64)]];
// The two calls need width * i and width * (i + 1); the second product
// equals the first plus `width`, which is the add-instead-of-imul
// opportunity this report is about.
return {getuint(ptr, width, i), getuint(ptr, width, i+1)};
}
```
For `getuint_pair`, clang generates two `imul` instructions, while gcc generates one `imul` and one `add`:
<table>
<tr>
<td>clang(trunk)</td>
<td>gcc(13.4)</td>
</tr>
<tr>
<td>
```nasm
; clang output for getuint (Intel syntax).
; In:  rdi = ptr, rsi = width, rdx = i
; Out: rax = extracted field
getuint(char const*, unsigned long, unsigned long):
; rdx = width * i (bit position of the field)
imul rdx, rsi
; rax = bit position / 8 (byte offset into ptr)
mov rax, rdx
shr rax, 3
; edx = bit position % 8 (start bit inside the loaded qword)
and edx, 7
; esi = (width << 8) | start: BEXTR control word, start in bits 7:0,
; length in bits 15:8
shl esi, 8
or esi, edx
bextr rax, qword ptr [rdi + rax], rsi
ret
; clang output for getuint_pair (Intel syntax).
; In:  rdi = ptr, rsi = width, rdx = i
; Out: rax = field i (Pair::x), rdx = field i + 1 (Pair::y)
getuint_pair(char const*, unsigned long, unsigned long):
; rax = width * i (first multiply)
mov rax, rdx
imul rax, rsi
; rcx = byte offset of field i, eax = start bit of field i
mov rcx, rax
shr rcx, 3
and eax, 7
; rdx = width * (i + 1), computed with a second multiply; this is the
; imul the report says could be an add of width to the first product
inc rdx
imul rdx, rsi
; esi = width << 8, the BEXTR length field shared by both extractions
shl esi, 8
or eax, esi
bextr rax, qword ptr [rdi + rcx], rax
; Same byte-offset / start-bit split for field i + 1
mov rcx, rdx
shr rcx, 3
and edx, 7
or edx, esi
bextr rdx, qword ptr [rdi + rcx], rdx
ret
```
</td>
<td>
```nasm
; gcc output for getuint (Intel syntax).
; In:  rdi = ptr, rsi = width, rdx = i
; Out: rax = extracted field
getuint(char const*, unsigned long, unsigned long):
; rdx = width * i (bit position of the field)
imul rdx, rsi
; esi = width << 8 (BEXTR length field, bits 15:8)
sal esi, 8
; rax = byte offset (bit position / 8), edx = start bit (bit position % 8)
mov rax, rdx
and edx, 7
shr rax, 3
; esi = (width << 8) | start: complete BEXTR control word
or esi, edx
bextr rax, QWORD PTR [rdi+rax], rsi
ret
; gcc output for getuint_pair (Intel syntax).
; In:  rdi = ptr, rsi = width, rdx = i
; Out: rax = field i (Pair::x), rdx = field i + 1 (Pair::y)
getuint_pair(char const*, unsigned long, unsigned long):
; rdx = width * i (the only multiply in this function)
imul rdx, rsi
; ecx = width << 8, the BEXTR length field shared by both extractions
mov ecx, esi
sal ecx, 8
mov rax, rdx
mov r8, rdx
; rsi = width + width * i = width * (i + 1): an add replaces the second imul
add rsi, rdx
; Field i: eax = start bit | length field, r8 = byte offset
and eax, 7
shr r8, 3
or eax, ecx
bextr rax, QWORD PTR [rdi+r8], rax
; Field i + 1: same split applied to rsi
mov r8, rsi
and esi, 7
or ecx, esi
shr r8, 3
mov edx, ecx
bextr rdx, QWORD PTR [rdi+r8], rdx
ret
```
</td>
</tr>
</table>
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs