https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122377
Bug ID: 122377
Summary: gcc suboptimal register allocation
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: rockeet at gmail dot com
Target Milestone: ---
#include <immintrin.h>
typedef unsigned long ulong;
// Extracts the `width`-bit field with index `i` from the buffer at `ptr`.
// The field for index i starts at bit position width * i, so the byte
// containing its first bit is (width * i) / 8 and the bit offset within
// that byte is (width * i) % 8.
// NOTE(review): the load dereferences a byte-offset `char` pointer cast to
// `long*`, so the address may be unaligned and sizeof(long) bytes must be
// readable there — confirm callers guarantee this.
// NOTE(review): if `long` is 64 bits, a field with offset + width > 64 does
// not fit entirely in the single load — confirm whether such widths occur.
ulong getuint(const char* ptr, ulong width, ulong i) {
// C++23 assumption attribute: tells the optimizer that width is at most 64.
[[assume(width <= 64)]];
// Bit offset of the field inside its first byte; always in 0..7.
auto offset = (width * i) % 8u;
// Load one `long` starting at the byte that holds the field's first bit.
auto u = *(long*)(ptr + (width * i) / 8u);
// BMI1 bit-field extract: `width` bits of u starting at bit `offset`.
return _bextr_u64(u, offset, unsigned(width));
}
// Two unsigned values returned together by getuint_pair: x is filled from
// index i and y from index i+1.
struct Pair { ulong x, y; };
// Returns the fields at indices i and i+1 as {x, y}, by calling getuint twice
// with the same buffer and width. This is the function whose generated code
// is discussed in the report.
Pair getuint_pair(const char* ptr, ulong width, ulong i) {
// Repeats the width <= 64 assumption that getuint also states.
[[assume(width <= 64)]];
return {getuint(ptr, width, i), getuint(ptr, width, i+1)};
}
-------------
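In getuint_pair, GCC (trunk) produces a suboptimal register allocation: each bextr result is written to a temporary register and then copied into its return register with an extra mov: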
-------------
"getuint_pair(char const*, unsigned long, unsigned long)":
imul rdx, rsi
mov eax, esi
sal eax, 8
add rsi, rdx
mov rcx, rdx
mov r8, rdx
and ecx, 7
mov rdx, rsi
and esi, 7
shr r8, 3
or ecx, eax
shr rdx, 3
or eax, esi
bextr rax, QWORD PTR [rdi+rdx], rax
bextr rcx, QWORD PTR [rdi+r8], rcx
mov rdx, rax
mov rax, rcx
ret
-------------
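The four instructions before ret (two bextr followed by two mov) should instead be just these two, with each bextr writing directly to its return register: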
-------------
bextr rdx, QWORD PTR [rdi+rdx], rax
bextr rax, QWORD PTR [rdi+r8], rcx