| Issue |
107086
|
| Summary |
[X86] Useless SIMD<->GPR transfers for f16 arithmetic
|
| Labels |
backend:X86,
missed-optimization
|
| Assignees |
|
| Reporter |
RKSimon
|
https://simd.godbolt.org/z/MfT8oe61d
```ll
define half @test_half_add(half %a0, half %a1) {
%res = fadd half %a0, %a1
ret half %res
}
define void @test_half_add_store(half %a0, half %a1, ptr %p0) {
%res = fadd half %a0, %a1
store half %res, ptr %p0, align 2
ret void
}
```
Not only do we perform unnecessary transfers from/to the xmm registers, we also fail to fold the store into a vpextrw
```asm
test_half_add: # @test_half_add
vpextrw $0, %xmm1, %eax
vmovd %eax, %xmm1
vpextrw $0, %xmm0, %eax
vcvtph2ps %xmm1, %xmm0
vmovd %eax, %xmm1
vcvtph2ps %xmm1, %xmm1
vaddss %xmm0, %xmm1, %xmm0
vcvtps2ph $4, %xmm0, %xmm0
vmovd %xmm0, %eax
vpinsrw $0, %eax, %xmm0, %xmm0
retq
test_half_add_store: # @test_half_add_store
vpextrw $0, %xmm0, %eax
vpextrw $0, %xmm1, %ecx
vmovd %ecx, %xmm0
vmovd %eax, %xmm1
vcvtph2ps %xmm0, %xmm0
vcvtph2ps %xmm1, %xmm1
vaddss %xmm0, %xmm1, %xmm0
vcvtps2ph $4, %xmm0, %xmm0
vmovd %xmm0, %eax
movw %ax, (%rdi)
retq
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs