Issue 107086
Summary [X86] Useless SIMD<->GPR transfers for f16 arithmetic
Labels backend:X86, missed-optimization
Assignees
Reporter RKSimon
    https://simd.godbolt.org/z/MfT8oe61d
```ll
define half @test_half_add(half %a0, half %a1) {
  %res = fadd half %a0, %a1
 ret half %res
}

define void @test_half_add_store(half %a0, half %a1, ptr %p0) {
  %res = fadd half %a0, %a1
  store half %res, ptr %p0, align 2
  ret void
}
```
Not only do we perform unnecessary transfers from/to the xmm registers, but we also fail to fold the store into a vpextrw:
```asm
test_half_add:                          # @test_half_add
        vpextrw $0, %xmm1, %eax
        vmovd   %eax, %xmm1
        vpextrw $0, %xmm0, %eax
        vcvtph2ps       %xmm1, %xmm0
        vmovd   %eax, %xmm1
        vcvtph2ps       %xmm1, %xmm1
        vaddss  %xmm0, %xmm1, %xmm0
        vcvtps2ph       $4, %xmm0, %xmm0
        vmovd   %xmm0, %eax
        vpinsrw $0, %eax, %xmm0, %xmm0
        retq
test_half_add_store:                    # @test_half_add_store
        vpextrw $0, %xmm0, %eax
        vpextrw $0, %xmm1, %ecx
        vmovd   %ecx, %xmm0
        vmovd   %eax, %xmm1
        vcvtph2ps       %xmm0, %xmm0
        vcvtph2ps %xmm1, %xmm1
        vaddss  %xmm0, %xmm1, %xmm0
        vcvtps2ph $4, %xmm0, %xmm0
        vmovd   %xmm0, %eax
        movw    %ax, (%rdi)
        retq
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to