| Issue |
107086
|
| Summary |
[X86] Useless SIMD<->GPR transfers for f16 arithmetic
|
| Labels |
backend:X86,
missed-optimization
|
| Assignees |
|
| Reporter |
RKSimon
|
https://simd.godbolt.org/z/MfT8oe61d
```ll
define half @test_half_add(half %a0, half %a1) {
%res = fadd half %a0, %a1
ret half %res
}
define void @test_half_add_store(half %a0, half %a1, ptr %p0) {
%res = fadd half %a0, %a1
store half %res, ptr %p0, align 2
ret void
}
```
Not only do we perform unnecessary transfers from/to the xmm registers, we also fail to fold the store into a vpextrw
```asm
test_half_add: # @test_half_add
vpextrw $0, %xmm1, %eax
vmovd %eax, %xmm1
vpextrw $0, %xmm0, %eax
vcvtph2ps %xmm1, %xmm0
vmovd %eax, %xmm1
vcvtph2ps %xmm1, %xmm1
vaddss %xmm0, %xmm1, %xmm0
vcvtps2ph $4, %xmm0, %xmm0
vmovd %xmm0, %eax
vpinsrw $0, %eax, %xmm0, %xmm0
retq
test_half_add_store: # @test_half_add_store
vpextrw $0, %xmm0, %eax
vpextrw $0, %xmm1, %ecx
vmovd %ecx, %xmm0
vmovd %eax, %xmm1
vcvtph2ps %xmm0, %xmm0
vcvtph2ps %xmm1, %xmm1
vaddss %xmm0, %xmm1, %xmm0
vcvtps2ph $4, %xmm0, %xmm0
vmovd %xmm0, %eax
movw %ax, (%rdi)
retq
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs