Issue 83414
Summary [X86] Failure to merge X86ISD::CVTPH2PS nodes
Labels backend:X86, missed-optimization
Assignees
Reporter RKSimon
```ll
define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) {
  %cvt = fptosi <2 x half> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %ext
}
```
llc -mcpu=x86-64-v3
```asm
fptosi_2f16_to_4i32: # @fptosi_2f16_to_4i32
	vpshufb	.LCPI0_0(%rip), %xmm0, %xmm1    # xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
	vcvtph2ps	%xmm1, %xmm1
	vpmovzxwq	%xmm0, %xmm0            # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
	vcvtph2ps	%xmm0, %xmm0
	vunpcklps	%xmm1, %xmm0, %xmm0     # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
	vcvttps2dq	%xmm0, %xmm0
	vmovq	%xmm0, %xmm0                    # xmm0 = xmm0[0],zero
	retq
```
Latest trunk now gives the above assembly. Ideally we would have only a single vcvtph2ps node and avoid all the shuffles, which are just trying to move elements into the lowest element:
```asm
fptosi_2f16_to_4i32:                    # @fptosi_2f16_to_4i32
	vcvtph2ps	%xmm0, %xmm0
	vcvttps2dq	%xmm0, %xmm0
	vmovq	%xmm0, %xmm0                    # xmm0 = xmm0[0],zero
	retq
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to