| Issue |
83414
|
| Summary |
[X86] Failure to merge X86ISD::CVTPH2PS nodes
|
| Labels |
backend:X86,
missed-optimization
|
| Assignees |
|
| Reporter |
RKSimon
|
```ll
define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) {
%cvt = fptosi <2 x half> %a to <2 x i32>
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %ext
}
```
Compiled with `llc -mcpu=x86-64-v3`:
```asm
fptosi_2f16_to_4i32: # @fptosi_2f16_to_4i32
vpshufb .LCPI0_0(%rip), %xmm0, %xmm1 # xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
vcvtph2ps %xmm1, %xmm1
vpmovzxwq %xmm0, %xmm0 # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
vcvtph2ps %xmm0, %xmm0
vunpcklps %xmm1, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
vcvttps2dq %xmm0, %xmm0
vmovq %xmm0, %xmm0 # xmm0 = xmm0[0],zero
retq
```
Latest trunk now gives the assembly above. Ideally we would have only a single vcvtph2ps node and avoid all the shuffles, which are just trying to move each element into the lowest element position:
```asm
fptosi_2f16_to_4i32: # @fptosi_2f16_to_4i32
vcvtph2ps %xmm0, %xmm0
vcvttps2dq %xmm0, %xmm0
vmovq %xmm0, %xmm0 # xmm0 = xmm0[0],zero
retq
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs