| Issue |
83289
|
| Summary |
[X86] [ISEL] Redundant instructions selected for storing vector of floats
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
nurmukhametov
|
Consider the following LLVM IR, which computes `A = max(A, B)` element-wise on the first four `float` elements of `A` and `B`:
```
; x86 AVX intrinsic taking and returning <8 x float>; by its name, the 256-bit packed-single max.
; NOTE(review): attribute group #0 is referenced here but not defined in this snippet -
; presumably trimmed from the original module; confirm before feeding this to llc.
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) #0
; Reproducer: loads four floats from %A and four from %B, places each group in lanes 0-3 of an
; <8 x float> (lanes 4-7 are left undef), calls the max intrinsic on the two vectors, and stores
; lanes 0-3 of the result back to %A[0..3]. %B is only read.
define void @foo(float* noalias nocapture %A, float* noalias nocapture readonly %B) local_unnamed_addr {
allocas:
; Scalar loads of %A[0], %A[1], %A[2], %A[3]; the GEP results are reused as store addresses below.
%V1_load_bitcast_load.unpack.unpack.i = load float, float* %A, align 4
%V1_load_bitcast_load.unpack.elt17.i = getelementptr inbounds float, float* %A, i64 1
%V1_load_bitcast_load.unpack.unpack18.i = load float, float* %V1_load_bitcast_load.unpack.elt17.i, align 4
%V1_load_bitcast_load.unpack.elt19.i = getelementptr inbounds float, float* %A, i64 2
%V1_load_bitcast_load.unpack.unpack20.i = load float, float* %V1_load_bitcast_load.unpack.elt19.i, align 4
%V1_load_bitcast_load.unpack.elt21.i = getelementptr inbounds float, float* %A, i64 3
%V1_load_bitcast_load.unpack.unpack22.i = load float, float* %V1_load_bitcast_load.unpack.elt21.i, align 4
; Build the first operand: the four %A values go into lanes 0-3 of an undef <8 x float>.
%S0.0.vec.insert.i = insertelement <8 x float> undef, float %V1_load_bitcast_load.unpack.unpack.i, i64 0
%S0.4.vec.insert.i = insertelement <8 x float> %S0.0.vec.insert.i, float %V1_load_bitcast_load.unpack.unpack18.i, i64 1
%S0.8.vec.insert.i = insertelement <8 x float> %S0.4.vec.insert.i, float %V1_load_bitcast_load.unpack.unpack20.i, i64 2
%S0.12.vec.insert.i = insertelement <8 x float> %S0.8.vec.insert.i, float %V1_load_bitcast_load.unpack.unpack22.i, i64 3
; Scalar loads of %B[0], %B[1], %B[2], %B[3].
%V2_load_bitcast_load.unpack.unpack.i = load float, float* %B, align 4
%V2_load_bitcast_load.unpack.elt25.i = getelementptr inbounds float, float* %B, i64 1
%V2_load_bitcast_load.unpack.unpack26.i = load float, float* %V2_load_bitcast_load.unpack.elt25.i, align 4
%V2_load_bitcast_load.unpack.elt27.i = getelementptr inbounds float, float* %B, i64 2
%V2_load_bitcast_load.unpack.unpack28.i = load float, float* %V2_load_bitcast_load.unpack.elt27.i, align 4
%V2_load_bitcast_load.unpack.elt29.i = getelementptr inbounds float, float* %B, i64 3
%V2_load_bitcast_load.unpack.unpack30.i = load float, float* %V2_load_bitcast_load.unpack.elt29.i, align 4
; Build the second operand the same way from the four %B values.
%S1.0.vec.insert.i = insertelement <8 x float> undef, float %V2_load_bitcast_load.unpack.unpack.i, i64 0
%S1.4.vec.insert.i = insertelement <8 x float> %S1.0.vec.insert.i, float %V2_load_bitcast_load.unpack.unpack26.i, i64 1
%S1.8.vec.insert.i = insertelement <8 x float> %S1.4.vec.insert.i, float %V2_load_bitcast_load.unpack.unpack28.i, i64 2
%S1.12.vec.insert.i = insertelement <8 x float> %S1.8.vec.insert.i, float %V2_load_bitcast_load.unpack.unpack30.i, i64 3
; Call the max intrinsic on the two vectors; only lanes 0-3 of the result are used below.
%call.i.i.i = tail call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %S0.12.vec.insert.i, <8 x float> %S1.12.vec.insert.i)
; Extract result lanes 0-3 as scalars.
%Result.0.vec.extract.i = extractelement <8 x float> %call.i.i.i, i64 0
%Result.4.vec.extract.i = extractelement <8 x float> %call.i.i.i, i64 1
%Result.8.vec.extract.i = extractelement <8 x float> %call.i.i.i, i64 2
%Result.12.vec.extract.i = extractelement <8 x float> %call.i.i.i, i64 3
; Store the four results back to %A[0..3], reusing the addresses computed for the loads.
store float %Result.0.vec.extract.i, float* %A, align 4
store float %Result.4.vec.extract.i, float* %V1_load_bitcast_load.unpack.elt17.i, align 4
store float %Result.8.vec.extract.i, float* %V1_load_bitcast_load.unpack.elt19.i, align 4
store float %Result.12.vec.extract.i, float* %V1_load_bitcast_load.unpack.elt21.i, align 4
ret void
}
```
When compiled as follows:
```
llc -O3 -march=x86-64 -mcpu=core-avx2 -mattr=avx2 -x86-asm-syntax=intel t.ll -o -
```
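the generated assembly contains the redundant instructions `vmovd` and `vpinsrd`, which copy element 0 of `xmm0` into `eax` and then insert it back into the same element of `xmm0`: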
```asm
vmovups xmm0, xmmword ptr [rdi]
vmaxps xmm0, xmm0, xmmword ptr [rsi]
vmovd eax, xmm0
vpinsrd xmm0, xmm0, eax, 0
vmovdqu xmmword ptr [rdi], xmm0
ret
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs