Issue 83289
Summary [X86] [ISEL] Redundant instructions selected for storing vector of floats
Labels new issue
Assignees
Reporter nurmukhametov
    Consider the following code, which computes `A = max(A, B)` on the first four `float` elements of `A` and `B`:
```
; AVX intrinsic called by @foo: takes two <8 x float> operands and returns an
; <8 x float> result.
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) #0

; Reproducer: loads four floats from %A and four from %B, places each group in
; lanes 0-3 of an <8 x float>, calls the AVX max intrinsic on the two vectors,
; and stores lanes 0-3 of the result back to %A[0..3].
define void @foo(float* noalias nocapture %A, float* noalias nocapture readonly %B) local_unnamed_addr {
allocas:
  ; Scalar loads of A[0], A[1], A[2], A[3].
  %V1_load_bitcast_load.unpack.unpack.i = load float, float* %A, align 4
  %V1_load_bitcast_load.unpack.elt17.i = getelementptr inbounds float, float* %A, i64 1
 %V1_load_bitcast_load.unpack.unpack18.i = load float, float* %V1_load_bitcast_load.unpack.elt17.i, align 4
 %V1_load_bitcast_load.unpack.elt19.i = getelementptr inbounds float, float* %A, i64 2
  %V1_load_bitcast_load.unpack.unpack20.i = load float, float* %V1_load_bitcast_load.unpack.elt19.i, align 4
 %V1_load_bitcast_load.unpack.elt21.i = getelementptr inbounds float, float* %A, i64 3
  %V1_load_bitcast_load.unpack.unpack22.i = load float, float* %V1_load_bitcast_load.unpack.elt21.i, align 4
  ; Build the first operand: A[0..3] go into lanes 0-3; lanes 4-7 are never
  ; written and remain undef.
  %S0.0.vec.insert.i = insertelement <8 x float> undef, float %V1_load_bitcast_load.unpack.unpack.i, i64 0
  %S0.4.vec.insert.i = insertelement <8 x float> %S0.0.vec.insert.i, float %V1_load_bitcast_load.unpack.unpack18.i, i64 1
  %S0.8.vec.insert.i = insertelement <8 x float> %S0.4.vec.insert.i, float %V1_load_bitcast_load.unpack.unpack20.i, i64 2
  %S0.12.vec.insert.i = insertelement <8 x float> %S0.8.vec.insert.i, float %V1_load_bitcast_load.unpack.unpack22.i, i64 3
  ; Scalar loads of B[0], B[1], B[2], B[3].
 %V2_load_bitcast_load.unpack.unpack.i = load float, float* %B, align 4
 %V2_load_bitcast_load.unpack.elt25.i = getelementptr inbounds float, float* %B, i64 1
  %V2_load_bitcast_load.unpack.unpack26.i = load float, float* %V2_load_bitcast_load.unpack.elt25.i, align 4
 %V2_load_bitcast_load.unpack.elt27.i = getelementptr inbounds float, float* %B, i64 2
  %V2_load_bitcast_load.unpack.unpack28.i = load float, float* %V2_load_bitcast_load.unpack.elt27.i, align 4
 %V2_load_bitcast_load.unpack.elt29.i = getelementptr inbounds float, float* %B, i64 3
  %V2_load_bitcast_load.unpack.unpack30.i = load float, float* %V2_load_bitcast_load.unpack.elt29.i, align 4
  ; Build the second operand the same way from B[0..3]; lanes 4-7 remain undef.
  %S1.0.vec.insert.i = insertelement <8 x float> undef, float %V2_load_bitcast_load.unpack.unpack.i, i64 0
  %S1.4.vec.insert.i = insertelement <8 x float> %S1.0.vec.insert.i, float %V2_load_bitcast_load.unpack.unpack26.i, i64 1
  %S1.8.vec.insert.i = insertelement <8 x float> %S1.4.vec.insert.i, float %V2_load_bitcast_load.unpack.unpack28.i, i64 2
  %S1.12.vec.insert.i = insertelement <8 x float> %S1.8.vec.insert.i, float %V2_load_bitcast_load.unpack.unpack30.i, i64 3
  ; 256-bit max of the two operands; only lanes 0-3 of the result are used below.
  %call.i.i.i = tail call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %S0.12.vec.insert.i, <8 x float> %S1.12.vec.insert.i)
  ; Extract lanes 0-3 of the result as scalars.
  %Result.0.vec.extract.i = extractelement <8 x float> %call.i.i.i, i64 0
  %Result.4.vec.extract.i = extractelement <8 x float> %call.i.i.i, i64 1
  %Result.8.vec.extract.i = extractelement <8 x float> %call.i.i.i, i64 2
  %Result.12.vec.extract.i = extractelement <8 x float> %call.i.i.i, i64 3
  ; Store the four results back to A[0..3], reusing the GEPs computed for the loads.
  store float %Result.0.vec.extract.i, float* %A, align 4
  store float %Result.4.vec.extract.i, float* %V1_load_bitcast_load.unpack.elt17.i, align 4
  store float %Result.8.vec.extract.i, float* %V1_load_bitcast_load.unpack.elt19.i, align 4
  store float %Result.12.vec.extract.i, float* %V1_load_bitcast_load.unpack.elt21.i, align 4
  ret void
}
```

When built as follows:
```
llc -O3 -march=x86-64 -mcpu=core-avx2 -mattr=avx2 -x86-asm-syntax=intel t.ll -o -
```

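the generated assembly contains the redundant instructions `vmovd` and `vpinsrd`, which copy element 0 of `xmm0` into `eax` and then insert it straight back into element 0 of the same register: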

```asm
        # Load an xmmword from [rdi] and take the packed max against the xmmword at [rsi].
        vmovups xmm0, xmmword ptr [rdi]
 vmaxps  xmm0, xmm0, xmmword ptr [rsi]
        # Reported as redundant: element 0 of xmm0 is moved to eax and then
        # inserted back into element 0 of xmm0.
        vmovd   eax, xmm0
 vpinsrd xmm0, xmm0, eax, 0
        # Store the xmmword result back to [rdi].
        vmovdqu xmmword ptr [rdi], xmm0
 ret
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to