Author: Simon Pilgrim
Date: 2021-01-22T15:47:23Z
New Revision: c33d36e0667e7fff186243ac7a3a9cd63e797438
URL: https://github.com/llvm/llvm-project/commit/c33d36e0667e7fff186243ac7a3a9cd63e797438
DIFF: https://github.com/llvm/llvm-project/commit/c33d36e0667e7fff186243ac7a3a9cd63e797438.diff

LOG: [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps - handle unary vperm2x128(permute/shift(x,c),undef) cases

Fold vperm2x128(permute/shift(x,c),undef) -> permute/shift(vperm2x128(x,undef),c)

Added: 


Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx-splat.ll
    llvm/test/CodeGen/X86/extract-concat.ll
    llvm/test/CodeGen/X86/haddsub-4.ll
    llvm/test/CodeGen/X86/known-signbits-vector.ll
    llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
    llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
    llvm/test/CodeGen/X86/vector-shuffle-combining.ll

Removed: 


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 577745c42d81..90ed8c920565 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36918,19 +36918,21 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
   EVT SrcVT0 = Src0.getValueType();
   EVT SrcVT1 = Src1.getValueType();
 
-  // TODO: Under what circumstances should we push perm2f128 up when we have one
-  // active src?
-  if (SrcOpc0 != SrcOpc1 || SrcVT0 != SrcVT1)
+  if (!Src1.isUndef() && (SrcVT0 != SrcVT1 || SrcOpc0 != SrcOpc1))
     return SDValue();
 
   switch (SrcOpc0) {
   case X86ISD::VSHLI:
   case X86ISD::VSRLI:
   case X86ISD::VSRAI:
-    if (Src0.getOperand(1) == Src1.getOperand(1)) {
-      SDValue Res = DAG.getNode(
-          X86ISD::VPERM2X128, DL, VT, DAG.getBitcast(VT, Src0.getOperand(0)),
-          DAG.getBitcast(VT, Src1.getOperand(0)), V.getOperand(2));
+  case X86ISD::PSHUFD:
+  case X86ISD::VPERMILPI:
+    if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
+      SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
+      SDValue RHS =
+          DAG.getBitcast(VT, Src1.isUndef() ? Src1 : Src1.getOperand(0));
+      SDValue Res =
+          DAG.getNode(X86ISD::VPERM2X128, DL, VT, LHS, RHS, V.getOperand(2));
       Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res),
                         Src0.getOperand(1));
       return DAG.getBitcast(VT, Res);
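
[Editorial aside, not part of the commit: the fold relies on VPERM2X128 moving
whole 128-bit lanes while PSHUFD/VPERMILPI and the per-lane shifts apply the
same operation inside every lane, so the two commute when the second source is
undef. The following standalone C++ sketch models that commutation on an
8 x float vector; the helper names and the simplified unary lane-select are
illustrative assumptions, not LLVM or Intel intrinsic APIs.]

// Model of the fold:
//   vperm2x128(vpermilps(x, c), undef) == vpermilps(vperm2x128(x, undef), c)
#include <array>
#include <cassert>
#include <cstdio>

using V8F = std::array<float, 8>;

// Per-lane 32-bit permute: both 128-bit lanes are shuffled by the same four
// 2-bit selectors in Imm (VPERMILPS ymm, imm8 semantics).
static V8F vpermilps(const V8F &V, unsigned Imm) {
  V8F R{};
  for (int Lane = 0; Lane != 2; ++Lane)
    for (int I = 0; I != 4; ++I)
      R[Lane * 4 + I] = V[Lane * 4 + ((Imm >> (2 * I)) & 3)];
  return R;
}

// 128-bit lane select with an undef second source: each output half takes the
// low (0) or high (1) lane of the single input. Bits 1/3/7 (second-source and
// zeroing controls of the real VPERM2F128 imm8) are deliberately not modeled.
static V8F vperm2f128_unary(const V8F &V, unsigned Imm) {
  V8F R{};
  for (int Half = 0; Half != 2; ++Half) {
    int SrcLane = (Imm >> (4 * Half)) & 1;
    for (int I = 0; I != 4; ++I)
      R[Half * 4 + I] = V[SrcLane * 4 + I];
  }
  return R;
}

int main() {
  V8F X = {0, 1, 2, 3, 4, 5, 6, 7};
  // 0x01 ~ ymm0[2,3,0,1], 0x11 ~ ymm0[2,3,2,3], 0x00 ~ ymm0[0,1,0,1].
  for (unsigned PermImm = 0; PermImm != 256; ++PermImm)
    for (unsigned LaneImm : {0x01u, 0x11u, 0x00u}) {
      // Permute first, then lane shuffle (the pre-fold order)...
      V8F A = vperm2f128_unary(vpermilps(X, PermImm), LaneImm);
      // ...lane shuffle first, then permute (the canonical order).
      V8F B = vpermilps(vperm2f128_unary(X, LaneImm), PermImm);
      assert(A == B);
    }
  std::puts("per-lane permute commutes with vperm2x128 (unary case)");
}
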
diff --git a/llvm/test/CodeGen/X86/avx-splat.ll b/llvm/test/CodeGen/X86/avx-splat.ll
index 3755cf4740ab..7602975c8872 100644
--- a/llvm/test/CodeGen/X86/avx-splat.ll
+++ b/llvm/test/CodeGen/X86/avx-splat.ll
@@ -157,8 +157,8 @@ entry:
 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: funcH:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
 ; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
 ; CHECK-NEXT:    ret{{[l|q]}}
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
diff --git a/llvm/test/CodeGen/X86/extract-concat.ll b/llvm/test/CodeGen/X86/extract-concat.ll
index 26e07d86bfc3..49ac851d88fc 100644
--- a/llvm/test/CodeGen/X86/extract-concat.ll
+++ b/llvm/test/CodeGen/X86/extract-concat.ll
@@ -70,12 +70,12 @@ define <16 x i64> @catcat(<4 x i64> %x) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,2,3]
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm1[2,3,2,3]
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm4
-; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm3 = ymm0[1,1,3,3]
 ; AVX1-NEXT:    vmovaps %ymm4, %ymm0
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/haddsub-4.ll b/llvm/test/CodeGen/X86/haddsub-4.ll
index 6003f98b9371..2e077d6247ba 100644
--- a/llvm/test/CodeGen/X86/haddsub-4.ll
+++ b/llvm/test/CodeGen/X86/haddsub-4.ll
@@ -65,8 +65,8 @@ define <8 x float> @hadd_reverse_v8f32(<8 x float> %a0, <8 x float> %a1) {
 ; AVX1-LABEL: hadd_reverse_v8f32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: hadd_reverse_v8f32:
@@ -97,10 +97,10 @@ define <8 x float> @hadd_reverse2_v8f32(<8 x float> %a0, <8 x float> %a1) {
 ;
 ; AVX1-LABEL: hadd_reverse2_v8f32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -135,8 +135,8 @@ define <8 x float> @hadd_reverse3_v8f32(<8 x float> %a0, <8 x float> %a1) {
 ; AVX1-LABEL: hadd_reverse3_v8f32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: hadd_reverse3_v8f32:
@@ -299,15 +299,15 @@ define <8 x double> @hadd_reverse2_v8f64(<8 x double> %a0, <8 x double> %a1) nou
 ;
 ; AVX1-LABEL: hadd_reverse2_v8f64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3,0,1]
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm2[1,0,3,2]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm4 = ymm1[1,0,3,2]
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
 ; AVX1-NEXT:    vhaddpd %ymm1, %ymm0, %ymm1
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm3[1,0,3,2]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm3[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; AVX1-NEXT:    vhaddpd %ymm0, %ymm4, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -400,15 +400,15 @@ define <16 x float> @hadd_reverse2_v16f32(<16 x float> %a0, <16 x float> %a1) no
 ;
 ; AVX1-LABEL: hadd_reverse2_v16f32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3,0,1]
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm2[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm4 = ymm1[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vhaddps %ymm1, %ymm0, %ymm1
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm3[3,2,1,0,7,6,5,4]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm3[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vhaddps %ymm0, %ymm4, %ymm0
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index 150b1bd98cb8..945e12b34450 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -253,10 +253,10 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1)
 ; X86:       # %bb.0:
 ; X86-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
 ; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X86-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
+; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
 ; X86-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; X86-NEXT:    retl
 ;
@@ -264,10 +264,10 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1)
 ; X64-AVX1:       # %bb.0:
 ; X64-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
+; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
 ; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; X64-AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 9c237f2a9b12..38600884262c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -212,8 +212,8 @@ define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
 define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
 ; AVX1-LABEL: shuffle_v4f64_3210:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4f64_3210:
@@ -606,8 +606,8 @@ define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
 define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
 ; AVX1-LABEL: shuffle_v4f64_3333:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4f64_3333:
@@ -897,8 +897,8 @@ define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
 define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_3210:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4i64_3210:
@@ -1383,8 +1383,8 @@ define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
 define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_3333:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4i64_3333:
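
[Worked example, not part of the commit: for shuffle_v4f64_3210 above the
target mask is <3,2,1,0>. In the new order, vperm2f128 ymm0 = ymm0[2,3,0,1]
first swaps the 128-bit lanes, yielding elements <2,3,0,1>, and the in-lane
vpermilpd ymm0 = ymm0[1,0,3,2] then swaps the two doubles inside each lane,
yielding <3,2,1,0>. The old order reached the same result via
vpermilpd(<1,0,3,2>) followed by the lane swap; the canonical form just moves
the cross-lane vperm2f128 ahead of the per-lane permute.]
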
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index 5f2a3cd72b71..5d53eb48a67f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -804,11 +804,11 @@ define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
 ; AVX1-LABEL: shuffle_v8f32_f511235a:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
 ; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[3],ymm0[3]
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
 ; AVX1-NEXT:    retq
 ;
@@ -878,8 +878,8 @@ define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
 ; AVX1-LABEL: shuffle_v8f32_76547654:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: shuffle_v8f32_76547654:
@@ -914,8 +914,8 @@ define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
 ; AVX1-LABEL: shuffle_v8f32_76543210:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: shuffle_v8f32_76543210:
@@ -1242,8 +1242,8 @@ define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
 ; AVX1-LABEL: shuffle_v8f32_44444444:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2OR512VL-LABEL: shuffle_v8f32_44444444:
@@ -2255,10 +2255,10 @@ define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
 ; AVX1-NEXT:    retq
 ;
@@ -2327,8 +2327,8 @@ define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-LABEL: shuffle_v8i32_76547654:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: shuffle_v8i32_76547654:
@@ -2363,8 +2363,8 @@ define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-LABEL: shuffle_v8i32_76543210:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: shuffle_v8i32_76543210:
@@ -2662,8 +2662,8 @@ define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-LABEL: shuffle_v8i32_44444444:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2OR512VL-LABEL: shuffle_v8i32_44444444:
@@ -2678,8 +2678,8 @@ define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_44444444_bc(<8 x float> %a, <8 x float> %b) {
 ; AVX1-LABEL: shuffle_v8i32_44444444_bc:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2OR512VL-LABEL: shuffle_v8i32_44444444_bc:
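
[Editorial aside on the vector-shuffle-combining.ll change below, not part of
the commit: vinsertf128 $1, %xmm0, %ymm0, %ymm0 leaves both 128-bit lanes of
ymm0 identical, so once the canonicalization moves the vperm2f128 ahead of the
vpermilps, the lane shuffle ymm0[2,3,2,3] is a no-op and is dropped entirely,
saving an instruction.]
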
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 1393cf256028..a73427a3220e 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2392,7 +2392,6 @@ define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
 ; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: combine_unneeded_subvector1:

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits