[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)
mike.dvoretsky abandoned this revision. mike.dvoretsky added a comment. Closing this due to failure of https://reviews.llvm.org/D45721. https://reviews.llvm.org/D45720 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)
mike.dvoretsky updated this revision to Diff 144126. mike.dvoretsky added a comment. Changed the shuffle mask emission code to match https://reviews.llvm.org/D45721. https://reviews.llvm.org/D45720 Files: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/avx2-builtins.c clang/test/CodeGen/avx512bw-builtins.c clang/test/CodeGen/avx512vlbw-builtins.c clang/test/CodeGen/sse2-builtins.c clang/test/CodeGen/sse41-builtins.c Index: clang/test/CodeGen/sse41-builtins.c === --- clang/test/CodeGen/sse41-builtins.c +++ clang/test/CodeGen/sse41-builtins.c @@ -328,7 +328,12 @@ __m128i test_mm_packus_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_packus_epi32 - // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, zeroinitializer + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> return _mm_packus_epi32(x, y); } Index: clang/test/CodeGen/sse2-builtins.c === --- clang/test/CodeGen/sse2-builtins.c +++ clang/test/CodeGen/sse2-builtins.c @@ -869,19 +869,34 @@ __m128i test_mm_packs_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_packs_epi16 - // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> + // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8> return _mm_packs_epi16(A, B); } __m128i 
test_mm_packs_epi32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_packs_epi32 - // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> return _mm_packs_epi32(A, B); } __m128i test_mm_packus_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_packus_epi16 - // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> + // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, zeroinitializer + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> zeroinitializer + // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8> return _mm_packus_epi16(A, B); } Index: clang/test/CodeGen/avx512vlbw-builtins.c === --- clang/test/CodeGen/avx512vlbw-builtins.c +++ clang/test/CodeGen/avx512vlbw-builtins.c @@ -970,105 +970,185 @@ __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_packs_epi32 - // CHECK: @llvm.x86.sse2.packssdw + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> // CHECK: select <8 x i1> 
%{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_packs_epi32(__M,__A,__B); } __m128i test_mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_packs_epi32 - // CHECK: @llvm.x86.sse2.packssdw + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> // CHECK: select
[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)
craig.topper accepted this revision. craig.topper added a comment. This revision is now accepted and ready to land. LGTM https://reviews.llvm.org/D45720 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)
mike.dvoretsky updated this revision to Diff 142899. mike.dvoretsky added a comment. Updated per comments. https://reviews.llvm.org/D45720 Files: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/avx2-builtins.c clang/test/CodeGen/avx512bw-builtins.c clang/test/CodeGen/avx512vlbw-builtins.c clang/test/CodeGen/sse2-builtins.c clang/test/CodeGen/sse41-builtins.c Index: clang/test/CodeGen/sse41-builtins.c === --- clang/test/CodeGen/sse41-builtins.c +++ clang/test/CodeGen/sse41-builtins.c @@ -328,7 +328,12 @@ __m128i test_mm_packus_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_packus_epi32 - // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, zeroinitializer + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> return _mm_packus_epi32(x, y); } Index: clang/test/CodeGen/sse2-builtins.c === --- clang/test/CodeGen/sse2-builtins.c +++ clang/test/CodeGen/sse2-builtins.c @@ -869,19 +869,34 @@ __m128i test_mm_packs_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_packs_epi16 - // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> + // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8> return _mm_packs_epi16(A, B); } __m128i test_mm_packs_epi32(__m128i A, __m128i B) { // CHECK-LABEL: 
test_mm_packs_epi32 - // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> return _mm_packs_epi32(A, B); } __m128i test_mm_packus_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_packus_epi16 - // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> + // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, zeroinitializer + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> zeroinitializer + // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8> return _mm_packus_epi16(A, B); } Index: clang/test/CodeGen/avx512vlbw-builtins.c === --- clang/test/CodeGen/avx512vlbw-builtins.c +++ clang/test/CodeGen/avx512vlbw-builtins.c @@ -970,105 +970,185 @@ __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_packs_epi32 - // CHECK: @llvm.x86.sse2.packssdw + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return 
_mm_maskz_packs_epi32(__M,__A,__B); } __m128i test_mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_packs_epi32 - // CHECK: @llvm.x86.sse2.packssdw + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)
mike.dvoretsky added inline comments. Comment at: lib/CodeGen/CGBuiltin.cpp:8443 + Value *MaxVec = llvm::ConstantInt::get(RTy, MaxVal); + Res = EmitX86MinMax(CGF, ICmpInst::ICMP_SLT, {Res, MaxVec}); + Res = EmitX86MinMax(CGF, ICmpInst::ICMP_SGT, {Res, MinVec}); craig.topper wrote: > Why aren't these unsigned compares for Unsigned? The compares are signed on purpose. PACKUS assumes that the input elements are signed, then uses unsigned saturation. So, for instance, a 16-bit 0xffff value must be evaluated as -1 and saturated to 0, rather than to 0xff as it would be with unsigned comparisons. Repository: rC Clang https://reviews.llvm.org/D45720 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)
craig.topper added inline comments. Comment at: lib/CodeGen/CGBuiltin.cpp:8420 + if (IsUnsigned) { +MinVal = (IsDW) ? llvm::APInt::getMinValue(16).getZExtValue() +: llvm::APInt::getMinValue(8).getZExtValue(); Why can't these just be APInts instead of uint64_t? Is this so that APInt widths don't have to match RTy below? I'd rather you just created the narrow APInt and then called sext/zext on it to get it to the right width. Comment at: lib/CodeGen/CGBuiltin.cpp:8420 + if (IsUnsigned) { +MinVal = (IsDW) ? llvm::APInt::getMinValue(16).getZExtValue() +: llvm::APInt::getMinValue(8).getZExtValue(); craig.topper wrote: > Why can't these just be APInts instead of uint64_t? Is this so that APInt > widths don't have to match RTy below? I'd rather you just created the narrow > APInt and then called sext/zext on it to get it to the right width. Pre-select the 8 or 16 based on IsDW. Then you don't need to check IsDW 4 times. You just need to pass the right width. Comment at: lib/CodeGen/CGBuiltin.cpp:8432 + SmallVectorShuffleMask; + ShuffleMask.clear(); + for (int i = 0, i1 = 0, i2 = 0, d = (IsDW) ? 4 : 8; i < NumElts; ++i) Clearing isn't necessary if you just created it. Comment at: lib/CodeGen/CGBuiltin.cpp:8433 + ShuffleMask.clear(); + for (int i = 0, i1 = 0, i2 = 0, d = (IsDW) ? 4 : 8; i < NumElts; ++i) +if ((i / d) & 1) This loop could probably use some comments. The multiple variables make the logic hard to follow. Comment at: lib/CodeGen/CGBuiltin.cpp:8443 + Value *MaxVec = llvm::ConstantInt::get(RTy, MaxVal); + Res = EmitX86MinMax(CGF, ICmpInst::ICMP_SLT, {Res, MaxVec}); + Res = EmitX86MinMax(CGF, ICmpInst::ICMP_SGT, {Res, MinVec}); Why aren't these unsigned compares for Unsigned? Comment at: lib/CodeGen/CGBuiltin.cpp:8446 + llvm::Type *VTy = llvm::VectorType::get( + (IsDW) ? CGF.Builder.getInt16Ty() : CGF.Builder.getInt8Ty(), NumElts); + return CGF.Builder.CreateTrunc(Res, VTy); If you have the 8 or 16 selected above, you can use getIntNTy here I think. 
Repository: rC Clang https://reviews.llvm.org/D45720 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)
mike.dvoretsky created this revision. mike.dvoretsky added reviewers: craig.topper, spatel. Herald added a subscriber: cfe-commits. This patch lowers the X86 vector packing with saturation intrinsics to native LLVM IR. Comes with an LLVM patch. Repository: rC Clang https://reviews.llvm.org/D45720 Files: lib/CodeGen/CGBuiltin.cpp test/CodeGen/avx2-builtins.c test/CodeGen/avx512bw-builtins.c test/CodeGen/avx512vlbw-builtins.c test/CodeGen/sse2-builtins.c test/CodeGen/sse41-builtins.c Index: test/CodeGen/sse41-builtins.c === --- test/CodeGen/sse41-builtins.c +++ test/CodeGen/sse41-builtins.c @@ -328,7 +328,12 @@ __m128i test_mm_packus_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_packus_epi32 - // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, zeroinitializer + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> return _mm_packus_epi32(x, y); } Index: test/CodeGen/sse2-builtins.c === --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -869,19 +869,34 @@ __m128i test_mm_packs_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_packs_epi16 - // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> + // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8> return _mm_packs_epi16(A, 
B); } __m128i test_mm_packs_epi32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_packs_epi32 - // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> return _mm_packs_epi32(A, B); } __m128i test_mm_packus_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_packus_epi16 - // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> + // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, zeroinitializer + // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> zeroinitializer + // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8> return _mm_packus_epi16(A, B); } Index: test/CodeGen/avx512vlbw-builtins.c === --- test/CodeGen/avx512vlbw-builtins.c +++ test/CodeGen/avx512vlbw-builtins.c @@ -970,105 +970,185 @@ __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_packs_epi32 - // CHECK: @llvm.x86.sse2.packssdw + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> // CHECK: select <8 x i1> %{{.*}}, 
<8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_packs_epi32(__M,__A,__B); } __m128i test_mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_packs_epi32 - // CHECK: @llvm.x86.sse2.packssdw + // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, + // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16> //