[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)

2018-06-05 Thread Mikhail Dvoretckii via Phabricator via cfe-commits
mike.dvoretsky abandoned this revision.
mike.dvoretsky added a comment.

Closing this due to failure of https://reviews.llvm.org/D45721.


https://reviews.llvm.org/D45720



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)

2018-04-26 Thread Mikhail Dvoretckii via Phabricator via cfe-commits
mike.dvoretsky updated this revision to Diff 144126.
mike.dvoretsky added a comment.

Changed the shuffle mask emission code to match https://reviews.llvm.org/D45721.


https://reviews.llvm.org/D45720

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/avx2-builtins.c
  clang/test/CodeGen/avx512bw-builtins.c
  clang/test/CodeGen/avx512vlbw-builtins.c
  clang/test/CodeGen/sse2-builtins.c
  clang/test/CodeGen/sse41-builtins.c

Index: clang/test/CodeGen/sse41-builtins.c
===
--- clang/test/CodeGen/sse41-builtins.c
+++ clang/test/CodeGen/sse41-builtins.c
@@ -328,7 +328,12 @@
 
 __m128i test_mm_packus_epi32(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_packus_epi32
-  // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, zeroinitializer
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   return _mm_packus_epi32(x, y);
 }
 
Index: clang/test/CodeGen/sse2-builtins.c
===
--- clang/test/CodeGen/sse2-builtins.c
+++ clang/test/CodeGen/sse2-builtins.c
@@ -869,19 +869,34 @@
 
 __m128i test_mm_packs_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packs_epi16
-  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> 
+  // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, 
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> 
+  // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, 
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> 
+  // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8>
   return _mm_packs_epi16(A, B);
 }
 
 __m128i test_mm_packs_epi32(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packs_epi32
-  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   return _mm_packs_epi32(A, B);
 }
 
 __m128i test_mm_packus_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packus_epi16
-  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> 
+  // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, 
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> 
+  // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, zeroinitializer
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> zeroinitializer
+  // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8>
   return _mm_packus_epi16(A, B);
 }
 
Index: clang/test/CodeGen/avx512vlbw-builtins.c
===
--- clang/test/CodeGen/avx512vlbw-builtins.c
+++ clang/test/CodeGen/avx512vlbw-builtins.c
@@ -970,105 +970,185 @@
 
 __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_packs_epi32
-  // CHECK: @llvm.x86.sse2.packssdw
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_packs_epi32(__M,__A,__B); 
 }
 __m128i test_mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A,  __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_packs_epi32
-  // CHECK: @llvm.x86.sse2.packssdw
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   // CHECK: select 

[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)

2018-04-18 Thread Craig Topper via Phabricator via cfe-commits
craig.topper accepted this revision.
craig.topper added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D45720



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)

2018-04-18 Thread Mikhail Dvoretckii via Phabricator via cfe-commits
mike.dvoretsky updated this revision to Diff 142899.
mike.dvoretsky added a comment.

Updated per comments.


https://reviews.llvm.org/D45720

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/avx2-builtins.c
  clang/test/CodeGen/avx512bw-builtins.c
  clang/test/CodeGen/avx512vlbw-builtins.c
  clang/test/CodeGen/sse2-builtins.c
  clang/test/CodeGen/sse41-builtins.c

Index: clang/test/CodeGen/sse41-builtins.c
===
--- clang/test/CodeGen/sse41-builtins.c
+++ clang/test/CodeGen/sse41-builtins.c
@@ -328,7 +328,12 @@
 
 __m128i test_mm_packus_epi32(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_packus_epi32
-  // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, zeroinitializer
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   return _mm_packus_epi32(x, y);
 }
 
Index: clang/test/CodeGen/sse2-builtins.c
===
--- clang/test/CodeGen/sse2-builtins.c
+++ clang/test/CodeGen/sse2-builtins.c
@@ -869,19 +869,34 @@
 
 __m128i test_mm_packs_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packs_epi16
-  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> 
+  // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, 
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> 
+  // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, 
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> 
+  // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8>
   return _mm_packs_epi16(A, B);
 }
 
 __m128i test_mm_packs_epi32(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packs_epi32
-  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   return _mm_packs_epi32(A, B);
 }
 
 __m128i test_mm_packus_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packus_epi16
-  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> 
+  // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, 
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> 
+  // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, zeroinitializer
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> zeroinitializer
+  // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8>
   return _mm_packus_epi16(A, B);
 }
 
Index: clang/test/CodeGen/avx512vlbw-builtins.c
===
--- clang/test/CodeGen/avx512vlbw-builtins.c
+++ clang/test/CodeGen/avx512vlbw-builtins.c
@@ -970,105 +970,185 @@
 
 __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_packs_epi32
-  // CHECK: @llvm.x86.sse2.packssdw
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_packs_epi32(__M,__A,__B); 
 }
 __m128i test_mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A,  __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_packs_epi32
-  // CHECK: @llvm.x86.sse2.packssdw
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   

[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)

2018-04-18 Thread Mikhail Dvoretckii via Phabricator via cfe-commits
mike.dvoretsky added inline comments.



Comment at: lib/CodeGen/CGBuiltin.cpp:8443
+  Value *MaxVec = llvm::ConstantInt::get(RTy, MaxVal);
+  Res = EmitX86MinMax(CGF, ICmpInst::ICMP_SLT, {Res, MaxVec});
+  Res = EmitX86MinMax(CGF, ICmpInst::ICMP_SGT, {Res, MinVec});

craig.topper wrote:
> Why aren't these unsigned compares for Unsigned?
The compares are signed on purpose. PACKUS assumes that the input elements are 
signed, then uses unsigned saturation. So, for instance, a 0xffff value must 
be evaluated as -1 and saturated to 0, rather than to 0xff as it would be with 
unsigned comparisons.


Repository:
  rC Clang

https://reviews.llvm.org/D45720



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)

2018-04-17 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: lib/CodeGen/CGBuiltin.cpp:8420
+  if (IsUnsigned) {
+MinVal = (IsDW) ? llvm::APInt::getMinValue(16).getZExtValue()
+: llvm::APInt::getMinValue(8).getZExtValue();

Why can't these just be APInts instead of uint64_t? Is this so that APInt 
widths don't have to match RTy below? I'd rather you just created the narrow 
APInt and then called sext/zext on it to get it to the right width.



Comment at: lib/CodeGen/CGBuiltin.cpp:8420
+  if (IsUnsigned) {
+MinVal = (IsDW) ? llvm::APInt::getMinValue(16).getZExtValue()
+: llvm::APInt::getMinValue(8).getZExtValue();

craig.topper wrote:
> Why can't these just be APInts instead of uint64_t? Is this so that APInt 
> widths don't have to match RTy below? I'd rather you just created the narrow 
> APInt and then called sext/zext on it to get it to the right width.
Pre-select the 8 or 16 based on IsDW. Then you don't need to check IsDW 4 
times. You just need to pass the right width.



Comment at: lib/CodeGen/CGBuiltin.cpp:8432
+  SmallVector ShuffleMask;
+  ShuffleMask.clear();
+  for (int i = 0, i1 = 0, i2 = 0, d = (IsDW) ? 4 : 8; i < NumElts; ++i)

Clearing isn't necessary if you just created it.



Comment at: lib/CodeGen/CGBuiltin.cpp:8433
+  ShuffleMask.clear();
+  for (int i = 0, i1 = 0, i2 = 0, d = (IsDW) ? 4 : 8; i < NumElts; ++i)
+if ((i / d) & 1)

This loop could probably use some comments. The multiple variables make the 
logic hard to follow.



Comment at: lib/CodeGen/CGBuiltin.cpp:8443
+  Value *MaxVec = llvm::ConstantInt::get(RTy, MaxVal);
+  Res = EmitX86MinMax(CGF, ICmpInst::ICMP_SLT, {Res, MaxVec});
+  Res = EmitX86MinMax(CGF, ICmpInst::ICMP_SGT, {Res, MinVec});

Why aren't these unsigned compares for Unsigned?



Comment at: lib/CodeGen/CGBuiltin.cpp:8446
+  llvm::Type *VTy = llvm::VectorType::get(
+  (IsDW) ? CGF.Builder.getInt16Ty() : CGF.Builder.getInt8Ty(), NumElts);
+  return CGF.Builder.CreateTrunc(Res, VTy);

If you have the 8 or 16 selected above, you can use getIntNTy here I think.


Repository:
  rC Clang

https://reviews.llvm.org/D45720



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45720: [X86] Lowering PACK*S (pack with saturation) intrinsics to native IR (clang side)

2018-04-17 Thread Mikhail Dvoretckii via Phabricator via cfe-commits
mike.dvoretsky created this revision.
mike.dvoretsky added reviewers: craig.topper, spatel.
Herald added a subscriber: cfe-commits.

This patch lowers the X86 vector packing with saturation intrinsics to native 
LLVM IR. Comes with an LLVM patch.


Repository:
  rC Clang

https://reviews.llvm.org/D45720

Files:
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/avx2-builtins.c
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512vlbw-builtins.c
  test/CodeGen/sse2-builtins.c
  test/CodeGen/sse41-builtins.c

Index: test/CodeGen/sse41-builtins.c
===
--- test/CodeGen/sse41-builtins.c
+++ test/CodeGen/sse41-builtins.c
@@ -328,7 +328,12 @@
 
 __m128i test_mm_packus_epi32(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_packus_epi32
-  // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, zeroinitializer
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   return _mm_packus_epi32(x, y);
 }
 
Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -869,19 +869,34 @@
 
 __m128i test_mm_packs_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packs_epi16
-  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> 
+  // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, 
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> 
+  // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, 
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> 
+  // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8>
   return _mm_packs_epi16(A, B);
 }
 
 __m128i test_mm_packs_epi32(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packs_epi32
-  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   return _mm_packs_epi32(A, B);
 }
 
 __m128i test_mm_packus_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packus_epi16
-  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> 
+  // CHECK: %{{.*}} = icmp slt <16 x i16> %{{.*}}, 
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> 
+  // CHECK: %{{.*}} = icmp sgt <16 x i16> %{{.*}}, zeroinitializer
+  // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> zeroinitializer
+  // CHECK: %{{.*}} = trunc <16 x i16> %{{.*}} to <16 x i8>
   return _mm_packus_epi16(A, B);
 }
 
Index: test/CodeGen/avx512vlbw-builtins.c
===
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -970,105 +970,185 @@
 
 __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_packs_epi32
-  // CHECK: @llvm.x86.sse2.packssdw
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_packs_epi32(__M,__A,__B); 
 }
 __m128i test_mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A,  __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_packs_epi32
-  // CHECK: @llvm.x86.sse2.packssdw
+  // CHECK: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp slt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = icmp sgt <8 x i32> %{{.*}}, 
+  // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> 
+  // CHECK: %{{.*}} = trunc <8 x i32> %{{.*}} to <8 x i16>
   //