https://github.com/Arghnews updated 
https://github.com/llvm/llvm-project/pull/155542

>From 759f06ff21d819986603ca50b2ba213ce68d3368 Mon Sep 17 00:00:00 2001
From: Justin Riddell <arghn...@hotmail.co.uk>
Date: Wed, 27 Aug 2025 04:04:37 +0100
Subject: [PATCH] [Headers][X86] VisitCallExpr constexpr immediate shifts
 (#154293)

Implement constexpr support in VectorExprEvaluator::VisitCallExpr for
the logical left, logical right, and arithmetic right shift-by-immediate
MMX/SSE/AVX2/AVX512 intrinsics:

_mm*_slli_epi*
_mm*_srli_epi*
_mm*_srai_epi*
_mm*_mask_slli_epi*
_mm*_maskz_slli_epi*

NOTE: not every intrinsic exists at every element width, e.g. there is a
_mm_srli_pi32 but no _mm_srli_pi64.
---
 clang/include/clang/Basic/BuiltinsX86.td      |  96 ++--
 clang/lib/AST/ExprConstant.cpp                | 256 +++++++---
 clang/lib/Headers/avx2intrin.h                |  40 +-
 clang/lib/Headers/avx512bwintrin.h            |  45 +-
 clang/lib/Headers/avx512fintrin.h             |  87 ++--
 clang/lib/Headers/avx512vlbwintrin.h          |  50 +-
 clang/lib/Headers/avx512vlintrin.h            |  95 ++--
 clang/lib/Headers/emmintrin.h                 |  32 +-
 clang/lib/Headers/mmintrin.h                  |  64 +--
 .../CodeGen/X86/shift-immediate-constexpr.c   | 441 ++++++++++++++++++
 10 files changed, 877 insertions(+), 329 deletions(-)
 create mode 100644 clang/test/CodeGen/X86/shift-immediate-constexpr.c

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 527acd9ef086e..7fdfd363b299f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -275,15 +275,8 @@ let Features = "sse2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] i
   def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, _Vector<2, long long int>)">;
   def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, 
short>)">;
   def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, 
int>)">;
-  def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, _Vector<2, long long int>)">;
-  def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
-  def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
-  def psllqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, int)">;
-  def psrlwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
-  def psrldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
-  def psrlqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, int)">;
-  def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
-  def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
+  def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long "
+                            "int>, _Vector<2, long long int>)">;
   def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, 
short>)">;
   def pslldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, 
long long int>, _Constant int)">;
   def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, 
long long int>, _Constant int)">;
@@ -291,6 +284,19 @@ let Features = "sse2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] i
 
 let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, 
_Vector<4, int>)">;
+
+  def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
+  def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
+  def psllqi128
+      : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, 
int)">;
+
+  def psrlwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
+  def psrldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
+  def psrlqi128
+      : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, 
int)">;
+
+  def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
+  def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
 }
 
 let Features = "sse3", Attributes = [NoThrow] in {
@@ -594,24 +600,23 @@ let Features = "avx2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] i
   def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int)">;
   def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, 
char>)">;
   def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Vector<16, short>)">;
-  def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
-  def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
+  def psignd256
+      : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
   def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, 
short>)">;
-  def pslldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, 
long long int>, _Constant int)">;
-  def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
+  def pslldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, "
+                                        "long long int>, _Constant int)">;
   def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, 
int>)">;
-  def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, int)">;
-  def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<2, long long int>)">;
-  def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
-  def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, 
short>)">;
-  def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
+  def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long "
+                            "int>, _Vector<2, long long int>)">;
+  def psraw256
+      : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, 
short>)">;
   def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, 
int>)">;
-  def psrldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, 
long long int>, _Constant int)">;
-  def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
-  def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, 
short>)">;
-  def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
-  def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, 
int>)">;
-  def psrlqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, int)">;
+  def psrldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, "
+                                        "long long int>, _Constant int)">;
+  def psrlw256
+      : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, 
short>)">;
+  def psrld256
+      : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
   def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<2, long long int>)">;
   def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, 
int>, _Constant int)">;
   def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>, _Constant int)">;
@@ -628,6 +633,19 @@ let Features = "avx2", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWi
   def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, 
_Vector<8, int>)">;
   def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, 
_Vector<8, int>)">;
 
+  def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
+  def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
+  def psllqi256
+      : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, 
int)">;
+
+  def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
+  def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
+  def psrlqi256
+      : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, 
int)">;
+
+  def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
+  def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
+
   def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, 
unsigned short>, _Vector<16, unsigned short>)">;
   def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Vector<16, short>)">;
 
@@ -2097,8 +2115,8 @@ let Features = "avx512bw,evex512", Attributes = [NoThrow, 
Const, RequiredVectorW
   def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int)">;
   def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int)">;
   def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Vector<32, short>)">;
-  def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, 
short>)">;
-  def psllwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
+  def psllw512
+      : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, 
short>)">;
 }
 
 let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
@@ -2109,7 +2127,9 @@ let Features = "avx512bw,avx512vl", Attributes = 
[NoThrow, Const, RequiredVector
   def psllv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, 
short>)">;
 }
 
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+  def psllwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
   def pslldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
   def psllqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long 
int>, int)">;
 }
@@ -2126,7 +2146,9 @@ let Features = "avx512bw,avx512vl", Attributes = 
[NoThrow, Const, RequiredVector
   def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, 
short>)">;
 }
 
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+  def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
   def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
   def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long 
int>, int)">;
 }
@@ -2152,10 +2174,10 @@ let Features = "avx512vl", Attributes = [NoThrow, 
Const, RequiredVectorWidth<256
 }
 
 let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def psraw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, 
short>)">;
-  def psrawi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
-  def psrlw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, 
short>)">;
-  def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
+  def psraw512
+      : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, 
short>)">;
+  def psrlw512
+      : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, 
short>)">;
   def pslldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, 
long long int>, _Constant int)">;
   def psrldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, 
long long int>, _Constant int)">;
 }
@@ -2487,7 +2509,9 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>
   def scalefss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, 
_Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
 }
 
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+  def psrawi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
   def psradi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
   def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long 
int>, int)">;
 }
@@ -2500,11 +2524,13 @@ let Features = "avx512vl", Attributes = [NoThrow, 
Const, RequiredVectorWidth<256
   def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<2, long long int>)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, int)">;
 }
 
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 19703e40d2696..2d4c8a7c11017 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11621,6 +11621,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
   case clang::X86::BI__builtin_ia32_pmulhw128:
   case clang::X86::BI__builtin_ia32_pmulhw256:
   case clang::X86::BI__builtin_ia32_pmulhw512:
+
   case clang::X86::BI__builtin_ia32_psllv2di:
   case clang::X86::BI__builtin_ia32_psllv4di:
   case clang::X86::BI__builtin_ia32_psllv4si:
@@ -11630,7 +11631,41 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
   case clang::X86::BI__builtin_ia32_psrlv2di:
   case clang::X86::BI__builtin_ia32_psrlv4di:
   case clang::X86::BI__builtin_ia32_psrlv4si:
-  case clang::X86::BI__builtin_ia32_psrlv8si:{
+  case clang::X86::BI__builtin_ia32_psrlv8si:
+
+  // Logical left shift by immediate
+  case clang::X86::BI__builtin_ia32_psllwi128:
+  case clang::X86::BI__builtin_ia32_pslldi128:
+  case clang::X86::BI__builtin_ia32_psllqi128:
+  case clang::X86::BI__builtin_ia32_psllwi256:
+  case clang::X86::BI__builtin_ia32_pslldi256:
+  case clang::X86::BI__builtin_ia32_psllqi256:
+  case clang::X86::BI__builtin_ia32_psllwi512:
+  case clang::X86::BI__builtin_ia32_pslldi512:
+  case clang::X86::BI__builtin_ia32_psllqi512:
+
+  // Logical right shift by immediate
+  case clang::X86::BI__builtin_ia32_psrlwi128:
+  case clang::X86::BI__builtin_ia32_psrldi128:
+  case clang::X86::BI__builtin_ia32_psrlqi128:
+  case clang::X86::BI__builtin_ia32_psrlwi256:
+  case clang::X86::BI__builtin_ia32_psrldi256:
+  case clang::X86::BI__builtin_ia32_psrlqi256:
+  case clang::X86::BI__builtin_ia32_psrlwi512:
+  case clang::X86::BI__builtin_ia32_psrldi512:
+  case clang::X86::BI__builtin_ia32_psrlqi512:
+
+  // Arithmetic right shift by immediate
+  case clang::X86::BI__builtin_ia32_psrawi128:
+  case clang::X86::BI__builtin_ia32_psradi128:
+  case clang::X86::BI__builtin_ia32_psraqi128:
+  case clang::X86::BI__builtin_ia32_psrawi256:
+  case clang::X86::BI__builtin_ia32_psradi256:
+  case clang::X86::BI__builtin_ia32_psraqi256:
+  case clang::X86::BI__builtin_ia32_psrawi512:
+  case clang::X86::BI__builtin_ia32_psradi512:
+  case clang::X86::BI__builtin_ia32_psraqi512: {
+
     APValue SourceLHS, SourceRHS;
     if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
         !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
@@ -11644,64 +11679,181 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
 
     for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
       APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
-      APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
-      switch (E->getBuiltinCallee()) {
-      case Builtin::BI__builtin_elementwise_add_sat:
-        ResultElements.push_back(APValue(
-            APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS),
-                   DestUnsigned)));
-        break;
-      case Builtin::BI__builtin_elementwise_sub_sat:
-        ResultElements.push_back(APValue(
-            APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
-                   DestUnsigned)));
-        break;
-      case clang::X86::BI__builtin_ia32_pmulhuw128:
-      case clang::X86::BI__builtin_ia32_pmulhuw256:
-      case clang::X86::BI__builtin_ia32_pmulhuw512:
-        ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhu(LHS, 
RHS),
-                                                /*isUnsigned=*/true)));
-        break;
-      case clang::X86::BI__builtin_ia32_pmulhw128:
-      case clang::X86::BI__builtin_ia32_pmulhw256:
-      case clang::X86::BI__builtin_ia32_pmulhw512:
-        ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, 
RHS),
-                                                /*isUnsigned=*/false)));
-        break;
-      case clang::X86::BI__builtin_ia32_psllv2di:
-      case clang::X86::BI__builtin_ia32_psllv4di:
-      case clang::X86::BI__builtin_ia32_psllv4si:
-      case clang::X86::BI__builtin_ia32_psllv8si:
-        if (RHS.uge(RHS.getBitWidth())) {
-          ResultElements.push_back(
-              APValue(APSInt(APInt::getZero(RHS.getBitWidth()), 
DestUnsigned)));
+
+      if (SourceRHS.isInt()) {
+        uint64_t LaneWidth = 0;
+        bool IsLeftShift = false;
+        bool IsRightShift = false;
+        bool IsArithmeticRightShift = false;
+
+        switch (E->getBuiltinCallee()) {
+        case clang::X86::BI__builtin_ia32_psllwi128:
+        case clang::X86::BI__builtin_ia32_psllwi256:
+        case clang::X86::BI__builtin_ia32_psllwi512:
+          IsLeftShift = true;
+          LaneWidth = 16;
+          break;
+        case clang::X86::BI__builtin_ia32_pslldi128:
+        case clang::X86::BI__builtin_ia32_pslldi256:
+        case clang::X86::BI__builtin_ia32_pslldi512:
+          IsLeftShift = true;
+          LaneWidth = 32;
+          break;
+        case clang::X86::BI__builtin_ia32_psllqi128:
+        case clang::X86::BI__builtin_ia32_psllqi256:
+        case clang::X86::BI__builtin_ia32_psllqi512:
+          IsLeftShift = true;
+          LaneWidth = 64;
           break;
+
+        case clang::X86::BI__builtin_ia32_psrlwi128:
+        case clang::X86::BI__builtin_ia32_psrlwi256:
+        case clang::X86::BI__builtin_ia32_psrlwi512:
+          IsRightShift = true;
+          LaneWidth = 16;
+          break;
+        case clang::X86::BI__builtin_ia32_psrldi128:
+        case clang::X86::BI__builtin_ia32_psrldi256:
+        case clang::X86::BI__builtin_ia32_psrldi512:
+          IsRightShift = true;
+          LaneWidth = 32;
+          break;
+        case clang::X86::BI__builtin_ia32_psrlqi128:
+        case clang::X86::BI__builtin_ia32_psrlqi256:
+        case clang::X86::BI__builtin_ia32_psrlqi512:
+          IsRightShift = true;
+          LaneWidth = 64;
+          break;
+
+        case clang::X86::BI__builtin_ia32_psrawi128:
+        case clang::X86::BI__builtin_ia32_psrawi256:
+        case clang::X86::BI__builtin_ia32_psrawi512:
+          IsArithmeticRightShift = true;
+          LaneWidth = 16;
+          break;
+        case clang::X86::BI__builtin_ia32_psradi128:
+        case clang::X86::BI__builtin_ia32_psradi256:
+        case clang::X86::BI__builtin_ia32_psradi512:
+          IsArithmeticRightShift = true;
+          LaneWidth = 32;
+          break;
+        case clang::X86::BI__builtin_ia32_psraqi128:
+        case clang::X86::BI__builtin_ia32_psraqi256:
+        case clang::X86::BI__builtin_ia32_psraqi512:
+          IsArithmeticRightShift = true;
+          LaneWidth = 64;
+          break;
+
+        default:
+          llvm_unreachable("Unexpected builtin callee");
         }
-        ResultElements.push_back(
-            APValue(APSInt(LHS.shl(RHS.getZExtValue()), DestUnsigned)));
-        break;
-      case clang::X86::BI__builtin_ia32_psrav4si:
-      case clang::X86::BI__builtin_ia32_psrav8si:
-        if (RHS.uge(RHS.getBitWidth())) {
+
+        const APSInt RHS = SourceRHS.getInt();
+        const auto ShiftAmount = RHS.getZExtValue();
+        APInt ResultOut;
+        if (IsArithmeticRightShift) {
+          ResultOut = LHS.ashr(std::min(ShiftAmount, LaneWidth));
+        } else if (ShiftAmount >= LaneWidth) {
+          ResultOut = APInt(LaneWidth, 0);
+        } else if (IsLeftShift) {
+          ResultOut = LHS.shl(ShiftAmount);
+        } else if (IsRightShift) {
+          ResultOut = LHS.lshr(ShiftAmount);
+        } else {
+          llvm_unreachable("Invalid shift type");
+        }
+        ResultElements.push_back(APValue(APSInt(
+            std::move(ResultOut),
+            /*isUnsigned=*/DestEltTy->isUnsignedIntegerOrEnumerationType())));
+      } else {
+        APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
+        switch (E->getBuiltinCallee()) {
+        case Builtin::BI__builtin_elementwise_add_sat:
+          ResultElements.push_back(APValue(
+              APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS),
+                     DestUnsigned)));
+          break;
+        case Builtin::BI__builtin_elementwise_sub_sat:
+          ResultElements.push_back(APValue(
+              APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
+                     DestUnsigned)));
+          break;
+        case clang::X86::BI__builtin_ia32_pmulhuw128:
+        case clang::X86::BI__builtin_ia32_pmulhuw256:
+        case clang::X86::BI__builtin_ia32_pmulhuw512:
           ResultElements.push_back(
-              APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned)));
+              APValue(APSInt(llvm::APIntOps::mulhu(LHS, RHS),
+                             /*isUnsigned=*/true)));
           break;
-        }
-        ResultElements.push_back(
-            APValue(APSInt(LHS.ashr(RHS.getZExtValue()), DestUnsigned)));
-        break;
-      case clang::X86::BI__builtin_ia32_psrlv2di:
-      case clang::X86::BI__builtin_ia32_psrlv4di:
-      case clang::X86::BI__builtin_ia32_psrlv4si:
-      case clang::X86::BI__builtin_ia32_psrlv8si:
-        if (RHS.uge(RHS.getBitWidth())) {
+        case clang::X86::BI__builtin_ia32_pmulhw128:
+        case clang::X86::BI__builtin_ia32_pmulhw256:
+        case clang::X86::BI__builtin_ia32_pmulhw512:
+          ResultElements.push_back(
+              APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS),
+                             /*isUnsigned=*/false)));
+          break;
+        case clang::X86::BI__builtin_ia32_psllv2di:
+        case clang::X86::BI__builtin_ia32_psllv4di:
+        case clang::X86::BI__builtin_ia32_psllv4si:
+        case clang::X86::BI__builtin_ia32_psllv8si:
+          if (RHS.uge(RHS.getBitWidth())) {
+            ResultElements.push_back(APValue(
+                APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned)));
+            break;
+          }
+          ResultElements.push_back(
+              APValue(APSInt(LHS.shl(RHS.getZExtValue()), DestUnsigned)));
+          break;
+        case clang::X86::BI__builtin_ia32_psrav4si:
+        case clang::X86::BI__builtin_ia32_psrav8si:
+          if (RHS.uge(RHS.getBitWidth())) {
+            ResultElements.push_back(
+                APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), 
DestUnsigned)));
+            break;
+          }
           ResultElements.push_back(
-              APValue(APSInt(APInt::getZero(RHS.getBitWidth()), 
DestUnsigned)));
+              APValue(APSInt(LHS.ashr(RHS.getZExtValue()), DestUnsigned)));
           break;
+        case clang::X86::BI__builtin_ia32_psrlv2di:
+        case clang::X86::BI__builtin_ia32_psrlv4di:
+        case clang::X86::BI__builtin_ia32_psrlv4si:
+        case clang::X86::BI__builtin_ia32_psrlv8si:
+          if (RHS.uge(RHS.getBitWidth())) {
+            ResultElements.push_back(APValue(
+                APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned)));
+            break;
+          }
+          ResultElements.push_back(
+              APValue(APSInt(LHS.lshr(RHS.getZExtValue()), DestUnsigned)));
+          break;
+          APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
+          switch (E->getBuiltinCallee()) {
+          case Builtin::BI__builtin_elementwise_add_sat:
+            ResultElements.push_back(APValue(
+                APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS),
+                       DestEltTy->isUnsignedIntegerOrEnumerationType())));
+            break;
+          case Builtin::BI__builtin_elementwise_sub_sat:
+            ResultElements.push_back(APValue(
+                APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
+                       DestEltTy->isUnsignedIntegerOrEnumerationType())));
+            break;
+          case clang::X86::BI__builtin_ia32_pmulhuw128:
+          case clang::X86::BI__builtin_ia32_pmulhuw256:
+          case clang::X86::BI__builtin_ia32_pmulhuw512:
+            ResultElements.push_back(
+                APValue(APSInt(llvm::APIntOps::mulhu(LHS, RHS),
+                               /*isUnsigned=*/true)));
+            break;
+          case clang::X86::BI__builtin_ia32_pmulhw128:
+          case clang::X86::BI__builtin_ia32_pmulhw256:
+          case clang::X86::BI__builtin_ia32_pmulhw512:
+            ResultElements.push_back(
+                APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS),
+                               /*isUnsigned=*/false)));
+            break;
+          }
         }
-        ResultElements.push_back(
-            APValue(APSInt(LHS.lshr(RHS.getZExtValue()), DestUnsigned)));
-        break;
       }
     }
 
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index ce5b2b7544d8c..baeb2e7cbff24 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -2124,9 +2124,8 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_slli_epi16(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_slli_epi16(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
 }
 
@@ -2164,9 +2163,8 @@ _mm256_sll_epi16(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_slli_epi32(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_slli_epi32(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
 }
 
@@ -2204,9 +2202,8 @@ _mm256_sll_epi32(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_slli_epi64(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_slli_epi64(__m256i __a, int __count) {
   return __builtin_ia32_psllqi256((__v4di)__a, __count);
 }
 
@@ -2245,9 +2242,8 @@ _mm256_sll_epi64(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srai_epi16(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srai_epi16(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
 }
 
@@ -2287,9 +2283,8 @@ _mm256_sra_epi16(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srai_epi32(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srai_epi32(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
 }
 
@@ -2368,9 +2363,8 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srli_epi16(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srli_epi16(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
 }
 
@@ -2408,9 +2402,8 @@ _mm256_srl_epi16(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srli_epi32(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srli_epi32(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
 }
 
@@ -2448,9 +2441,8 @@ _mm256_srl_epi32(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srli_epi64(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srli_epi64(__m256i __a, int __count) {
   return __builtin_ia32_psrlqi256((__v4di)__a, __count);
 }
 
diff --git a/clang/lib/Headers/avx512bwintrin.h 
b/clang/lib/Headers/avx512bwintrin.h
index 9263f7af3ee2f..723865ffa755e 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1483,24 +1483,21 @@ _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, 
__m128i __B)
                                           (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_slli_epi16(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_slli_epi16(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_slli_epi16(__A, __B),
                                          (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_slli_epi16(__A, __B),
                                          (__v32hi)_mm512_setzero_si512());
@@ -1575,24 +1572,21 @@ _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
                                           (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srai_epi16(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srai_epi16(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_srai_epi16(__A, __B),
                                          (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_srai_epi16(__A, __B),
                                          (__v32hi)_mm512_setzero_si512());
@@ -1620,24 +1614,21 @@ _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
                                           (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srli_epi16(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srli_epi16(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_srli_epi16(__A, __B),
                                          (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B),
                                          (__v32hi)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 81c8e8e934493..611c8314b0ed9 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -5095,91 +5095,81 @@ _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
                                        (__v8di)_mm512_ror_epi64((A), (B)), \
                                        (__v8di)_mm512_setzero_si512()))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_slli_epi32(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_slli_epi32(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_slli_epi32(__A, __B),
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_slli_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_slli_epi64(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_slli_epi64(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_slli_epi64(__A, __B),
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_slli_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srli_epi32(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srli_epi32(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srli_epi32(__A, __B),
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srli_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srli_epi64(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srli_epi64(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srli_epi64(__A, __B),
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A,
-                        unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srli_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
@@ -6584,46 +6574,41 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
                                               (__mmask8)(U), \
                                               (int)(R)))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srai_epi32(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srai_epi32(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srai_epi32(__A, __B),
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A,
-                        unsigned int __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srai_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srai_epi64(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srai_epi64(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srai_epi64(__A, __B),
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srai_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index dcd72e9240f2c..29880968c0562 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -1963,18 +1963,16 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_slli_epi16(__A, (int)__B),
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_slli_epi16(__A, (int)__B),
                                          (__v16hi)_mm256_setzero_si256());
@@ -2100,34 +2098,30 @@ _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_srai_epi16(__A, (int)__B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_srai_epi16(__A, (int)__B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_srai_epi16(__A, (int)__B),
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_srai_epi16(__A, (int)__B),
                                          (__v16hi)_mm256_setzero_si256());
@@ -2165,33 +2159,29 @@ _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_srli_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_srli_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_srli_epi16(__A, __B),
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_srli_epi16(__A, __B),
                                          (__v16hi)_mm256_setzero_si256());
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index a1f2a1c92a863..c41621d4850b7 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -4503,17 +4503,16 @@ _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_slli_epi32(__A, (int)__B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_slli_epi32(__A, (int)__B),
                                              (__v8si)_mm256_setzero_si256());
@@ -4567,17 +4566,16 @@ _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_slli_epi64(__A, (int)__B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_slli_epi64(__A, (int)__B),
                                              (__v4di)_mm256_setzero_si256());
@@ -4847,17 +4845,16 @@ _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srli_epi32(__A, (int)__B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srli_epi32(__A, (int)__B),
                                              (__v8si)_mm256_setzero_si256());
@@ -4911,17 +4908,16 @@ _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_srli_epi64(__A, (int)__B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_srli_epi64(__A, (int)__B),
                                              (__v4di)_mm256_setzero_si256());
@@ -6370,33 +6366,30 @@ _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_srai_epi32(__A, (int)__B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_srai_epi32(__A, (int)__B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srai_epi32(__A, (int)__B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srai_epi32(__A, (int)__B),
                                              (__v8si)_mm256_setzero_si256());
@@ -6446,46 +6439,40 @@ _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
                                            (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_srai_epi64(__m128i __A, unsigned int __imm)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_srai_epi64(__m128i __A, unsigned int __imm) {
   return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi64(
+    __m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
                                            (__v2di)_mm_srai_epi64(__A, __imm), \
                                            (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
                                            (__v2di)_mm_srai_epi64(__A, __imm), \
                                            (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srai_epi64(__m256i __A, unsigned int __imm)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srai_epi64(__m256i __A, unsigned int __imm) {
   return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
-                       unsigned int __imm)
-{
+                       unsigned int __imm) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
                                         (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
                                         (__v4di)_mm256_setzero_si256());
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 8b6b62458dac1..99ab5a2475105 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2772,8 +2772,8 @@ _mm_xor_si128(__m128i __a, __m128i __b) {
 ///    An integer value specifying the number of bits to left-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_slli_epi16(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
 }
 
@@ -2808,8 +2808,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a,
 ///    An integer value specifying the number of bits to left-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_slli_epi32(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
 }
 
@@ -2844,8 +2844,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a,
 ///    An integer value specifying the number of bits to left-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_slli_epi64(__m128i __a, int __count) {
   return __builtin_ia32_psllqi128((__v2di)__a, __count);
 }
 
@@ -2881,8 +2881,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srai_epi16(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
 }
 
@@ -2919,8 +2919,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srai_epi32(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
 }
 
@@ -2981,8 +2981,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srli_epi16(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
 }
 
@@ -3017,8 +3017,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srli_epi32(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
 }
 
@@ -3053,8 +3053,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srli_epi64(__m128i __a, int __count) {
   return __builtin_ia32_psrlqi128((__v2di)__a, __count);
 }
 
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 6fe9d67b8976d..4ed95c5b7bb71 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -778,11 +778,9 @@ _mm_sll_pi16(__m64 __m, __m64 __count)
 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
 ///    values. If \a __count is greater or equal to 16, the result is set to all
 ///    0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_pi16(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_slli_pi16(__m64 __m, int __count) {
+  return __trunc64(__builtin_ia32_psllwi128((__v8hi)__zext128(__m), __count));
 }
 
 /// Left-shifts each 32-bit signed integer element of the first
@@ -825,11 +823,9 @@ _mm_sll_pi32(__m64 __m, __m64 __count)
 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
 ///    values. If \a __count is greater or equal to 32, the result is set to all
 ///    0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_pi32(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_slli_pi32(__m64 __m, int __count) {
+  return __trunc64(__builtin_ia32_pslldi128((__v4si)__zext128(__m), __count));
 }
 
 /// Left-shifts the first 64-bit integer parameter by the number of bits
@@ -867,11 +863,9 @@ _mm_sll_si64(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector containing the left-shifted value. If
 ///     \a __count is greater or equal to 64, the result is set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_si64(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_slli_si64(__m64 __m, int __count) {
+  return __trunc64(__builtin_ia32_psllqi128((__v2di)__zext128(__m), __count));
 }
 
 /// Right-shifts each 16-bit integer element of the first parameter,
@@ -916,11 +910,9 @@ _mm_sra_pi16(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srai_pi16(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srai_pi16(__m64 __m, int __count) {
+  return __trunc64(__builtin_ia32_psrawi128((__v8hi)__zext128(__m), __count));
 }
 
 /// Right-shifts each 32-bit integer element of the first parameter,
@@ -965,11 +957,9 @@ _mm_sra_pi32(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srai_pi32(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srai_pi32(__m64 __m, int __count) {
+  return __trunc64(__builtin_ia32_psradi128((__v4si)__zext128(__m), __count));
 }
 
 /// Right-shifts each 16-bit integer element of the first parameter,
@@ -1012,11 +1002,9 @@ _mm_srl_pi16(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_pi16(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srli_pi16(__m64 __m, int __count) {
+  return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__zext128(__m), __count));
 }
 
 /// Right-shifts each 32-bit integer element of the first parameter,
@@ -1059,11 +1047,9 @@ _mm_srl_pi32(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_pi32(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srli_pi32(__m64 __m, int __count) {
+  return __trunc64(__builtin_ia32_psrldi128((__v4si)__zext128(__m), __count));
 }
 
 /// Right-shifts the first 64-bit integer parameter by the number of bits
@@ -1102,11 +1088,9 @@ _mm_srl_si64(__m64 __m, __m64 __count)
 /// \param __count
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector containing the right-shifted value.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_si64(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srli_si64(__m64 __m, int __count) {
+  return __trunc64(__builtin_ia32_psrlqi128((__v2di)__zext128(__m), __count));
 }
 
 /// Performs a bitwise AND of two 64-bit integer vectors.
diff --git a/clang/test/CodeGen/X86/shift-immediate-constexpr.c b/clang/test/CodeGen/X86/shift-immediate-constexpr.c
new file mode 100644
index 0000000000000..c0888e09ccec1
--- /dev/null
+++ b/clang/test/CodeGen/X86/shift-immediate-constexpr.c
@@ -0,0 +1,441 @@
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386 -target-feature +sse2 -emit-llvm -Wall -Werror
+
+#include <mmintrin.h>
+#include "builtin_test_helpers.h"
+
+TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 0), 0, 1, 2, 3));
+TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 1), 0, 1 << 1, 2 << 1, 3 << 1));
+TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 15), 0, 1U << 15, 2 << 15, 3 << 15));
+TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 16), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 17), 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 1), 0, 1 >> 1, 2 >> 1, 3 >> 1));
+TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 1), 0, 1 >> 1, 2 >> 1, 3 >> 1));
+TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){-1, 0, 0, 0}, 1), 0x7f'ff, 0, 0, 0));
+TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 16), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 17), 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 1), -1, 0, 1, 1));
+TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 15), -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 16), -1, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 200), -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, 32767, -2, 0}, 1), -16384, 16383, -1, 0));
+TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, 32767, -2, 0}, 15), -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, 32767, -2, 0}, 30), -1, 0, -1, 0));
+
+TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){0, 1}, 0), 0, 1));
+TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){0, 1}, 1), 0, 1 << 1));
+TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 2}, 2), 1 << 2, 2 << 2));
+TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 1}, 31), 1 << 31, 1 << 31));
+TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 1}, 32), 0, 0));
+TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 1}, 33), 0, 0));
+
+TEST_CONSTEXPR(match_v2si(_mm_srli_pi32((__m64)(__v2si){1, 1025}, 2), 1 >> 2, 1025 >> 2));
+
+TEST_CONSTEXPR(match_v2si( _mm_srai_pi32((__m64)(__v2si){-32768, 32767}, 30), -1, 0));
+TEST_CONSTEXPR(match_v2si( _mm_srai_pi32((__m64)(__v2si){-2, 20}, 1), -1, 20 >> 1));
+TEST_CONSTEXPR(match_v2si( _mm_srai_pi32((__m64)(__v2si){-1, 20}, 1), -1, 20 >> 1));
+
+TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){0}, 0), 0));
+TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 1), 1LL << 1));
+TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){2}, 2), 2LL << 2));
+TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 63), 1LL << 63));
+TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 64), 0));
+TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 65), 0));
+
+TEST_CONSTEXPR(match_v1di(_mm_srli_si64((__m64)(__v1di){1025}, 2), 1025LL >> 2));
+
+TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 0), 0, 1, 2, 3, 4, 5, 6, 7));
+TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0, 1 << 1, 2 << 1, 3 << 1, 4 << 1, 5 << 1, 6 << 1, 7 << 1));
+TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 8, 2, 3, 4, 5, 6, 7}, 8), 0, 8 << 8, 2 << 8, 3 << 8, 4 << 8, 5 << 8, 6 << 8, 7 << 8));
+TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 8, 2, 3, 4, 5, 6, 7}, 16), 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8hi(_mm_srli_epi16((__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0, 1 >> 1, 2 >> 1, 3 >> 1, 4 >> 1, 5 >> 1, 6 >> 1, 7 >> 1));
+
+TEST_CONSTEXPR(match_v8hi(_mm_srai_epi16((__m128i)(__v8hi){-32768, 32767, -3, -2, -1, 0, 1, 2}, 1), -16384, 16383, -2, -1, -1, 0, 0, 1));
+
+TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 0), 0, 1, 2, 3));
+TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 1), 0, 1 << 1, 2 << 1, 3 << 1));
+TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 31), 0, 1U << 31, 2U << 31, 3U << 31));
+TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 32), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 33), 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4si(_mm_srli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 8), 0, 1U >> 8, 2U >> 8, 3U >> 8));
+
+TEST_CONSTEXPR(match_v4si(_mm_srai_epi32((__m128i)(__v4si){-32768, 32767, -3, 2}, 1), -16384, 16383, -2, 1));
+
+TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 0), 0, 1));
+TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 1), 0, 1LL << 1));
+TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){5, 8}, 6), 5 << 6, 8 << 6));
+TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 63), 0, 1LL << 63));
+TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 64), 0, 0));
+TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 65), 0, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_srli_epi64((__m128i)(__v2di){100005, 100008}, 6), 100005 >> 6, 100008 >> 6));
+TEST_CONSTEXPR(match_v2di(_mm_srai_epi64((__m128i)(__v2di){-32768, -3}, 1), -16384, -2));
+
+TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 0),
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
+TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1),
+  0, 1U<<1, 2U<<1, 3U<<1, 4U<<1, 5U<<1, 6U<<1, 7U<<1, 8U<<1, 9U<<1, 10U<<1, 
11U<<1, 12U<<1, 13U<<1, 14U<<1, 15U<<1));
+TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 15),
+  0, 1U<<15, 2U<<15, 3U<<15, 4U<<15, 5U<<15, 6U<<15, 7U<<15, 8U<<15, 9U<<15, 
10U<<15, 11U<<15, 12U<<15, 13U<<15, 14U<<15, 15U<<15));
+TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16),
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 17),
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v16hi(_mm256_srli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1),
+  0, 1U>>1, 2U>>1, 3U>>1, 4U>>1, 5U>>1, 6U>>1, 7U>>1, 8U>>1, 9U>>1, 10U>>1, 
11U>>1, 12U>>1, 13U>>1, 14U>>1, 15U>>1));
+TEST_CONSTEXPR(match_v16hi(_mm256_srai_epi16((__m256i)(__v16hi){-32768, 32767, 
-3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, 1),
+  -16384, 16383, -2, -1, -1, 0, 0, 1, -16384, 16383, -2, -1, -1, 0, 0, 1));
+
+TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 
5, 6, 7}, 0), 0, 1, 2, 3, 4, 5, 6, 7));
+TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 
5, 6, 7}, 1), 0, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1));
+TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 
5, 6, 7}, 31), 0, 1U<<31, 2U<<31, 3U<<31, 4U<<31, 5U<<31, 6U<<31, 7U<<31));
+TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 
5, 6, 7}, 32), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 
5, 6, 7}, 33), 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8si(_mm256_srli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 
5, 6, 7}, 31), 0, 1U>>31, 2U>>31, 3U>>31, 4U>>31, 5U>>31, 6U>>31, 7U>>31));
+TEST_CONSTEXPR(match_v8si(_mm256_srai_epi32((__m256i)(__v8si){-32768, 32767, 
-3, -2, -1, 0, 1, 2}, 1), -16384, 16383, -2, -1, -1, 0, 0, 1));
+
+TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 0), 
0, 1, 2, 3));
+TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 1), 
0, 1<<1, 2<<1, 3<<1));
+TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 
33), 0, 1ULL<<33, 2ULL<<33, 3ULL<<33));
+TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 
64), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 
65), 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_srli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 
33), 0, 1ULL>>33, 2ULL>>33, 3ULL>>33));
+TEST_CONSTEXPR(match_v4di(_mm256_srai_epi64((__m256i)(__v4di){-32768, 32767, 
-3, -2}, 1), -16384, 16383, -2, -1));
+
+TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31}, 0),
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));
+TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31}, 1),
+  0, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 
12<<1, 13<<1, 14<<1, 15<<1, 16<<1, 17<<1, 18<<1, 19<<1, 20<<1, 21<<1, 22<<1, 
23<<1, 24<<1, 25<<1, 26<<1, 27<<1, 28<<1, 29<<1, 30<<1, 31<<1));
+TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31}, 15),
+  0, 1U<<15, 2U<<15, 3U<<15, 4U<<15, 5U<<15, 6U<<15, 7U<<15, 8U<<15, 9U<<15, 
10U<<15, 11U<<15, 12U<<15, 13U<<15, 14U<<15, 15U<<15, 16U<<15, 17U<<15, 
18U<<15, 19U<<15, 20U<<15, 21U<<15, 22U<<15, 23U<<15, 24U<<15, 25U<<15, 
26U<<15, 27U<<15, 28U<<15, 29U<<15, 30U<<15, 31U<<15));
+TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31}, 16),
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31}, 17),
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v32hi(_mm512_srli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31}, 15),
+  0, 1U>>15, 2U>>15, 3U>>15, 4U>>15, 5U>>15, 6U>>15, 7U>>15, 8U>>15, 9U>>15, 
10U>>15, 11U>>15, 12U>>15, 13U>>15, 14U>>15, 15U>>15, 16U>>15, 17U>>15, 
18U>>15, 19U>>15, 20U>>15, 21U>>15, 22U>>15, 23U>>15, 24U>>15, 25U>>15, 
26U>>15, 27U>>15, 28U>>15, 29U>>15, 30U>>15, 31U>>15));
+TEST_CONSTEXPR(match_v32hi(_mm512_srai_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31}, 10),
+  0, 1U>>10, 2U>>10, 3U>>10, 4U>>10, 5U>>10, 6U>>10, 7U>>10, 8U>>10, 9U>>10, 
10U>>10, 11U>>10, 12U>>10, 13U>>10, 14U>>10, 15U>>10, 16U>>10, 17U>>10, 
18U>>10, 19U>>10, 20U>>10, 21U>>10, 22U>>10, 23U>>10, 24U>>10, 25U>>10, 
26U>>10, 27U>>10, 28U>>10, 29U>>10, 30U>>10, 31U>>10));
+
+TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 0),
+0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
+TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1),
+0, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 12<<1, 
13<<1, 14<<1, 15<<1));
+TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 10),
+0, 1U<<10, 2U<<10, 3U<<10, 4U<<10, 5U<<10, 6U<<10, 7U<<10, 8U<<10, 9U<<10, 
10U<<10, 11U<<10, 12U<<10, 13U<<10, 14U<<10, 15U<<10));
+TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 32),
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 33),
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v16si(_mm512_srli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 10),
+0, 1U>>10, 2U>>10, 3U>>10, 4U>>10, 5U>>10, 6U>>10, 7U>>10, 8U>>10, 9U>>10, 
10U>>10, 11U>>10, 12U>>10, 13U>>10, 14U>>10, 15U>>10));
+TEST_CONSTEXPR(match_v16si(_mm512_srai_epi32((__m512i)(__v16si){0, -2, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 10),
+0, -1, 2U>>10, 3U>>10, 4U>>10, 5U>>10, 6U>>10, 7U>>10, 8U>>10, 9U>>10, 
10U>>10, 11U>>10, 12U>>10, 13U>>10, 14U>>10, 15U>>10));
+
+TEST_CONSTEXPR(match_v8di(_mm512_slli_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 
5, 6, 7}, 1),
+0<<1, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1));
+TEST_CONSTEXPR(match_v8di(_mm512_srli_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 
5, 6, 7}, 1),
+0>>1, 1>>1, 2>>1, 3>>1, 4>>1, 5>>1, 6>>1, 7>>1));
+TEST_CONSTEXPR(match_v8di(_mm512_srai_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 
5, 6, 7}, 1),
+0>>1, 1>>1, 2>>1, 3>>1, 4>>1, 5>>1, 6>>1, 7>>1));
+
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0x00'ff'cc'71,
+(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16),
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0,
+(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16),
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0xff'ff'ff'ff,
+(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1),
+0xffU<< 1, 1<< 1, 2<< 1, 3<< 1, 4<< 1, 5<< 1, 6<< 1, 7<< 1, 8<< 1, 9<< 1, 10<< 
1, 11<< 1, 12<< 1, 13<< 1, 14<< 1, 15<< 1,
+0xffU<< 1, 1<< 1, 2<< 1, 3<< 1, 4<< 1, 5<< 1, 6<< 1, 7<< 1, 8<< 1, 9<< 1, 10<< 
1, 11<< 1, 12<< 1, 13<< 1, 14<< 1, 15<< 1));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0x7f'ff'ff'ff,
+(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1),
+0xffU<< 1, 1<< 1, 2<< 1, 3<< 1, 4<< 1, 5<< 1, 6<< 1, 7<< 1, 8<< 1, 9<< 1, 10<< 
1, 11<< 1, 12<< 1, 13<< 1, 14<< 1, 15<< 1,
+0xffU<< 1, 1<< 1, 2<< 1, 3<< 1, 4<< 1, 5<< 1, 6<< 1, 7<< 1, 8<< 1, 9<< 1, 10<< 
1, 11<< 1, 12<< 1, 13<< 1, 14<< 1, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0x71'cc'ff'00,
+(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1),
+0, 0, 0, 0, 0, 0, 0, 0, 8<<1, 9<<1, 10<<1, 11<<1, 12<<1, 13<<1, 14<<1, 15<<1, 
0, 0, 2<<1, 3<<1, 0, 0, 6<<1, 7<<1, 8<<1, 0, 0, 0, 12<<1, 13<<1, 14<<1, 0));
+
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srli_epi16((__mmask32)0x71'cc'ff'00,
+(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1),
+0, 0, 0, 0, 0, 0, 0, 0,
+8>>1, 9>>1, 10>>1, 11>>1, 12>>1, 13>>1, 14>>1, 15>>1,
+0, 0, 2>>1, 3>>1, 0, 0, 6>>1, 7>>1,
+8>>1, 0, 0, 0, 12>>1, 13>>1, 14>>1, 0
+));
+TEST_CONSTEXPR(match_v32hi(
+  _mm512_maskz_srai_epi16(
+      (__mmask32)0xAAAAAAAA,
+      (__m512i)(__v32hi){
+        -32768,  32767,   -3,    -2,    -1,     0,     1,     2,
+         -1234,   1234, -32767, 32766,   -5,     5,  -256,   256,
+           -42,     42,   -7,     7, -30000, 30000,   -1,    -1,
+             0,     -2,    2, -32768, 32767, -32768,  -123,   123
+      }, 5),
+  0, 1023, 0, -1, 0, 0, 0, 0, 0, 38, 0, 1023, 0, 0, 0, 8, 0, 1, 0, 0, 0, 937, 
0, -1, 0, -1, 0, -1024, 0, -1024, 0, 3 ));
+
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0x00'ff'cc'71,
+(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
32),
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0,
+(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
16),
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0xff'ff,
+(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0xffU<<1, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 
12<<1, 13<<1, 14<<1, 15<<1));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0x7f'ff,
+(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0xffU<<1, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 
12<<1, 13<<1, 14<<1, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0x71'cc,
+(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0, 0, 2<<1, 3<<1, 0, 0, 6<<1, 7<<1, 8<<1, 0, 0, 0, 12<<1, 13<<1, 14<<1, 0));
+
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_srli_epi32((__mmask16)0x71'cc,
+(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0, 0, 2>>1, 3>>1, 0, 0, 6>>1, 7>>1, 8>>1, 0, 0, 0, 12>>1, 13>>1, 14>>1, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_srai_epi32((__mmask16)0x71'cc,
+(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0, 0, 2>>1, 3>>1, 0, 0, 6>>1, 7>>1, 8>>1, 0, 0, 0, 12>>1, 13>>1, 14>>1, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0x00'ff'cc'71,
+(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 64),
+0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0,
+(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 16),
+0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0xff,
+(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1),
+0xffULL<<1, 1LL<<1, 2LL<<1, 3LL<<1, 4LL<<1, 5LL<<1, 6LL<<1, 7LL<<1));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0x7f,
+(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1),
+0xffULL<<1, 1LL<<1, 2LL<<1, 3LL<<1, 4LL<<1, 5LL<<1, 6LL<<1, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0x71,
+(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1),
+0xff << 1, 0, 0, 0, 4 << 1, 5 << 1, 6LL<<1, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_srli_epi64((__mmask8)0x71,
+(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1),
+0xff >> 1, 0, 0, 0, 4 >> 1, 5 >> 1, 6LL>>1, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_srai_epi64((__mmask8)0x71,
+(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1),
+0xff >> 1, 0, 0, 0, 4 >> 1, 5 >> 1, 6LL>>1, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0x00'ff'cc'71, 
(__m256i)(__v4di){0xff, 1, 2, 3}, 64), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0, 
(__m256i)(__v4di){0xff, 1, 2, 3}, 16), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0xff, 
(__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xffULL<<1, 1LL<<1, 2LL<<1, 3LL<<1));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0x7, 
(__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xffULL<<1, 1LL<<1, 2LL<<1, 0));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0x71, 
(__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xff << 1, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_srli_epi64((__mmask8)0x71, 
(__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xff >> 1, 0, 0, 0));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_srai_epi64((__mmask8)0x71, 
(__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xff >> 1, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0x00'ff'cc'71, 
(__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 32), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0, 
(__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 16), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0xff, 
(__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xffU<<1, 1<<1, 2<<1, 3<<1, 
4<<1, 5<<1, 6<<1, 7<<1));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0x7, 
(__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xffU<<1, 1<<1, 2<<1, 0, 0, 
0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0x71, 
(__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xff<<1, 0, 0, 0, 4<<1, 5<<1, 
6<<1, 0));
+
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_srli_epi32((__mmask8)0x71, 
(__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xff>>1, 0, 0, 0, 4>>1, 5>>1, 
6>>1, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_srai_epi32((__mmask8)0x71, 
(__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xff>>1, 0, 0, 0, 4>>1, 5>>1, 
6>>1, 0));
+
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0x00'ff'cc'71,
+(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
32),
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0,
+(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
16),
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0xff'ff,
+(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0xffU<<1, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 
12<<1, 13<<1, 14<<1, 15<<1));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0x7,
+(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0xffU<<1, 1<<1, 2<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0x71,
+(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0xff<<1, 0, 0, 0, 4<<1, 5<<1, 6<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srli_epi16((__mmask16)0x71,
+(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0xff>>1, 0, 0, 0, 4>>1, 5>>1, 6>>1, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srai_epi16((__mmask16)0x71,
+(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 
1),
+0xff>>1, 0, 0, 0, 4>>1, 5>>1, 6>>1, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v32hi(
+  _mm512_mask_slli_epi16(
+      
(__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,
+                         
116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131},
+      (__mmask32)~(__mmask32)0,
+      (__m512i)(__v32hi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
+                         16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31},
+      1),
+  0<<1,1<<1,2<<1,3<<1,4<<1,5<<1,6<<1,7<<1,
+  8<<1,9<<1,10<<1,11<<1,12<<1,13<<1,14<<1,15<<1,
+  16<<1,17<<1,18<<1,19<<1,20<<1,21<<1,22<<1,23<<1,
+  24<<1,25<<1,26<<1,27<<1,28<<1,29<<1,30<<1,31<<1));
+
+TEST_CONSTEXPR(match_v32hi(
+  _mm512_mask_srli_epi16(
+      
(__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,
+                         
116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131},
+      (__mmask32)~(__mmask32)0,
+      (__m512i)(__v32hi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
+                         16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31},
+      1),
+  0>>1,1>>1,2>>1,3>>1,4>>1,5>>1,6>>1,7>>1,
+  8>>1,9>>1,10>>1,11>>1,12>>1,13>>1,14>>1,15>>1,
+  16>>1,17>>1,18>>1,19>>1,20>>1,21>>1,22>>1,23>>1,
+  24>>1,25>>1,26>>1,27>>1,28>>1,29>>1,30>>1,31>>1));
+
+TEST_CONSTEXPR(match_v32hi(
+  _mm512_mask_srai_epi16(
+      
(__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,
+                         
116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131},
+      (__mmask32)~(__mmask32)0,
+      (__m512i)(__v32hi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
+                         16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31},
+      1),
+  0>>1,1>>1,2>>1,3>>1,4>>1,5>>1,6>>1,7>>1,
+  8>>1,9>>1,10>>1,11>>1,12>>1,13>>1,14>>1,15>>1,
+  16>>1,17>>1,18>>1,19>>1,20>>1,21>>1,22>>1,23>>1,
+  24>>1,25>>1,26>>1,27>>1,28>>1,29>>1,30>>1,31>>1));
+
+TEST_CONSTEXPR(match_v16si(
+  _mm512_mask_slli_epi32(
+      
(__m512i)(__v16si){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},
+      (__mmask16)0x5555,
+      (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+      1),
+ 0<<1, 101, 2<<1, 103, 4<<1, 105, 6<<1, 107, 8<<1, 109, 10<<1, 111, 12<<1, 
113, 14<<1, 115));
+
+TEST_CONSTEXPR(match_v16si(
+  _mm512_mask_srli_epi32(
+      
(__m512i)(__v16si){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},
+      (__mmask16)0x5555,
+      (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+      1),
+  0>>1, 101, 2>>1, 103, 4>>1, 105, 6>>1, 107, 8>>1, 109, 10>>1, 111, 12>>1, 
113, 14>>1, 115));
+
+TEST_CONSTEXPR(match_v16si(
+  _mm512_mask_srai_epi32(
+      
(__m512i)(__v16si){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},
+      (__mmask16)0x5555,
+      (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+      1),
+  0>>1, 101, 2>>1, 103, 4>>1, 105, 6>>1, 107, 8>>1, 109, 10>>1, 111, 12>>1, 
113, 14>>1, 115));
+
+TEST_CONSTEXPR(match_v8di(
+  _mm512_mask_slli_epi64(
+      (__m512i)(__v8di){100,101,102,103,104,105,106,107},
+      (__mmask8)0x0F,
+      (__m512i)(__v8di){0,1,2,3,4,5,6,7},
+      2),
+  0<<2,1<<2,2<<2,3<<2, 104,105,106,107));
+
+TEST_CONSTEXPR(match_v8di(
+  _mm512_mask_srli_epi64(
+      (__m512i)(__v8di){100,101,102,103,104,105,106,107},
+      (__mmask8)0x0F,
+      (__m512i)(__v8di){0,1,2,3,4,5,6,7},
+      2),
+  0>>2,1>>2,2>>2,3>>2, 104,105,106,107));
+
+TEST_CONSTEXPR(match_v8di(
+  _mm512_mask_srai_epi64(
+      (__m512i)(__v8di){100,101,102,103,104,105,106,107},
+      (__mmask8)0x0F,
+      (__m512i)(__v8di){0,1,2,3,4,5,6,7},
+      2),
+  0>>2,1>>2,2>>2,3>>2, 104,105,106,107));
+
+TEST_CONSTEXPR(match_v16hi(
+  _mm256_mask_slli_epi16(
+      
(__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},
+      (__mmask16)0xAAAA,
+      (__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+      20),
+  100, 0, 102, 0, 104, 0, 106, 0, 108, 0, 110, 0, 112, 0, 114, 0));
+
+TEST_CONSTEXPR(match_v16hi(
+  _mm256_mask_srli_epi16(
+      
(__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},
+      (__mmask16)0xAAAA,
+      (__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+      20),
+  100, 0, 102, 0, 104, 0, 106, 0, 108, 0, 110, 0, 112, 0, 114, 0));
+
+TEST_CONSTEXPR(match_v16hi(
+  _mm256_mask_srai_epi16(
+      (__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},
+      (__mmask16)0xAAAA,
+      (__m256i)(__v16hi){0, -1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+      20),
+  100, 0Xffff, 102, 0, 104, 0, 106, 0, 108, 0, 110, 0, 112, 0, 114, 0));
+
+TEST_CONSTEXPR(match_v16hi(
+  _mm256_mask_srli_epi16(
+      (__m256i)(__v16hi){100,101,102,103,104,105,106,107,
+                         108,109,110,111,112,113,114,115},
+      (__mmask16)0xAAAA,
+      (__m256i)(__v16hi){   0,  32,  64,  96, 128, 160, 192, 224,
+                           256, 288, 320, 352, 384, 416, 448, 480},
+      5),
+  100, 1, 102, 3, 104, 5, 106, 7, 108, 9, 110, 11, 112, 13, 114, 15));
+
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_slli_epi32(
+      (__m256i)(__v8si){100,101,102,103,104,105,106,107},
+      (__mmask8)0xff,
+      (__m256i)(__v8si){0,1,2,3,4,5,6,7},
+      3),
+  0<<3,1<<3,2<<3,3<<3,4<<3,5<<3,6<<3,7<<3));
+
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_srli_epi32(
+      (__m256i)(__v8si){100,101,102,103,104,105,106,107},
+      (__mmask8)0xff,
+      (__m256i)(__v8si){0,1,2,3,4,5,6,7},
+      3),
+  0>>3,1>>3,2>>3,3>>3,4>>3,5>>3,6>>3,7>>3));
+
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_srai_epi32(
+      (__m256i)(__v8si){100,101,102,103,104,105,106,107},
+      (__mmask8)0xff,
+      (__m256i)(__v8si){0,1,2,3,4,5,6,7},
+      3),
+  0>>3,1>>3,2>>3,3>>3,4>>3,5>>3,6>>3,7>>3));
+
+TEST_CONSTEXPR(match_v4di(
+  _mm256_mask_slli_epi64(
+      (__m256i)(__v4di){100,101,102,103},
+      (__mmask8)0b1010,
+      (__m256i)(__v4di){0,1,2,3},
+      4),
+  100, 1<<4, 102, 3<<4));
+
+TEST_CONSTEXPR(match_v4di(
+  _mm256_mask_srli_epi64(
+      (__m256i)(__v4di){100,101,102,103},
+      (__mmask8)0b1010,
+      (__m256i)(__v4di){0,0xff80,2,3},
+      1),
+  100, 0x7fc0, 102, 3>>1));
+
+TEST_CONSTEXPR(match_v4di(
+  _mm256_mask_srai_epi64(
+      (__m256i)(__v4di){100,101,102,103},
+      (__mmask8)0b1010,
+      (__m256i)(__v4di){0,-128,2,3},
+      2),
+  100, -32, 102, 3>>2));
\ No newline at end of file

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to