Author: woruyu
Date: 2025-09-24T13:03:29Z
New Revision: 0a268f803cdf914f410728e0258524bb658e5b99

URL: 
https://github.com/llvm/llvm-project/commit/0a268f803cdf914f410728e0258524bb658e5b99
DIFF: 
https://github.com/llvm/llvm-project/commit/0a268f803cdf914f410728e0258524bb658e5b99.diff

LOG: [Headers][X86] VectorExprEvaluator::VisitCallExpr - allow SSE/AVX2/AVX512 
pack intrinsics to be used in constexpr (#156003)

Fixes #154283

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.td
    clang/lib/AST/ByteCode/InterpBuiltin.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/lib/Headers/avx2intrin.h
    clang/lib/Headers/avx512bwintrin.h
    clang/lib/Headers/emmintrin.h
    clang/lib/Headers/mmintrin.h
    clang/lib/Headers/smmintrin.h
    clang/test/CodeGen/X86/avx2-builtins.c
    clang/test/CodeGen/X86/avx512bw-builtins.c
    clang/test/CodeGen/X86/mmx-builtins.c
    clang/test/CodeGen/X86/sse2-builtins.c
    clang/test/CodeGen/X86/sse41-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index b80f733066b65..77e599587edc3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -93,9 +93,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] 
in {
   }
 
   let Features = "sse2" in {
-    def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, 
_Vector<8, short>)">;
-    def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, 
_Vector<4, int>)">;
-    def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, 
_Vector<8, short>)">;
     def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, 
_Constant int)">;
     def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
     def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
@@ -108,6 +105,9 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] 
in {
     def pavgw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned 
short>, _Vector<8, unsigned short>)">;
     def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Vector<8, short>)">;
     def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, 
unsigned short>, _Vector<8, unsigned short>)">;
+    def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, 
_Vector<8, short>)">;
+    def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, 
_Vector<4, int>)">;
+    def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, 
_Vector<8, short>)">;
   }
 
   let Features = "sse3" in {
@@ -312,7 +312,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>]
 
 let Features = "sse4.1", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
   def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, 
_Vector<4, float>, _Constant char)">;
-  def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, 
int>)">;
   def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant 
int)">;
   def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, 
float>, _Constant int)">;
   def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, 
double>, _Constant int)">;
@@ -338,6 +337,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, 
Constexpr, RequiredVector
   def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, 
_Vector<16, char>, _Vector<16, char>)">;
 
   def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, 
_Vector<4, int>)">;
+  def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, 
int>)">;
 }
 
 let Features = "sse4.2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
@@ -571,10 +571,6 @@ let Features = "avx", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in
 
 let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
   def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, 
_Vector<32, char>, _Constant char)">;
-  def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, 
_Vector<16, short>)">;
-  def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, 
int>)">;
-  def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, 
_Vector<16, short>)">;
-  def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, 
int>)">;
   def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, 
_Vector<32, char>, _Constant int)">;
   def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Vector<16, short>)">;
   def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
@@ -647,6 +643,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWi
   def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<4, long long int>)">;
 
   def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long 
long int>, _Vector<2, long long int>, _Constant int)">;
+  def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, 
int>)">;
+  def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, 
_Vector<16, short>)">;
+  def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, 
int>)">;
+  def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, 
_Vector<16, short>)">;
 }
 
 let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
@@ -1308,11 +1308,14 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
   def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, 
short>, _Constant int, unsigned int)">;
-  def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, 
_Vector<16, int>)">;
+  def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, 
char>)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, 
_Vector<32, short>)">;
-  def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, 
_Vector<16, int>)">;
+  def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, 
_Vector<16, int>)">;
   def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, 
_Vector<32, short>)">;
-  def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, 
char>)">;
+  def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, 
_Vector<16, int>)">;
 }
 
 let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {

diff  --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5423d3ca73c81..b3ece4ce580cb 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2596,6 +2596,51 @@ static bool interp__builtin_elementwise_int_binop(
   return true;
 }
 
+static bool
+interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
+                         llvm::function_ref<APInt(const APSInt &)> PackFn) {
+  const auto *VT0 = E->getArg(0)->getType()->castAs<VectorType>();
+  const auto *VT1 = E->getArg(1)->getType()->castAs<VectorType>();
+  assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
+  assert(VT0->getElementType() == VT1->getElementType() &&
+         VT0->getNumElements() == VT1->getNumElements() &&
+         "pack builtin VT0 and VT1 ElementType must be same");
+
+  const Pointer &RHS = S.Stk.pop<Pointer>();
+  const Pointer &LHS = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  const ASTContext &ASTCtx = S.getASTContext();
+  const unsigned SrcBits = ASTCtx.getIntWidth(VT0->getElementType());
+  const unsigned LHSVecLen = VT0->getNumElements();
+  const unsigned SrcPerLane = 128 / SrcBits;
+  const unsigned Lanes = LHSVecLen * SrcBits / 128;
+
+  PrimType SrcT = *S.getContext().classify(VT0->getElementType());
+  PrimType DstT = *S.getContext().classify(getElemType(Dst));
+  const bool IsUnsigend = getElemType(Dst)->isUnsignedIntegerType();
+
+  for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
+    const unsigned BaseSrc = Lane * SrcPerLane;
+    const unsigned BaseDst = Lane * (2 * SrcPerLane);
+
+    for (unsigned I = 0; I != SrcPerLane; ++I) {
+      INT_TYPE_SWITCH_NO_BOOL(SrcT, {
+        APSInt A = LHS.elem<T>(BaseSrc + I).toAPSInt();
+        APSInt B = RHS.elem<T>(BaseSrc + I).toAPSInt();
+
+        assignInteger(S, Dst.atIndex(BaseDst + I), DstT,
+                      APSInt(PackFn(A), IsUnsigend));
+        assignInteger(S, Dst.atIndex(BaseDst + SrcPerLane + I), DstT,
+                      APSInt(PackFn(B), IsUnsigend));
+      });
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
 static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
                                                const CallExpr *Call,
                                                unsigned BuiltinID) {
@@ -3475,6 +3520,29 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
           }
           return LHS.lshr(RHS.getZExtValue());
         });
+  case clang::X86::BI__builtin_ia32_packsswb128:
+  case clang::X86::BI__builtin_ia32_packsswb256:
+  case clang::X86::BI__builtin_ia32_packsswb512:
+  case clang::X86::BI__builtin_ia32_packssdw128:
+  case clang::X86::BI__builtin_ia32_packssdw256:
+  case clang::X86::BI__builtin_ia32_packssdw512:
+    return interp__builtin_x86_pack(S, OpPC, Call, [](const APSInt &Src) {
+      return APInt(Src).truncSSat(Src.getBitWidth() / 2);
+    });
+  case clang::X86::BI__builtin_ia32_packusdw128:
+  case clang::X86::BI__builtin_ia32_packusdw256:
+  case clang::X86::BI__builtin_ia32_packusdw512:
+  case clang::X86::BI__builtin_ia32_packuswb128:
+  case clang::X86::BI__builtin_ia32_packuswb256:
+  case clang::X86::BI__builtin_ia32_packuswb512:
+    return interp__builtin_x86_pack(S, OpPC, Call, [](const APSInt &Src) {
+      unsigned DstBits = Src.getBitWidth() / 2;
+      if (Src.isNegative())
+        return APInt::getZero(DstBits);
+      if (Src.isIntN(DstBits))
+        return APInt(Src).trunc(DstBits);
+      return APInt::getAllOnes(DstBits);
+    });
 
   case clang::X86::BI__builtin_ia32_vprotbi:
   case clang::X86::BI__builtin_ia32_vprotdi:

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d10e2afeb2341..a4fca1811c92b 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11575,6 +11575,46 @@ static bool handleVectorElementCast(EvalInfo &Info, 
const FPOptions FPO,
   return false;
 }
 
+static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
+                            llvm::function_ref<APInt(const APSInt &)> PackFn) {
+  APValue LHS, RHS;
+  if (!EvaluateAsRValue(Info, E->getArg(0), LHS) ||
+      !EvaluateAsRValue(Info, E->getArg(1), RHS))
+    return false;
+
+  unsigned LHSVecLen = LHS.getVectorLength();
+  unsigned RHSVecLen = RHS.getVectorLength();
+
+  assert(LHSVecLen != 0 && LHSVecLen == RHSVecLen &&
+         "pack builtin LHSVecLen must equal to RHSVecLen");
+
+  const VectorType *VT0 = E->getArg(0)->getType()->castAs<VectorType>();
+  const unsigned SrcBits = Info.Ctx.getIntWidth(VT0->getElementType());
+
+  const VectorType *DstVT = E->getType()->castAs<VectorType>();
+  QualType DstElemTy = DstVT->getElementType();
+  const bool DstIsUnsigned = DstElemTy->isUnsignedIntegerType();
+
+  const unsigned SrcPerLane = 128 / SrcBits;
+  const unsigned Lanes = LHSVecLen * SrcBits / 128;
+
+  SmallVector<APValue, 64> Out;
+  Out.reserve(LHSVecLen + RHSVecLen);
+
+  for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
+    unsigned base = Lane * SrcPerLane;
+    for (unsigned I = 0; I != SrcPerLane; ++I)
+      Out.emplace_back(APValue(
+          APSInt(PackFn(LHS.getVectorElt(base + I).getInt()), DstIsUnsigned)));
+    for (unsigned I = 0; I != SrcPerLane; ++I)
+      Out.emplace_back(APValue(
+          APSInt(PackFn(RHS.getVectorElt(base + I).getInt()), DstIsUnsigned)));
+  }
+
+  Result = APValue(Out.data(), Out.size());
+  return true;
+}
+
 bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   if (!IsConstantEvaluatedBuiltinCall(E))
     return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -11768,7 +11808,29 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
       }
       return LHS.lshr(RHS.getZExtValue());
     });
-
+  case X86::BI__builtin_ia32_packsswb128:
+  case X86::BI__builtin_ia32_packsswb256:
+  case X86::BI__builtin_ia32_packsswb512:
+  case X86::BI__builtin_ia32_packssdw128:
+  case X86::BI__builtin_ia32_packssdw256:
+  case X86::BI__builtin_ia32_packssdw512:
+    return evalPackBuiltin(E, Info, Result, [](const APSInt &Src) {
+      return APSInt(Src).truncSSat(Src.getBitWidth() / 2);
+    });
+  case X86::BI__builtin_ia32_packusdw128:
+  case X86::BI__builtin_ia32_packusdw256:
+  case X86::BI__builtin_ia32_packusdw512:
+  case X86::BI__builtin_ia32_packuswb128:
+  case X86::BI__builtin_ia32_packuswb256:
+  case X86::BI__builtin_ia32_packuswb512:
+    return evalPackBuiltin(E, Info, Result, [](const APSInt &Src) {
+      unsigned DstBits = Src.getBitWidth() / 2;
+      if (Src.isNegative())
+        return APInt::getZero(DstBits);
+      if (Src.isIntN(DstBits))
+        return APInt((Src).trunc(DstBits));
+      return APInt::getAllOnes(DstBits);
+    });
   case clang::X86::BI__builtin_ia32_pmuldq128:
   case clang::X86::BI__builtin_ia32_pmuldq256:
   case clang::X86::BI__builtin_ia32_pmuldq512:

diff  --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index a3a5b02579081..31759c5386d9f 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -165,9 +165,8 @@ _mm256_abs_epi32(__m256i __a) {
 ///    A 256-bit vector of [16 x i16] used to generate result[127:64] and
 ///    result[255:192].
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packs_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_packs_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -197,9 +196,8 @@ _mm256_packs_epi16(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [8 x i32] used to generate result[127:64] and
 ///    result[255:192].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packs_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_packs_epi32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
 }
 
@@ -228,9 +226,8 @@ _mm256_packs_epi32(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [16 x i16] used to generate result[127:64] and
 ///    result[255:192].
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packus_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_packus_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -260,9 +257,8 @@ _mm256_packus_epi16(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [8 x i32] used to generate result[127:64] and
 ///    result[255:192].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packus_epi32(__m256i __V1, __m256i __V2)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_packus_epi32(__m256i __V1, __m256i __V2) {
   return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
 }
 

diff  --git a/clang/lib/Headers/avx512bwintrin.h 
b/clang/lib/Headers/avx512bwintrin.h
index 8d80e3ec2911a..c36bd814725fa 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -510,9 +510,8 @@ _mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) {
                                              (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_packs_epi32(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_packs_epi32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
 }
 
@@ -532,9 +531,8 @@ _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i 
__A, __m512i __B)
                                        (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_packs_epi16(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_packs_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
 }
 
@@ -554,9 +552,8 @@ _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, 
__m512i __B)
                                         (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_packus_epi32(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_packus_epi32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
 }
 
@@ -576,9 +573,8 @@ _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, 
__m512i __A, __m512i __B)
                                        (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_packus_epi16(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_packus_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
 }
 

diff  --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index fca6229a065be..6597e7e7d4030 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -4159,8 +4159,8 @@ void _mm_mfence(void);
 ///   A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
 ///   written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the converted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_packs_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
 }
 
@@ -4182,8 +4182,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS 
_mm_packs_epi16(__m128i __a,
 ///    A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
 ///    are written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the converted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_packs_epi32(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
 }
 
@@ -4205,8 +4205,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS 
_mm_packs_epi32(__m128i __a,
 ///    A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
 ///    written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the converted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
-                                                              __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_packus_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
 }
 

diff  --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 18e2c2154362a..5f617530b6f78 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -156,11 +156,10 @@ _mm_cvtm64_si64(__m64 __m)
 ///    written to the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_packs_pi16(__m64 __m1, __m64 __m2)
-{
-    return __trunc64(__builtin_ia32_packsswb128(
-        (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_packs_pi16(__m64 __m1, __m64 __m2) {
+  return __trunc64(__builtin_ia32_packsswb128(
+      (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
 }
 
 /// Converts, with saturation, 32-bit signed integers from both 64-bit integer
@@ -182,11 +181,10 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2)
 ///    written to the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the converted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_packs_pi32(__m64 __m1, __m64 __m2)
-{
-    return __trunc64(__builtin_ia32_packssdw128(
-        (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_packs_pi32(__m64 __m1, __m64 __m2) {
+  return __trunc64(__builtin_ia32_packssdw128(
+      (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
 }
 
 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -208,11 +206,10 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2)
 ///    written to the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_packs_pu16(__m64 __m1, __m64 __m2)
-{
-    return __trunc64(__builtin_ia32_packuswb128(
-        (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_packs_pu16(__m64 __m1, __m64 __m2) {
+  return __trunc64(__builtin_ia32_packuswb128(
+      (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
 }
 
 /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]

diff  --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 3aff679b608ba..5e63a1ae321bc 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1466,8 +1466,8 @@ _mm_cvtepu32_epi64(__m128i __V) {
 ///    A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
 ///    written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the converted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1,
-                                                              __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_packus_epi32(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);
 }
 

diff  --git a/clang/test/CodeGen/X86/avx2-builtins.c 
b/clang/test/CodeGen/X86/avx2-builtins.c
index 00cb1a4390d79..20233f6106146 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1038,24 +1038,28 @@ __m256i test_mm256_packs_epi16(__m256i a, __m256i b) {
   // CHECK: call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %{{.*}}, <16 x 
i16> %{{.*}})
   return _mm256_packs_epi16(a, b);
 }
+TEST_CONSTEXPR(match_v32qi(_mm256_packs_epi16((__m256i)(__v16hi){130, -200, 
127, -128, 300, -1000, 42, -42, 500, -500, 1, -1, 128, -129, 256, -256}, 
(__m256i)(__v16hi){0, 1, -1, 255, -129, 128, 20000, -32768, 32767, -32767, 127, 
-128, 30000, -30000, 90, -90}), 127, -128, 127, -128, 127, -128, 42, -42, 0, 1, 
-1, 127, -128, 127, 127, -128, 127, -128, 1, -1, 127, -128, 127, -128, 127, 
-128, 127, -128, 127, -128, 90, -90));
 
 __m256i test_mm256_packs_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_packs_epi32
   // CHECK: call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %{{.*}}, <8 x 
i32> %{{.*}})
   return _mm256_packs_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_packs_epi32((__m256i)(__v8si){40000, -50000, 
32767, -32768, 70000, -70000, 42, -42}, (__m256i)(__v8si){0, 1, -1, 65536, 
-1000000, 1000000, 32768, -32769}), 32767, -32768, 32767, -32768, 0, 1, -1, 
32767, 32767, -32768, 42, -42, -32768, 32767, 32767, -32768));
 
 __m256i test_mm256_packs_epu16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_packs_epu16
   // CHECK:  call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %{{.*}}, <16 x 
i16> %{{.*}})
   return _mm256_packus_epi16(a, b);
 }
+TEST_CONSTEXPR(match_v32qi(_mm256_packus_epi16((__m256i)(__v16hi){-1, 0, 1, 
127, 128, 255, 256, -200, 300, 42, -42, 500, 20000, -32768, 129, -129}, 
(__m256i)(__v16hi){0, 1, -1, 255, -129, 128, 20000, -32768, 32767, -32767, 127, 
-128, 30000, -30000, 90, -90}), 0, 0, 1, 127, -128, -1, -1, 0, 0, 1, 0, -1, 0, 
-128, -1, 0, -1, 42, 0, -1, -1, 0, -127, 0, -1, 0, 127, 0, -1, 0, 90, 0));
 
 __m256i test_mm256_packs_epu32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_packs_epu32
   // CHECK: call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %{{.*}}, <8 x 
i32> %{{.*}})
   return _mm256_packus_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_packus_epi32((__m256i)(__v8si){40000, 
-50000, 32767, -32768, 70000, -70000, 42, -42}, (__m256i)(__v8si){0, 1, -1, 
65536, -1000000, 1000000, 32768, -32769}), -25536, 0, 32767, 0, 0, 1, 0, -1, 
-1, 0, 42, 0, 0, -1, -32768, 0));
 
 __m256i test_mm256_permute2x128_si256(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_permute2x128_si256

diff  --git a/clang/test/CodeGen/X86/avx512bw-builtins.c 
b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 1875e202b0c0a..3f42ac0268978 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1058,6 +1058,7 @@ __m512i test_mm512_packs_epi32(__m512i __A, __m512i __B) {
   // CHECK: @llvm.x86.avx512.packssdw.512
   return _mm512_packs_epi32(__A,__B); 
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_packs_epi32((__m512i)(__v16si){40000, 
-50000, 32767, -32768, 70000, -70000, 42, -42, 0, 1, -1, 30000, 32768, -32769, 
65535, -65536}, (__m512i)(__v16si){0, 1, -1, 65536, -1000000, 1000000, 32768, 
-32769, 123456, -123456, 32767, -32768, 22222, -22222, 40000, -40000}), 32767, 
-32768, 32767, -32768, 0, 1, -1, 32767, 32767, -32768, 42, -42, -32768, 32767, 
32767, -32768, 0, 1, -1, 30000, 32767, -32768, 32767, -32768, 32767, -32768, 
32767, -32768, 22222, -22222, 32767, -32768));
 __m512i test_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_packs_epi32
   // CHECK: @llvm.x86.avx512.packssdw.512
@@ -1075,6 +1076,7 @@ __m512i test_mm512_packs_epi16(__m512i __A, __m512i __B) {
   // CHECK: @llvm.x86.avx512.packsswb.512
   return _mm512_packs_epi16(__A,__B); 
 }
+TEST_CONSTEXPR(match_v64qi(_mm512_packs_epi16((__m512i)(__v32hi){130, -200, 
127, -128, 300, -1000, 42, -42, 32767, -32767, 127, -128, 30000, -30000, 90, 
-90, 130, -200, 0, -1, 126, -127, 128, -129, 500, -500, 7, -7, 255, -255, 127, 
-128}, (__m512i)(__v32hi){0, 1, -1, 255, -129, 128, 20000, -32768, 5, -5, 100, 
-100, 127, -128, 512, -512, 1, 2, -2, 300, -300, 127, -128, 42, 0, 1, -1, 127, 
-128, 90, -90, -32768}), 127, -128, 127, -128, 127, -128, 42, -42, 0, 1, -1, 
127, -128, 127, 127, -128, 127, -128, 127, -128, 127, -128, 90, -90, 5, -5, 
100, -100, 127, -128, 127, -128, 127, -128, 0, -1, 126, -127, 127, -128, 1, 2, 
-2, 127, -128, 127, -128, 42, 127, -128, 7, -7, 127, -128, 127, -128, 0, 1, -1, 
127, -128, 90, -90, -128));
 __m512i test_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, 
__m512i __B) {
   // CHECK-LABEL: test_mm512_mask_packs_epi16
   // CHECK: @llvm.x86.avx512.packsswb.512
@@ -1092,6 +1094,7 @@ __m512i test_mm512_packus_epi32(__m512i __A, __m512i __B) 
{
   // CHECK: @llvm.x86.avx512.packusdw.512
   return _mm512_packus_epi32(__A,__B); 
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_packus_epi32((__m512i)(__v16si){40000, 
-50000, 32767, -32768, 70000, -70000, 42, -42, 0, 1, -1, 65535, 32768, -32769, 
22222, -22222}, (__m512i)(__v16si){0, 1, -1, 65536, -1000000, 1000000, 32768, 
-32769, 123456, -123456, 32767, -32768, 40000, -40000, 65535, 0}), -25536, 0, 
32767, 0, 0, 1, 0, -1, -1, 0, 42, 0, 0, -1, -32768, 0, 0, 1, 0, -1, -1, 0, 
32767, 0, -32768, 0, 22222, 0, -25536, 0, -1, 0));
 __m512i test_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) 
{
   // CHECK-LABEL: test_mm512_maskz_packus_epi32
   // CHECK: @llvm.x86.avx512.packusdw.512
@@ -1109,6 +1112,7 @@ __m512i test_mm512_packus_epi16(__m512i __A, __m512i __B) 
{
   // CHECK: @llvm.x86.avx512.packuswb.512
   return _mm512_packus_epi16(__A,__B); 
 }
+TEST_CONSTEXPR(match_v64qi(_mm512_packus_epi16((__m512i)(__v32hi){-1, 0, 1, 
127, 128, 255, 256, -200, 300, 42, -42, 500, 20000, -32768, 129, -129, -1, 0, 
1, 127, 128, 255, 256, -200, 300, 42, -42, 500, 20000, -32768, 129, -129}, 
(__m512i)(__v32hi){0, 1, -1, 255, -129, 128, 20000, -32768, 32767, -32767, 127, 
-128, 30000, -30000, 90, -90, 0, 1, -1, 255, -129, 128, 20000, -32768, 32767, 
-32767, 127, -128, 30000, -30000, 90, -90}), 0, 0, 1, 127, -128, -1, -1, 0, 0, 
1, 0, -1, 0, -128, -1, 0, -1, 42, 0, -1, -1, 0, -127, 0, -1, 0, 127, 0, -1, 0, 
90, 0, 0, 0, 1, 127, -128, -1, -1, 0, 0, 1, 0, -1, 0, -128, -1, 0, -1, 42, 0, 
-1, -1, 0, -127, 0, -1, 0, 127, 0, -1, 0, 90, 0));
 __m512i test_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, 
__m512i __B) {
   // CHECK-LABEL: test_mm512_mask_packus_epi16
   // CHECK: @llvm.x86.avx512.packuswb.512

diff  --git a/clang/test/CodeGen/X86/mmx-builtins.c 
b/clang/test/CodeGen/X86/mmx-builtins.c
index 266c78ebe7a3f..26c5f7315457e 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -448,18 +448,21 @@ __m64 test_mm_packs_pi16(__m64 a, __m64 b) {
   // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(
   return _mm_packs_pi16(a, b);
 }
+TEST_CONSTEXPR(match_v8qi(_mm_packs_pi16((__m64)(__v4hi){130, -200, 127, 
-128}, (__m64)(__v4hi){0, 1, -1, 255}), 127, -128, 127, -128, 0, 1, -1, 127));
 
 __m64 test_mm_packs_pi32(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_packs_pi32
   // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(
   return _mm_packs_pi32(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_packs_pi32((__m64)(__v2si){40000, -50000}, 
(__m64)(__v2si){0, 70000}), 32767, -32768, 0, 32767));
 
 __m64 test_mm_packs_pu16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_packs_pu16
   // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(
   return _mm_packs_pu16(a, b);
 }
+TEST_CONSTEXPR(match_v8qi(_mm_packs_pu16((__m64)(__v4hi){-1, 0, 128, 300}, 
(__m64)(__v4hi){255, -200, 42, -42}), 0, 0, -128, -1, -1, 0, 42, 0));
 
 __m64 test_mm_sad_pu8(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_sad_pu8

diff  --git a/clang/test/CodeGen/X86/sse2-builtins.c 
b/clang/test/CodeGen/X86/sse2-builtins.c
index ddcff9728c543..84b90c09444c2 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1026,18 +1026,21 @@ __m128i test_mm_packs_epi16(__m128i A, __m128i B) {
   // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x 
i16> %{{.*}})
   return _mm_packs_epi16(A, B);
 }
+TEST_CONSTEXPR(match_v16qi(_mm_packs_epi16((__m128i)(__v8hi){130, -200, 127, 
-128, 300, -1000, 42, -42}, (__m128i)(__v8hi){0, 1, -1, 255, -129, 128, 20000, 
-32768}), 127, -128, 127, -128, 127, -128, 42, -42, 0, 1, -1, 127, -128, 127, 
127, -128));
 
 __m128i test_mm_packs_epi32(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packs_epi32
   // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x 
i32> %{{.*}})
   return _mm_packs_epi32(A, B);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_packs_epi32((__m128i)(__v4si){40000, -50000, 
32767, -32768}, (__m128i)(__v4si){0, 1, -1, 70000}), 32767, -32768, 32767, 
-32768, 0, 1, -1, 32767));
 
 __m128i test_mm_packus_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_packus_epi16
   // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x 
i16> %{{.*}})
   return _mm_packus_epi16(A, B);
 }
+TEST_CONSTEXPR(match_v16qi(_mm_packus_epi16((__m128i)(__v8hi){-1, 0, 1, 127, 
300, -1000, 255, -42}, (__m128i)(__v8hi){0, 1, -1, 255, -129, 128, 20000, 
-32768}), 0, 0, 1, 127, -1, 0, -1, 0, 0, 1, 0, -1, 0, -128, -1, 0));
 
 void test_mm_pause(void) {
   // CHECK-LABEL: test_mm_pause

diff  --git a/clang/test/CodeGen/X86/sse41-builtins.c 
b/clang/test/CodeGen/X86/sse41-builtins.c
index c7265b188d572..3c3724643870e 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -399,6 +399,7 @@ __m128i test_mm_packus_epi32(__m128i x, __m128i y) {
   // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %{{.*}}, <4 x 
i32> %{{.*}})
   return _mm_packus_epi32(x, y);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_packus_epi32((__m128i)(__v4si){40000, -50000, 
32767, -32768}, (__m128i)(__v4si){0, 1, -1, 70000}), -25536, 0, 32767, 0, 0, 1, 
0, -1));
 
 __m128d test_mm_round_pd(__m128d x) {
   // CHECK-LABEL: test_mm_round_pd


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to