https://github.com/Arghnews updated https://github.com/llvm/llvm-project/pull/155542
>From 759f06ff21d819986603ca50b2ba213ce68d3368 Mon Sep 17 00:00:00 2001 From: Justin Riddell <arghn...@hotmail.co.uk> Date: Wed, 27 Aug 2025 04:04:37 +0100 Subject: [PATCH] [Headers][X86] VisitCallExpr constexpr immediate shifts (#154293) Implement VectorExprEvaluator::VisitCallExpr constexpr support for left, right, arithmetic shift for MMX/SSE/AVX2/AVX512 intrinsics _mm*_slli_epi* _mm*_srli_epi* _mm*_srai_epi* _mm*_mask_slli_epi* _mm*_maskz_slli_epi* NOTE: not all intrinsics have all widths i.e. _mm_srli_pi32 doesn't have pi64 etc. --- clang/include/clang/Basic/BuiltinsX86.td | 96 ++-- clang/lib/AST/ExprConstant.cpp | 256 +++++++--- clang/lib/Headers/avx2intrin.h | 40 +- clang/lib/Headers/avx512bwintrin.h | 45 +- clang/lib/Headers/avx512fintrin.h | 87 ++-- clang/lib/Headers/avx512vlbwintrin.h | 50 +- clang/lib/Headers/avx512vlintrin.h | 95 ++-- clang/lib/Headers/emmintrin.h | 32 +- clang/lib/Headers/mmintrin.h | 64 +-- .../CodeGen/X86/shift-immediate-constexpr.c | 441 ++++++++++++++++++ 10 files changed, 877 insertions(+), 329 deletions(-) create mode 100644 clang/test/CodeGen/X86/shift-immediate-constexpr.c diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 527acd9ef086e..7fdfd363b299f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -275,15 +275,8 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; - def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; - def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; - def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; - def psllqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">; - def psrlwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; - def psrldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; - def psrlqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">; - def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; - def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; + def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long " + "int>, _Vector<2, long long int>)">; def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">; def pslldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">; def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">; @@ -291,6 +284,19 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; + + def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; + def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; + def psllqi128 + : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">; + + def psrlwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; + def psrldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; + def psrlqi128 + : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">; + + def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; + def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; } let Features = "sse3", Attributes = [NoThrow] in { @@ -594,24 +600,23 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; - def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">; + def psignd256 + : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; - def pslldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; - def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">; + def pslldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, " + "long long int>, _Constant int)">; def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; - def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">; - def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; - def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">; - def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; - def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">; + def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long " + "int>, _Vector<2, long long int>)">; + def psraw256 + : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; - def psrldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; - def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">; - def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; - def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">; - def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; - def psrlqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">; + def psrldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, " + "long long int>, _Constant int)">; + def psrlw256 + : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; + def psrld256 + : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; @@ -628,6 +633,19 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; + def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">; + def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">; + def psllqi256 + : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">; + + def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">; + def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">; + def psrlqi256 + : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">; + + def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">; + def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">; + def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; @@ -2097,8 +2115,8 @@ let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorW def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; - def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; - def psllwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">; + def psllw512 + : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; } let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { @@ -2109,7 +2127,9 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector def psllv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def psllwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">; def pslldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">; def psllqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">; } @@ -2126,7 +2146,9 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">; def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">; def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">; } @@ -2152,10 +2174,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 } let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def psraw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; - def psrawi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">; - def psrlw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; - def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">; + def psraw512 + : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; + def psrlw512 + : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; def pslldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; def psrldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; } @@ -2487,7 +2509,9 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def scalefss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def psrawi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">; def psradi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">; def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">; } @@ -2500,11 +2524,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 19703e40d2696..2d4c8a7c11017 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11621,6 +11621,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_pmulhw128: case clang::X86::BI__builtin_ia32_pmulhw256: case clang::X86::BI__builtin_ia32_pmulhw512: + case clang::X86::BI__builtin_ia32_psllv2di: case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: @@ -11630,7 +11631,41 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_psrlv2di: case clang::X86::BI__builtin_ia32_psrlv4di: case clang::X86::BI__builtin_ia32_psrlv4si: - case clang::X86::BI__builtin_ia32_psrlv8si:{ + case clang::X86::BI__builtin_ia32_psrlv8si: + + // Logical left shift by immediate + case clang::X86::BI__builtin_ia32_psllwi128: + case clang::X86::BI__builtin_ia32_pslldi128: + case clang::X86::BI__builtin_ia32_psllqi128: + case clang::X86::BI__builtin_ia32_psllwi256: + case clang::X86::BI__builtin_ia32_pslldi256: + case clang::X86::BI__builtin_ia32_psllqi256: + case clang::X86::BI__builtin_ia32_psllwi512: + case clang::X86::BI__builtin_ia32_pslldi512: + case clang::X86::BI__builtin_ia32_psllqi512: + + // Logical right shift by immediate + case clang::X86::BI__builtin_ia32_psrlwi128: + case clang::X86::BI__builtin_ia32_psrldi128: + case clang::X86::BI__builtin_ia32_psrlqi128: + case clang::X86::BI__builtin_ia32_psrlwi256: + case clang::X86::BI__builtin_ia32_psrldi256: + case clang::X86::BI__builtin_ia32_psrlqi256: + case clang::X86::BI__builtin_ia32_psrlwi512: + case clang::X86::BI__builtin_ia32_psrldi512: + case clang::X86::BI__builtin_ia32_psrlqi512: + + // Arithmetic right shift by immediate + case clang::X86::BI__builtin_ia32_psrawi128: + case clang::X86::BI__builtin_ia32_psradi128: + case clang::X86::BI__builtin_ia32_psraqi128: + case clang::X86::BI__builtin_ia32_psrawi256: + case clang::X86::BI__builtin_ia32_psradi256: + case clang::X86::BI__builtin_ia32_psraqi256: + case clang::X86::BI__builtin_ia32_psrawi512: + case clang::X86::BI__builtin_ia32_psradi512: + case clang::X86::BI__builtin_ia32_psraqi512: { + APValue SourceLHS, SourceRHS; if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) @@ -11644,64 +11679,181 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt(); - APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt(); - switch (E->getBuiltinCallee()) { - case Builtin::BI__builtin_elementwise_add_sat: - ResultElements.push_back(APValue( - APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS), - DestUnsigned))); - break; - case Builtin::BI__builtin_elementwise_sub_sat: - ResultElements.push_back(APValue( - APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS), - DestUnsigned))); - break; - case clang::X86::BI__builtin_ia32_pmulhuw128: - case clang::X86::BI__builtin_ia32_pmulhuw256: - case clang::X86::BI__builtin_ia32_pmulhuw512: - ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhu(LHS, RHS), - /*isUnsigned=*/true))); - break; - case clang::X86::BI__builtin_ia32_pmulhw128: - case clang::X86::BI__builtin_ia32_pmulhw256: - case clang::X86::BI__builtin_ia32_pmulhw512: - ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS), - /*isUnsigned=*/false))); - break; - case clang::X86::BI__builtin_ia32_psllv2di: - case clang::X86::BI__builtin_ia32_psllv4di: - case clang::X86::BI__builtin_ia32_psllv4si: - case clang::X86::BI__builtin_ia32_psllv8si: - if (RHS.uge(RHS.getBitWidth())) { - ResultElements.push_back( - APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); + + if (SourceRHS.isInt()) { + uint64_t LaneWidth = 0; + bool IsLeftShift = false; + bool IsRightShift = false; + bool IsArithmeticRightShift = false; + + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_psllwi128: + case clang::X86::BI__builtin_ia32_psllwi256: + case clang::X86::BI__builtin_ia32_psllwi512: + IsLeftShift = true; + LaneWidth = 16; + break; + case clang::X86::BI__builtin_ia32_pslldi128: + case clang::X86::BI__builtin_ia32_pslldi256: + case clang::X86::BI__builtin_ia32_pslldi512: + IsLeftShift = true; + LaneWidth = 32; + break; + case clang::X86::BI__builtin_ia32_psllqi128: + case clang::X86::BI__builtin_ia32_psllqi256: + case clang::X86::BI__builtin_ia32_psllqi512: + IsLeftShift = true; + LaneWidth = 64; break; + + case clang::X86::BI__builtin_ia32_psrlwi128: + case clang::X86::BI__builtin_ia32_psrlwi256: + case clang::X86::BI__builtin_ia32_psrlwi512: + IsRightShift = true; + LaneWidth = 16; + break; + case clang::X86::BI__builtin_ia32_psrldi128: + case clang::X86::BI__builtin_ia32_psrldi256: + case clang::X86::BI__builtin_ia32_psrldi512: + IsRightShift = true; + LaneWidth = 32; + break; + case clang::X86::BI__builtin_ia32_psrlqi128: + case clang::X86::BI__builtin_ia32_psrlqi256: + case clang::X86::BI__builtin_ia32_psrlqi512: + IsRightShift = true; + LaneWidth = 64; + break; + + case clang::X86::BI__builtin_ia32_psrawi128: + case clang::X86::BI__builtin_ia32_psrawi256: + case clang::X86::BI__builtin_ia32_psrawi512: + IsArithmeticRightShift = true; + LaneWidth = 16; + break; + case clang::X86::BI__builtin_ia32_psradi128: + case clang::X86::BI__builtin_ia32_psradi256: + case clang::X86::BI__builtin_ia32_psradi512: + IsArithmeticRightShift = true; + LaneWidth = 32; + break; + case clang::X86::BI__builtin_ia32_psraqi128: + case clang::X86::BI__builtin_ia32_psraqi256: + case clang::X86::BI__builtin_ia32_psraqi512: + IsArithmeticRightShift = true; + LaneWidth = 64; + break; + + default: + llvm_unreachable("Unexpected builtin callee"); } - ResultElements.push_back( - APValue(APSInt(LHS.shl(RHS.getZExtValue()), DestUnsigned))); - break; - case clang::X86::BI__builtin_ia32_psrav4si: - case clang::X86::BI__builtin_ia32_psrav8si: - if (RHS.uge(RHS.getBitWidth())) { + + const APSInt RHS = SourceRHS.getInt(); + const auto ShiftAmount = RHS.getZExtValue(); + APInt ResultOut; + if (IsArithmeticRightShift) { + ResultOut = LHS.ashr(std::min(ShiftAmount, LaneWidth)); + } else if (ShiftAmount >= LaneWidth) { + ResultOut = APInt(LaneWidth, 0); + } else if (IsLeftShift) { + ResultOut = LHS.shl(ShiftAmount); + } else if (IsRightShift) { + ResultOut = LHS.lshr(ShiftAmount); + } else { + llvm_unreachable("Invalid shift type"); + } + ResultElements.push_back(APValue(APSInt( + std::move(ResultOut), + /*isUnsigned=*/DestEltTy->isUnsignedIntegerOrEnumerationType()))); + } else { + APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt(); + switch (E->getBuiltinCallee()) { + case Builtin::BI__builtin_elementwise_add_sat: + ResultElements.push_back(APValue( + APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS), + DestUnsigned))); + break; + case Builtin::BI__builtin_elementwise_sub_sat: + ResultElements.push_back(APValue( + APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS), + DestUnsigned))); + break; + case clang::X86::BI__builtin_ia32_pmulhuw128: + case clang::X86::BI__builtin_ia32_pmulhuw256: + case clang::X86::BI__builtin_ia32_pmulhuw512: ResultElements.push_back( - APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned))); + APValue(APSInt(llvm::APIntOps::mulhu(LHS, RHS), + /*isUnsigned=*/true))); break; - } - ResultElements.push_back( - APValue(APSInt(LHS.ashr(RHS.getZExtValue()), DestUnsigned))); - break; - case clang::X86::BI__builtin_ia32_psrlv2di: - case clang::X86::BI__builtin_ia32_psrlv4di: - case clang::X86::BI__builtin_ia32_psrlv4si: - case clang::X86::BI__builtin_ia32_psrlv8si: - if (RHS.uge(RHS.getBitWidth())) { + case clang::X86::BI__builtin_ia32_pmulhw128: + case clang::X86::BI__builtin_ia32_pmulhw256: + case clang::X86::BI__builtin_ia32_pmulhw512: + ResultElements.push_back( + APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS), + /*isUnsigned=*/false))); + break; + case clang::X86::BI__builtin_ia32_psllv2di: + case clang::X86::BI__builtin_ia32_psllv4di: + case clang::X86::BI__builtin_ia32_psllv4si: + case clang::X86::BI__builtin_ia32_psllv8si: + if (RHS.uge(RHS.getBitWidth())) { + ResultElements.push_back(APValue( + APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); + break; + } + ResultElements.push_back( + APValue(APSInt(LHS.shl(RHS.getZExtValue()), DestUnsigned))); + break; + case clang::X86::BI__builtin_ia32_psrav4si: + case clang::X86::BI__builtin_ia32_psrav8si: + if (RHS.uge(RHS.getBitWidth())) { + ResultElements.push_back( + APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned))); + break; + } ResultElements.push_back( - APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); + APValue(APSInt(LHS.ashr(RHS.getZExtValue()), DestUnsigned))); break; + case clang::X86::BI__builtin_ia32_psrlv2di: + case clang::X86::BI__builtin_ia32_psrlv4di: + case clang::X86::BI__builtin_ia32_psrlv4si: + case clang::X86::BI__builtin_ia32_psrlv8si: + if (RHS.uge(RHS.getBitWidth())) { + ResultElements.push_back(APValue( + APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); + break; + } + ResultElements.push_back( + APValue(APSInt(LHS.lshr(RHS.getZExtValue()), DestUnsigned))); + break; + APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt(); + switch (E->getBuiltinCallee()) { + case Builtin::BI__builtin_elementwise_add_sat: + ResultElements.push_back(APValue( + APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS), + DestEltTy->isUnsignedIntegerOrEnumerationType()))); + break; + case Builtin::BI__builtin_elementwise_sub_sat: + ResultElements.push_back(APValue( + APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS), + DestEltTy->isUnsignedIntegerOrEnumerationType()))); + break; + case clang::X86::BI__builtin_ia32_pmulhuw128: + case clang::X86::BI__builtin_ia32_pmulhuw256: + case clang::X86::BI__builtin_ia32_pmulhuw512: + ResultElements.push_back( + APValue(APSInt(llvm::APIntOps::mulhu(LHS, RHS), + /*isUnsigned=*/true))); + break; + case clang::X86::BI__builtin_ia32_pmulhw128: + case clang::X86::BI__builtin_ia32_pmulhw256: + case clang::X86::BI__builtin_ia32_pmulhw512: + ResultElements.push_back( + APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS), + /*isUnsigned=*/false))); + break; + } } - ResultElements.push_back( - APValue(APSInt(LHS.lshr(RHS.getZExtValue()), DestUnsigned))); - break; } } diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index ce5b2b7544d8c..baeb2e7cbff24 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -2124,9 +2124,8 @@ _mm256_sign_epi32(__m256i __a, __m256i __b) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_slli_epi16(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_slli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); } @@ -2164,9 +2163,8 @@ _mm256_sll_epi16(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_slli_epi32(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_slli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); } @@ -2204,9 +2202,8 @@ _mm256_sll_epi32(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_slli_epi64(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_slli_epi64(__m256i __a, int __count) { return __builtin_ia32_psllqi256((__v4di)__a, __count); } @@ -2245,9 +2242,8 @@ _mm256_sll_epi64(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srai_epi16(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srai_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); } @@ -2287,9 +2283,8 @@ _mm256_sra_epi16(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srai_epi32(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srai_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); } @@ -2368,9 +2363,8 @@ _mm256_sra_epi32(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srli_epi16(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); } @@ -2408,9 +2402,8 @@ _mm256_srl_epi16(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srli_epi32(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); } @@ -2448,9 +2441,8 @@ _mm256_srl_epi32(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srli_epi64(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srli_epi64(__m256i __a, int __count) { return __builtin_ia32_psrlqi256((__v4di)__a, __count); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 9263f7af3ee2f..723865ffa755e 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1483,24 +1483,21 @@ _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_slli_epi16(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_slli_epi16(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_slli_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_slli_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); @@ -1575,24 +1572,21 @@ _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srai_epi16(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srai_epi16(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srai_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srai_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); @@ -1620,24 +1614,21 @@ _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srli_epi16(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srli_epi16(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srli_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B), (__v32hi)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 81c8e8e934493..611c8314b0ed9 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -5095,91 +5095,81 @@ _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_ror_epi64((A), (B)), \ (__v8di)_mm512_setzero_si512())) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_slli_epi32(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_slli_epi32(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_slli_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_slli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_slli_epi64(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_slli_epi64(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, + unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_slli_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_slli_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srli_epi32(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srli_epi32(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srli_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srli_epi64(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srli_epi64(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srli_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, - unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srli_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); @@ -6584,46 +6574,41 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) (__mmask8)(U), \ (int)(R))) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srai_epi32(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srai_epi32(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srai_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, - unsigned int __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srai_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srai_epi64(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srai_epi64(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, + unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srai_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srai_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index dcd72e9240f2c..29880968c0562 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -1963,18 +1963,16 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_slli_epi16(__A, (int)__B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_slli_epi16(__A, (int)__B), (__v16hi)_mm256_setzero_si256()); @@ -2100,34 +2098,30 @@ _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srai_epi16(__A, (int)__B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srai_epi16(__A, (int)__B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srai_epi16(__A, (int)__B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srai_epi16(__A, (int)__B), (__v16hi)_mm256_setzero_si256()); @@ -2165,33 +2159,29 @@ _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srli_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srli_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srli_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srli_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index a1f2a1c92a863..c41621d4850b7 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -4503,17 +4503,16 @@ _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_slli_epi32(__A, (int)__B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_slli_epi32(__A, (int)__B), (__v8si)_mm256_setzero_si256()); @@ -4567,17 +4566,16 @@ _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_slli_epi64(__A, (int)__B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_slli_epi64(__A, (int)__B), (__v4di)_mm256_setzero_si256()); @@ -4847,17 +4845,16 @@ _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srli_epi32(__A, (int)__B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srli_epi32(__A, (int)__B), (__v8si)_mm256_setzero_si256()); @@ -4911,17 +4908,16 @@ _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srli_epi64(__A, (int)__B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srli_epi64(__A, (int)__B), (__v4di)_mm256_setzero_si256()); @@ -6370,33 +6366,30 @@ _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srai_epi32(__A, (int)__B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srai_epi32(__A, (int)__B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srai_epi32(__A, (int)__B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srai_epi32(__A, (int)__B), (__v8si)_mm256_setzero_si256()); @@ -6446,46 +6439,40 @@ _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_srai_epi64(__m128i __A, unsigned int __imm) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_srai_epi64(__m128i __A, unsigned int __imm) { return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi64( + __m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_srai_epi64(__A, __imm), \ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_srai_epi64(__A, __imm), \ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srai_epi64(__m256i __A, unsigned int __imm) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srai_epi64(__m256i __A, unsigned int __imm) { return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, - unsigned int __imm) -{ + unsigned int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_srai_epi64(__A, __imm), \ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_srai_epi64(__A, __imm), \ (__v4di)_mm256_setzero_si256()); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 8b6b62458dac1..99ab5a2475105 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2772,8 +2772,8 @@ _mm_xor_si128(__m128i __a, __m128i __b) { /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_slli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); } @@ -2808,8 +2808,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_slli_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); } @@ -2844,8 +2844,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_slli_epi64(__m128i __a, int __count) { return __builtin_ia32_psllqi128((__v2di)__a, __count); } @@ -2881,8 +2881,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srai_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); } @@ -2919,8 +2919,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srai_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); } @@ -2981,8 +2981,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); } @@ -3017,8 +3017,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srli_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); } @@ -3053,8 +3053,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srli_epi64(__m128i __a, int __count) { return __builtin_ia32_psrlqi128((__v2di)__a, __count); } diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h index 6fe9d67b8976d..4ed95c5b7bb71 100644 --- a/clang/lib/Headers/mmintrin.h +++ b/clang/lib/Headers/mmintrin.h @@ -778,11 +778,9 @@ _mm_sll_pi16(__m64 __m, __m64 __count) /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted /// values. If \a __count is greater or equal to 16, the result is set to all /// 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_slli_pi16(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_slli_pi16(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psllwi128((__v8hi)__zext128(__m), __count)); } /// Left-shifts each 32-bit signed integer element of the first @@ -825,11 +823,9 @@ _mm_sll_pi32(__m64 __m, __m64 __count) /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted /// values. If \a __count is greater or equal to 32, the result is set to all /// 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_slli_pi32(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_slli_pi32(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_pslldi128((__v4si)__zext128(__m), __count)); } /// Left-shifts the first 64-bit integer parameter by the number of bits @@ -867,11 +863,9 @@ _mm_sll_si64(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the left-shifted value. If /// \a __count is greater or equal to 64, the result is set to 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_slli_si64(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_slli_si64(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psllqi128((__v2di)__zext128(__m), __count)); } /// Right-shifts each 16-bit integer element of the first parameter, @@ -916,11 +910,9 @@ _mm_sra_pi16(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srai_pi16(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srai_pi16(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psrawi128((__v8hi)__zext128(__m), __count)); } /// Right-shifts each 32-bit integer element of the first parameter, @@ -965,11 +957,9 @@ _mm_sra_pi32(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srai_pi32(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srai_pi32(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psradi128((__v4si)__zext128(__m), __count)); } /// Right-shifts each 16-bit integer element of the first parameter, @@ -1012,11 +1002,9 @@ _mm_srl_pi16(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srli_pi16(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srli_pi16(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__zext128(__m), __count)); } /// Right-shifts each 32-bit integer element of the first parameter, @@ -1059,11 +1047,9 @@ _mm_srl_pi32(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srli_pi32(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srli_pi32(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psrldi128((__v4si)__zext128(__m), __count)); } /// Right-shifts the first 64-bit integer parameter by the number of bits @@ -1102,11 +1088,9 @@ _mm_srl_si64(__m64 __m, __m64 __count) /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the right-shifted value. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srli_si64(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srli_si64(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psrlqi128((__v2di)__zext128(__m), __count)); } /// Performs a bitwise AND of two 64-bit integer vectors. diff --git a/clang/test/CodeGen/X86/shift-immediate-constexpr.c b/clang/test/CodeGen/X86/shift-immediate-constexpr.c new file mode 100644 index 0000000000000..c0888e09ccec1 --- /dev/null +++ b/clang/test/CodeGen/X86/shift-immediate-constexpr.c @@ -0,0 +1,441 @@ +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386 -target-feature +sse2 -emit-llvm -Wall -Werror + +#include <mmintrin.h> +#include "builtin_test_helpers.h" + +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 0), 0, 1, 2, 3)); +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 1), 0, 1 << 1, 2 << 1, 3 << 1)); +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 15), 0, 1U << 15, 2 << 15, 3 << 15)); +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 16), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 17), 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 1), 0, 1 >> 1, 2 >> 1, 3 >> 1)); +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 1), 0, 1 >> 1, 2 >> 1, 3 >> 1)); +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){-1, 0, 0, 0}, 1), 0x7f'ff, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 16), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 17), 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 1), -1, 0, 1, 1)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 15), -1, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 16), -1, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 200), -1, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, 32767, -2, 0}, 1), -16384, 16383, -1, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, 32767, -2, 0}, 15), -1, 0, -1, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, 32767, -2, 0}, 30), -1, 0, -1, 0)); + +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){0, 1}, 0), 0, 1)); +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){0, 1}, 1), 0, 1 << 1)); +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 2}, 2), 1 << 2, 2 << 2)); +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 1}, 31), 1 << 31, 1 << 31)); +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 1}, 32), 0, 0)); +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 1}, 33), 0, 0)); + +TEST_CONSTEXPR(match_v2si(_mm_srli_pi32((__m64)(__v2si){1, 1025}, 2), 1 >> 2, 1025 >> 2)); + +TEST_CONSTEXPR(match_v2si( _mm_srai_pi32((__m64)(__v2si){-32768, 32767}, 30), -1, 0)); +TEST_CONSTEXPR(match_v2si( _mm_srai_pi32((__m64)(__v2si){-2, 20}, 1), -1, 20 >> 1)); +TEST_CONSTEXPR(match_v2si( _mm_srai_pi32((__m64)(__v2si){-1, 20}, 1), -1, 20 >> 1)); + +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){0}, 0), 0)); +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 1), 1LL << 1)); +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){2}, 2), 2LL << 2)); +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 63), 1LL << 63)); +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 64), 0)); +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 65), 0)); + +TEST_CONSTEXPR(match_v1di(_mm_srli_si64((__m64)(__v1di){1025}, 2), 1025LL >> 2)); + +TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 0), 0, 1, 2, 3, 4, 5, 6, 7)); +TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0, 1 << 1, 2 << 1, 3 << 1, 4 << 1, 5 << 1, 6 << 1, 7 << 1)); +TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 8, 2, 3, 4, 5, 6, 7}, 8), 0, 8 << 8, 2 << 8, 3 << 8, 4 << 8, 5 << 8, 6 << 8, 7 << 8)); +TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 8, 2, 3, 4, 5, 6, 7}, 16), 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8hi(_mm_srli_epi16((__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0, 1 >> 1, 2 >> 1, 3 >> 1, 4 >> 1, 5 >> 1, 6 >> 1, 7 >> 1)); + +TEST_CONSTEXPR(match_v8hi(_mm_srai_epi16((__m128i)(__v8hi){-32768, 32767, -3, -2, -1, 0, 1, 2}, 1), -16384, 16383, -2, -1, -1, 0, 0, 1)); + +TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 0), 0, 1, 2, 3)); +TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 1), 0, 1 << 1, 2 << 1, 3 << 1)); +TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 31), 0, 1U << 31, 2U << 31, 3U << 31)); +TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 32), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 33), 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4si(_mm_srli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 8), 0, 1U >> 8, 2U >> 8, 3U >> 8)); + +TEST_CONSTEXPR(match_v4si(_mm_srai_epi32((__m128i)(__v4si){-32768, 32767, -3, 2}, 1), -16384, 16383, -2, 1)); + +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 0), 0, 1)); +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 1), 0, 1LL << 1)); +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){5, 8}, 6), 5 << 6, 8 << 6)); +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 63), 0, 1LL << 63)); +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 64), 0, 0)); +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 65), 0, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_srli_epi64((__m128i)(__v2di){100005, 100008}, 6), 100005 >> 6, 100008 >> 6)); +TEST_CONSTEXPR(match_v2di(_mm_srai_epi64((__m128i)(__v2di){-32768, -3}, 1), -16384, -2)); + +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 0), + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), + 0, 1U<<1, 2U<<1, 3U<<1, 4U<<1, 5U<<1, 6U<<1, 7U<<1, 8U<<1, 9U<<1, 10U<<1, 11U<<1, 12U<<1, 13U<<1, 14U<<1, 15U<<1)); +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 15), + 0, 1U<<15, 2U<<15, 3U<<15, 4U<<15, 5U<<15, 6U<<15, 7U<<15, 8U<<15, 9U<<15, 10U<<15, 11U<<15, 12U<<15, 13U<<15, 14U<<15, 15U<<15)); +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 17), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v16hi(_mm256_srli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), + 0, 1U>>1, 2U>>1, 3U>>1, 4U>>1, 5U>>1, 6U>>1, 7U>>1, 8U>>1, 9U>>1, 10U>>1, 11U>>1, 12U>>1, 13U>>1, 14U>>1, 15U>>1)); +TEST_CONSTEXPR(match_v16hi(_mm256_srai_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, 1), + -16384, 16383, -2, -1, -1, 0, 0, 1, -16384, 16383, -2, -1, -1, 0, 0, 1)); + +TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 0), 0, 1, 2, 3, 4, 5, 6, 7)); +TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1)); +TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 31), 0, 1U<<31, 2U<<31, 3U<<31, 4U<<31, 5U<<31, 6U<<31, 7U<<31)); +TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 32), 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 33), 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8si(_mm256_srli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 31), 0, 1U>>31, 2U>>31, 3U>>31, 4U>>31, 5U>>31, 6U>>31, 7U>>31)); +TEST_CONSTEXPR(match_v8si(_mm256_srai_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, 1), -16384, 16383, -2, -1, -1, 0, 0, 1)); + +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 0), 0, 1, 2, 3)); +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 1), 0, 1<<1, 2<<1, 3<<1)); +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 33), 0, 1ULL<<33, 2ULL<<33, 3ULL<<33)); +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 64), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 65), 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4di(_mm256_srli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 33), 0, 1ULL>>33, 2ULL>>33, 3ULL>>33)); +TEST_CONSTEXPR(match_v4di(_mm256_srai_epi64((__m256i)(__v4di){-32768, 32767, -3, -2}, 1), -16384, 16383, -2, -1)); + +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 0), + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 1), + 0, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 12<<1, 13<<1, 14<<1, 15<<1, 16<<1, 17<<1, 18<<1, 19<<1, 20<<1, 21<<1, 22<<1, 23<<1, 24<<1, 25<<1, 26<<1, 27<<1, 28<<1, 29<<1, 30<<1, 31<<1)); +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 15), + 0, 1U<<15, 2U<<15, 3U<<15, 4U<<15, 5U<<15, 6U<<15, 7U<<15, 8U<<15, 9U<<15, 10U<<15, 11U<<15, 12U<<15, 13U<<15, 14U<<15, 15U<<15, 16U<<15, 17U<<15, 18U<<15, 19U<<15, 20U<<15, 21U<<15, 22U<<15, 23U<<15, 24U<<15, 25U<<15, 26U<<15, 27U<<15, 28U<<15, 29U<<15, 30U<<15, 31U<<15)); +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 16), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 17), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v32hi(_mm512_srli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 15), + 0, 1U>>15, 2U>>15, 3U>>15, 4U>>15, 5U>>15, 6U>>15, 7U>>15, 8U>>15, 9U>>15, 10U>>15, 11U>>15, 12U>>15, 13U>>15, 14U>>15, 15U>>15, 16U>>15, 17U>>15, 18U>>15, 19U>>15, 20U>>15, 21U>>15, 22U>>15, 23U>>15, 24U>>15, 25U>>15, 26U>>15, 27U>>15, 28U>>15, 29U>>15, 30U>>15, 31U>>15)); +TEST_CONSTEXPR(match_v32hi(_mm512_srai_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 10), + 0, 1U>>10, 2U>>10, 3U>>10, 4U>>10, 5U>>10, 6U>>10, 7U>>10, 8U>>10, 9U>>10, 10U>>10, 11U>>10, 12U>>10, 13U>>10, 14U>>10, 15U>>10, 16U>>10, 17U>>10, 18U>>10, 19U>>10, 20U>>10, 21U>>10, 22U>>10, 23U>>10, 24U>>10, 25U>>10, 26U>>10, 27U>>10, 28U>>10, 29U>>10, 30U>>10, 31U>>10)); + +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 0), +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 12<<1, 13<<1, 14<<1, 15<<1)); +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 10), +0, 1U<<10, 2U<<10, 3U<<10, 4U<<10, 5U<<10, 6U<<10, 7U<<10, 8U<<10, 9U<<10, 10U<<10, 11U<<10, 12U<<10, 13U<<10, 14U<<10, 15U<<10)); +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 32), +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 33), +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v16si(_mm512_srli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 10), +0, 1U>>10, 2U>>10, 3U>>10, 4U>>10, 5U>>10, 6U>>10, 7U>>10, 8U>>10, 9U>>10, 10U>>10, 11U>>10, 12U>>10, 13U>>10, 14U>>10, 15U>>10)); +TEST_CONSTEXPR(match_v16si(_mm512_srai_epi32((__m512i)(__v16si){0, -2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 10), +0, -1, 2U>>10, 3U>>10, 4U>>10, 5U>>10, 6U>>10, 7U>>10, 8U>>10, 9U>>10, 10U>>10, 11U>>10, 12U>>10, 13U>>10, 14U>>10, 15U>>10)); + +TEST_CONSTEXPR(match_v8di(_mm512_slli_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, 1), +0<<1, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1)); +TEST_CONSTEXPR(match_v8di(_mm512_srli_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, 1), +0>>1, 1>>1, 2>>1, 3>>1, 4>>1, 5>>1, 6>>1, 7>>1)); +TEST_CONSTEXPR(match_v8di(_mm512_srai_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, 1), +0>>1, 1>>1, 2>>1, 3>>1, 4>>1, 5>>1, 6>>1, 7>>1)); + +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0x00'ff'cc'71, +(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0, +(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0xff'ff'ff'ff, +(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0xffU<< 1, 1<< 1, 2<< 1, 3<< 1, 4<< 1, 5<< 1, 6<< 1, 7<< 1, 8<< 1, 9<< 1, 10<< 1, 11<< 1, 12<< 1, 13<< 1, 14<< 1, 15<< 1, +0xffU<< 1, 1<< 1, 2<< 1, 3<< 1, 4<< 1, 5<< 1, 6<< 1, 7<< 1, 8<< 1, 9<< 1, 10<< 1, 11<< 1, 12<< 1, 13<< 1, 14<< 1, 15<< 1)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0x7f'ff'ff'ff, +(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0xffU<< 1, 1<< 1, 2<< 1, 3<< 1, 4<< 1, 5<< 1, 6<< 1, 7<< 1, 8<< 1, 9<< 1, 10<< 1, 11<< 1, 12<< 1, 13<< 1, 14<< 1, 15<< 1, +0xffU<< 1, 1<< 1, 2<< 1, 3<< 1, 4<< 1, 5<< 1, 6<< 1, 7<< 1, 8<< 1, 9<< 1, 10<< 1, 11<< 1, 12<< 1, 13<< 1, 14<< 1, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0x71'cc'ff'00, +(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0, 0, 0, 0, 0, 0, 0, 0, 8<<1, 9<<1, 10<<1, 11<<1, 12<<1, 13<<1, 14<<1, 15<<1, 0, 0, 2<<1, 3<<1, 0, 0, 6<<1, 7<<1, 8<<1, 0, 0, 0, 12<<1, 13<<1, 14<<1, 0)); + +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srli_epi16((__mmask32)0x71'cc'ff'00, +(__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0, 0, 0, 0, 0, 0, 0, 0, +8>>1, 9>>1, 10>>1, 11>>1, 12>>1, 13>>1, 14>>1, 15>>1, +0, 0, 2>>1, 3>>1, 0, 0, 6>>1, 7>>1, +8>>1, 0, 0, 0, 12>>1, 13>>1, 14>>1, 0 +)); +TEST_CONSTEXPR(match_v32hi( + _mm512_maskz_srai_epi16( + (__mmask32)0xAAAAAAAA, + (__m512i)(__v32hi){ + -32768, 32767, -3, -2, -1, 0, 1, 2, + -1234, 1234, -32767, 32766, -5, 5, -256, 256, + -42, 42, -7, 7, -30000, 30000, -1, -1, + 0, -2, 2, -32768, 32767, -32768, -123, 123 + }, 5), + 0, 1023, 0, -1, 0, 0, 0, 0, 0, 38, 0, 1023, 0, 0, 0, 8, 0, 1, 0, 0, 0, 937, 0, -1, 0, -1, 0, -1024, 0, -1024, 0, 3 )); + +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0x00'ff'cc'71, +(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 32), +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0, +(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0xff'ff, +(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0xffU<<1, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 12<<1, 13<<1, 14<<1, 15<<1)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0x7f'ff, +(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0xffU<<1, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 12<<1, 13<<1, 14<<1, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0x71'cc, +(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0, 0, 2<<1, 3<<1, 0, 0, 6<<1, 7<<1, 8<<1, 0, 0, 0, 12<<1, 13<<1, 14<<1, 0)); + +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srli_epi32((__mmask16)0x71'cc, +(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0, 0, 2>>1, 3>>1, 0, 0, 6>>1, 7>>1, 8>>1, 0, 0, 0, 12>>1, 13>>1, 14>>1, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srai_epi32((__mmask16)0x71'cc, +(__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0, 0, 2>>1, 3>>1, 0, 0, 6>>1, 7>>1, 8>>1, 0, 0, 0, 12>>1, 13>>1, 14>>1, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0x00'ff'cc'71, +(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 64), +0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0, +(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 16), +0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0xff, +(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), +0xffULL<<1, 1LL<<1, 2LL<<1, 3LL<<1, 4LL<<1, 5LL<<1, 6LL<<1, 7LL<<1)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0x7f, +(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), +0xffULL<<1, 1LL<<1, 2LL<<1, 3LL<<1, 4LL<<1, 5LL<<1, 6LL<<1, 0)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0x71, +(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), +0xff << 1, 0, 0, 0, 4 << 1, 5 << 1, 6LL<<1, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_maskz_srli_epi64((__mmask8)0x71, +(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), +0xff >> 1, 0, 0, 0, 4 >> 1, 5 >> 1, 6LL>>1, 0)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_srai_epi64((__mmask8)0x71, +(__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), +0xff >> 1, 0, 0, 0, 4 >> 1, 5 >> 1, 6LL>>1, 0)); + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0x00'ff'cc'71, (__m256i)(__v4di){0xff, 1, 2, 3}, 64), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0, (__m256i)(__v4di){0xff, 1, 2, 3}, 16), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0xff, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xffULL<<1, 1LL<<1, 2LL<<1, 3LL<<1)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0x7, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xffULL<<1, 1LL<<1, 2LL<<1, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0x71, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xff << 1, 0, 0, 0)); + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_srli_epi64((__mmask8)0x71, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xff >> 1, 0, 0, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_srai_epi64((__mmask8)0x71, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0xff >> 1, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0x00'ff'cc'71, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 32), 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 16), 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0xff, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xffU<<1, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0x7, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xffU<<1, 1<<1, 2<<1, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0x71, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xff<<1, 0, 0, 0, 4<<1, 5<<1, 6<<1, 0)); + +TEST_CONSTEXPR(match_v8si(_mm256_maskz_srli_epi32((__mmask8)0x71, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xff>>1, 0, 0, 0, 4>>1, 5>>1, 6>>1, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_srai_epi32((__mmask8)0x71, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0xff>>1, 0, 0, 0, 4>>1, 5>>1, 6>>1, 0)); + +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0x00'ff'cc'71, +(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 32), +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0, +(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0xff'ff, +(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0xffU<<1, 1<<1, 2<<1, 3<<1, 4<<1, 5<<1, 6<<1, 7<<1, 8<<1, 9<<1, 10<<1, 11<<1, 12<<1, 13<<1, 14<<1, 15<<1)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0x7, +(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0xffU<<1, 1<<1, 2<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0x71, +(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0xff<<1, 0, 0, 0, 4<<1, 5<<1, 6<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srli_epi16((__mmask16)0x71, +(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0xff>>1, 0, 0, 0, 4>>1, 5>>1, 6>>1, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srai_epi16((__mmask16)0x71, +(__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), +0xff>>1, 0, 0, 0, 4>>1, 5>>1, 6>>1, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v32hi( + _mm512_mask_slli_epi16( + (__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115, + 116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131}, + (__mmask32)~(__mmask32)0, + (__m512i)(__v32hi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, + 1), + 0<<1,1<<1,2<<1,3<<1,4<<1,5<<1,6<<1,7<<1, + 8<<1,9<<1,10<<1,11<<1,12<<1,13<<1,14<<1,15<<1, + 16<<1,17<<1,18<<1,19<<1,20<<1,21<<1,22<<1,23<<1, + 24<<1,25<<1,26<<1,27<<1,28<<1,29<<1,30<<1,31<<1)); + +TEST_CONSTEXPR(match_v32hi( + _mm512_mask_srli_epi16( + (__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115, + 116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131}, + (__mmask32)~(__mmask32)0, + (__m512i)(__v32hi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, + 1), + 0>>1,1>>1,2>>1,3>>1,4>>1,5>>1,6>>1,7>>1, + 8>>1,9>>1,10>>1,11>>1,12>>1,13>>1,14>>1,15>>1, + 16>>1,17>>1,18>>1,19>>1,20>>1,21>>1,22>>1,23>>1, + 24>>1,25>>1,26>>1,27>>1,28>>1,29>>1,30>>1,31>>1)); + +TEST_CONSTEXPR(match_v32hi( + _mm512_mask_srai_epi16( + (__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115, + 116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131}, + (__mmask32)~(__mmask32)0, + (__m512i)(__v32hi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, + 1), + 0>>1,1>>1,2>>1,3>>1,4>>1,5>>1,6>>1,7>>1, + 8>>1,9>>1,10>>1,11>>1,12>>1,13>>1,14>>1,15>>1, + 16>>1,17>>1,18>>1,19>>1,20>>1,21>>1,22>>1,23>>1, + 24>>1,25>>1,26>>1,27>>1,28>>1,29>>1,30>>1,31>>1)); + +TEST_CONSTEXPR(match_v16si( + _mm512_mask_slli_epi32( + (__m512i)(__v16si){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}, + (__mmask16)0x5555, + (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, + 1), + 0<<1, 101, 2<<1, 103, 4<<1, 105, 6<<1, 107, 8<<1, 109, 10<<1, 111, 12<<1, 113, 14<<1, 115)); + +TEST_CONSTEXPR(match_v16si( + _mm512_mask_srli_epi32( + (__m512i)(__v16si){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}, + (__mmask16)0x5555, + (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, + 1), + 0>>1, 101, 2>>1, 103, 4>>1, 105, 6>>1, 107, 8>>1, 109, 10>>1, 111, 12>>1, 113, 14>>1, 115)); + +TEST_CONSTEXPR(match_v16si( + _mm512_mask_srai_epi32( + (__m512i)(__v16si){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}, + (__mmask16)0x5555, + (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, + 1), + 0>>1, 101, 2>>1, 103, 4>>1, 105, 6>>1, 107, 8>>1, 109, 10>>1, 111, 12>>1, 113, 14>>1, 115)); + +TEST_CONSTEXPR(match_v8di( + _mm512_mask_slli_epi64( + (__m512i)(__v8di){100,101,102,103,104,105,106,107}, + (__mmask8)0x0F, + (__m512i)(__v8di){0,1,2,3,4,5,6,7}, + 2), + 0<<2,1<<2,2<<2,3<<2, 104,105,106,107)); + +TEST_CONSTEXPR(match_v8di( + _mm512_mask_srli_epi64( + (__m512i)(__v8di){100,101,102,103,104,105,106,107}, + (__mmask8)0x0F, + (__m512i)(__v8di){0,1,2,3,4,5,6,7}, + 2), + 0>>2,1>>2,2>>2,3>>2, 104,105,106,107)); + +TEST_CONSTEXPR(match_v8di( + _mm512_mask_srai_epi64( + (__m512i)(__v8di){100,101,102,103,104,105,106,107}, + (__mmask8)0x0F, + (__m512i)(__v8di){0,1,2,3,4,5,6,7}, + 2), + 0>>2,1>>2,2>>2,3>>2, 104,105,106,107)); + +TEST_CONSTEXPR(match_v16hi( + _mm256_mask_slli_epi16( + (__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}, + (__mmask16)0xAAAA, + (__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, + 20), + 100, 0, 102, 0, 104, 0, 106, 0, 108, 0, 110, 0, 112, 0, 114, 0)); + +TEST_CONSTEXPR(match_v16hi( + _mm256_mask_srli_epi16( + (__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}, + (__mmask16)0xAAAA, + (__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, + 20), + 100, 0, 102, 0, 104, 0, 106, 0, 108, 0, 110, 0, 112, 0, 114, 0)); + +TEST_CONSTEXPR(match_v16hi( + _mm256_mask_srai_epi16( + (__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}, + (__mmask16)0xAAAA, + (__m256i)(__v16hi){0, -1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, + 20), + 100, 0Xffff, 102, 0, 104, 0, 106, 0, 108, 0, 110, 0, 112, 0, 114, 0)); + +TEST_CONSTEXPR(match_v16hi( + _mm256_mask_srli_epi16( + (__m256i)(__v16hi){100,101,102,103,104,105,106,107, + 108,109,110,111,112,113,114,115}, + (__mmask16)0xAAAA, + (__m256i)(__v16hi){ 0, 32, 64, 96, 128, 160, 192, 224, + 256, 288, 320, 352, 384, 416, 448, 480}, + 5), + 100, 1, 102, 3, 104, 5, 106, 7, 108, 9, 110, 11, 112, 13, 114, 15)); + +TEST_CONSTEXPR(match_v8si( + _mm256_mask_slli_epi32( + (__m256i)(__v8si){100,101,102,103,104,105,106,107}, + (__mmask8)0xff, + (__m256i)(__v8si){0,1,2,3,4,5,6,7}, + 3), + 0<<3,1<<3,2<<3,3<<3,4<<3,5<<3,6<<3,7<<3)); + +TEST_CONSTEXPR(match_v8si( + _mm256_mask_srli_epi32( + (__m256i)(__v8si){100,101,102,103,104,105,106,107}, + (__mmask8)0xff, + (__m256i)(__v8si){0,1,2,3,4,5,6,7}, + 3), + 0>>3,1>>3,2>>3,3>>3,4>>3,5>>3,6>>3,7>>3)); + +TEST_CONSTEXPR(match_v8si( + _mm256_mask_srai_epi32( + (__m256i)(__v8si){100,101,102,103,104,105,106,107}, + (__mmask8)0xff, + (__m256i)(__v8si){0,1,2,3,4,5,6,7}, + 3), + 0>>3,1>>3,2>>3,3>>3,4>>3,5>>3,6>>3,7>>3)); + +TEST_CONSTEXPR(match_v4di( + _mm256_mask_slli_epi64( + (__m256i)(__v4di){100,101,102,103}, + (__mmask8)0b1010, + (__m256i)(__v4di){0,1,2,3}, + 4), + 100, 1<<4, 102, 3<<4)); + +TEST_CONSTEXPR(match_v4di( + _mm256_mask_srli_epi64( + (__m256i)(__v4di){100,101,102,103}, + (__mmask8)0b1010, + (__m256i)(__v4di){0,0xff80,2,3}, + 1), + 100, 0x7fc0, 102, 3>>1)); + +TEST_CONSTEXPR(match_v4di( + _mm256_mask_srai_epi64( + (__m256i)(__v4di){100,101,102,103}, + (__mmask8)0b1010, + (__m256i)(__v4di){0,-128,2,3}, + 2), + 100, -32, 102, 3>>2)); \ No newline at end of file _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits