Author: Brandon
Date: 2025-09-15T09:21:05Z
New Revision: 50bcf6818e045a39eb21201f7c512e514476385e
URL: https://github.com/llvm/llvm-project/commit/50bcf6818e045a39eb21201f7c512e514476385e
DIFF: https://github.com/llvm/llvm-project/commit/50bcf6818e045a39eb21201f7c512e514476385e.diff

LOG: [X86][bytecode] Allow SSE/AVX BLEND imm intrinsics to be used in constexpr (#157776)

This marks the following builtins as constexpr, which allows their
corresponding intrinsics to be used in constant expressions.

| Intrinsics           | X86 Builtins                | CPUID Flags | Header      |
| -------------------- | --------------------------- | ----------- | ----------- |
| `_mm_blend_pd`       | `__builtin_ia32_blendpd`    | SSE4.1      | smmintrin.h |
| `_mm256_blend_pd`    | `__builtin_ia32_blendpd256` | AVX         | immintrin.h |
| `_mm_blend_ps`       | `__builtin_ia32_blendps`    | SSE4.1      | smmintrin.h |
| `_mm256_blend_ps`    | `__builtin_ia32_blendps256` | AVX         | immintrin.h |
| `_mm_blend_epi16`    | `__builtin_ia32_pblendw128` | SSE4.1      | smmintrin.h |
| `_mm256_blend_epi16` | `__builtin_ia32_pblendw256` | AVX2        | immintrin.h |
| `_mm_blend_epi32`    | `__builtin_ia32_pblendd128` | AVX2        | immintrin.h |
| `_mm256_blend_epi32` | `__builtin_ia32_pblendd256` | AVX2        | immintrin.h |

Fixes #157065

---------

Co-authored-by: Timm Baeder <tbae...@redhat.com>
Co-authored-by: Simon Pilgrim <llvm-...@redking.me.uk>

Added:


Modified:
    clang/include/clang/Basic/BuiltinsX86.td
    clang/lib/AST/ByteCode/InterpBuiltin.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/test/CodeGen/X86/avx-builtins.c
    clang/test/CodeGen/X86/avx2-builtins.c
    clang/test/CodeGen/X86/sse41-builtins.c

Removed:


################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 1a8645f99e281..dd7727a39f693 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -312,9 +312,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
 
 let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
   def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
-  def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
-  def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
-  def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
   def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
   def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
   def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
@@ -333,6 +330,9 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
 }
 
 let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+  def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
+  def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
+  def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
   def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
   def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
   def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
@@ -469,8 +469,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
   def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
   def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
   def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
-  def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
-  def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
   def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
   def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
   def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -495,6 +493,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
 }
 
 let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
   def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
   def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
 }
@@ -575,7 +575,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
   def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
   def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
-  def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
   def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
   def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
   def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -604,8 +603,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
   def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
   def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
-  def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
-  def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
   def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
   def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
   def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
@@ -619,6 +616,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
   def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
   def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
 
+  def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
+  def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
+  def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
+
   def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
 
   def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 4461731c25648..40b9e04aa335c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2837,6 +2837,40 @@ static bool interp__builtin_select(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
+                                  const CallExpr *Call) {
+  PrimType MaskT = *S.getContext().classify(Call->getArg(2));
+  APSInt Mask = popToAPSInt(S.Stk, MaskT);
+  const Pointer &TrueVec = S.Stk.pop<Pointer>();
+  const Pointer &FalseVec = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  assert(FalseVec.getNumElems() == TrueVec.getNumElems());
+  assert(FalseVec.getNumElems() == Dst.getNumElems());
+  unsigned NumElems = FalseVec.getNumElems();
+  PrimType ElemT = FalseVec.getFieldDesc()->getPrimType();
+  PrimType DstElemT = Dst.getFieldDesc()->getPrimType();
+
+  for (unsigned I = 0; I != NumElems; ++I) {
+    bool MaskBit = Mask[I % 8];
+    if (ElemT == PT_Float) {
+      assert(DstElemT == PT_Float);
+      Dst.elem<Floating>(I) =
+          MaskBit ? TrueVec.elem<Floating>(I) : FalseVec.elem<Floating>(I);
+    } else {
+      assert(DstElemT == ElemT);
+      INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+        Dst.elem<T>(I) =
+            static_cast<T>(MaskBit ? TrueVec.elem<T>(I).toAPSInt()
+                                   : FalseVec.elem<T>(I).toAPSInt());
+      });
+    }
+  }
+  Dst.initializeAllElements();
+
+  return true;
+}
+
 static bool interp__builtin_elementwise_triop(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
     llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3502,6 +3536,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return llvm::APIntOps::fshr(Hi, Lo, Amt);
         });
 
+  case clang::X86::BI__builtin_ia32_blendpd:
+  case clang::X86::BI__builtin_ia32_blendpd256:
+  case clang::X86::BI__builtin_ia32_blendps:
+  case clang::X86::BI__builtin_ia32_blendps256:
+  case clang::X86::BI__builtin_ia32_pblendw128:
+  case clang::X86::BI__builtin_ia32_pblendw256:
+  case clang::X86::BI__builtin_ia32_pblendd128:
+  case clang::X86::BI__builtin_ia32_pblendd256:
+    return interp__builtin_blend(S, OpPC, Call);
+
   case clang::X86::BI__builtin_ia32_blendvpd:
   case clang::X86::BI__builtin_ia32_blendvpd256:
   case clang::X86::BI__builtin_ia32_blendvps:

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 820b053057067..77dc2203576b3 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11926,6 +11926,33 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
+  case X86::BI__builtin_ia32_blendpd:
+  case X86::BI__builtin_ia32_blendpd256:
+  case X86::BI__builtin_ia32_blendps:
+  case X86::BI__builtin_ia32_blendps256:
+  case X86::BI__builtin_ia32_pblendw128:
+  case X86::BI__builtin_ia32_pblendw256:
+  case X86::BI__builtin_ia32_pblendd128:
+  case X86::BI__builtin_ia32_pblendd256: {
+    APValue SourceF, SourceT, SourceC;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceF) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceT) ||
+        !EvaluateAsRValue(Info, E->getArg(2), SourceC))
+      return false;
+
+    const APInt &C = SourceC.getInt();
+    unsigned SourceLen = SourceF.getVectorLength();
+    SmallVector<APValue, 32> ResultElements;
+    ResultElements.reserve(SourceLen);
+    for (unsigned EltNum = 0; EltNum != SourceLen; ++EltNum) {
+      const APValue &F = SourceF.getVectorElt(EltNum);
+      const APValue &T = SourceT.getVectorElt(EltNum);
+      ResultElements.push_back(C[EltNum % 8] ? T : F);
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+
   case X86::BI__builtin_ia32_blendvpd:
   case X86::BI__builtin_ia32_blendvpd256:
   case X86::BI__builtin_ia32_blendvps:

diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 8223ab2b52cac..7b1a9cc4d9a7f 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -87,12 +87,20 @@ __m256d test_mm256_blend_pd(__m256d A, __m256d B) {
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
   return _mm256_blend_pd(A, B, 0x05);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x00), 1.0, 2.0, 3.0, 4.0));
+TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x05), 5.0, 2.0, 7.0, 4.0));
+TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x0A), 1.0, 6.0, 3.0, 8.0));
+TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x0F), 5.0, 6.0, 7.0, 8.0));
 
 __m256 test_mm256_blend_ps(__m256 A, __m256 B) {
   // CHECK-LABEL: test_mm256_blend_ps
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
   return _mm256_blend_ps(A, B, 0x35);
 }
+TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0x00), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
+TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0x35), -1.0f, 2.0f, -3.0f, 4.0f, -5.0f, -6.0f, 7.0f, 8.0f));
+TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0xAA), 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f));
+TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0xFF), -1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f));
 
 __m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) {
   // CHECK-LABEL: test_mm256_blendv_pd

diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index aeb1aee4ea946..17ab47c72ad4b 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -146,6 +146,10 @@ __m256i test_mm256_blend_epi16(__m256i a, __m256i b) {
   // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm256_blend_epi16(a, b, 2);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0x00), 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16));
+TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0x5A), 1,-2,3,-4,-5,6,-7,8,9,-10,11,-12,-13,14,-15,16));
+TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0x94), 1,2,-3,4,-5,6,7,-8,9,10,-11,12,-13,14,15,-16));
+TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0xFF), -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16));
 
 __m128i test_mm_blend_epi32(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_blend_epi32
@@ -153,6 +157,10 @@ __m128i test_mm_blend_epi32(__m128i a, __m128i b) {
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
   return _mm_blend_epi32(a, b, 0x05);
 }
+TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0x0), 1,2,3,4));
+TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0x5), -1,2,-3,4));
+TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0xA), 1,-2,3,-4));
+TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0xF), -1,-2,-3,-4));
 
 __m256i test_mm256_blend_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_blend_epi32
@@ -160,6 +168,10 @@ __m256i test_mm256_blend_epi32(__m256i a, __m256i b) {
   // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
   return _mm256_blend_epi32(a, b, 0x35);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0x00), 1,2,3,4,5,6,7,8));
+TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0xA5), -1,2,-3,4,5,-6,7,-8));
+TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0x94), 1,2,-3,4,-5,6,7,-8));
+TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0xFF), -1,-2,-3,-4,-5,-6,-7,-8));
 
 __m256i test_mm256_blendv_epi8(__m256i a, __m256i b, __m256i m) {
   // CHECK-LABEL: test_mm256_blendv_epi8

diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index dca161c8038a2..c7265b188d572 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -27,18 +27,30 @@ __m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
   // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
   return _mm_blend_epi16(V1, V2, 42);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0x00),1,2,3,4,5,6,7,8));
+TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0x5A),1,-2,3,-4,-5,6,-7,8));
+TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0x94),1,2,-3,4,-5,6,7,-8));
+TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0xFF),-1,-2,-3,-4,-5,-6,-7,-8));
 
 __m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
   // CHECK-LABEL: test_mm_blend_pd
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
   return _mm_blend_pd(V1, V2, 2);
 }
+TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 0), 1.0, 2.0));
+TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 1), 3.0, 2.0));
+TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 2), 1.0, 4.0));
+TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 3), 3.0, 4.0));
 
 __m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
   // CHECK-LABEL: test_mm_blend_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
   return _mm_blend_ps(V1, V2, 6);
 }
+TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0x0), 1.0f, 2.0f, 3.0f, 4.0f));
+TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0x5), 5.0f, 2.0f, 7.0f, 4.0f));
+TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0xA), 1.0f, 6.0f, 3.0f, 8.0f));
+TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0xF), 5.0f, 6.0f, 7.0f, 8.0f));
 
 __m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
   // CHECK-LABEL: test_mm_blendv_epi8
@@ -459,4 +471,3 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
   // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_testz_si128(x, y);
 }
-
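For reference, a minimal sketch of what the constexpr marking enables. This is illustrative user code, not part of the patch: blend_elem is a hypothetical helper, and the example assumes a clang build that includes this commit, compiling C++14 or later with -msse4.1.

  #include <immintrin.h>

  // Each mask bit i selects element i from the second operand when set,
  // and from the first operand when clear; _mm_blend_pd uses bits 0-1.
  constexpr double blend_elem(int i) {
    __m128d a = {1.0, 2.0};
    __m128d b = {3.0, 4.0};
    __m128d r = _mm_blend_pd(a, b, 0x2); // 0b10: elem 0 from a, elem 1 from b
    return r[i]; // clang allows vector subscripting in constant expressions
  }
  static_assert(blend_elem(0) == 1.0, "mask bit 0 clear: taken from a");
  static_assert(blend_elem(1) == 4.0, "mask bit 1 set: taken from b");

Note that the immediate is a single byte even for the 16-lane _mm256_blend_epi16, so the mask repeats every eight lanes (bit 0 controls lanes 0 and 8); that is why both evaluators above index the mask with I % 8 / EltNum % 8.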