https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/154780
This handles constant folding for the AVX2 per-element shift intrinsics, which handle out-of-bounds shift amounts (logical result = 0, arithmetic result = signbit splat). AVX512 intrinsics will follow in follow-up patches. First stage of #154287. From 5fb843a4e9e636de92ff01f4b94007599dfea713 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-...@redking.me.uk> Date: Thu, 21 Aug 2025 16:02:07 +0100 Subject: [PATCH] [X86] Allow AVX2 per-element shift intrinsics to be used in constexpr This handles constant folding for the AVX2 per-element shift intrinsics, which handle out-of-bounds shift amounts (logical result = 0, arithmetic result = signbit splat). AVX512 intrinsics will follow in follow-up patches. First stage of #154287. --- clang/include/clang/Basic/BuiltinsX86.td | 56 ++++++------------------ clang/lib/AST/ExprConstant.cpp | 51 +++++++++++++++++++-- clang/lib/Headers/avx2intrin.h | 20 ++++----- clang/test/CodeGen/X86/avx2-builtins.c | 22 +++++----- 4 files changed, 82 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cc1da937455a2..527acd9ef086e 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -627,11 +627,23 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; -} -let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + + def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, 
_Vector<8, int>)">; + def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; + def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; +} + +let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; + def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; } let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in { @@ -654,46 +666,6 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def maskstoreq : X86Builtin<"void(_Vector<2, long long int *>, _Vector<2, long long int>, _Vector<2, long long int>)">; } -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, 
_Vector<2, long long int>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; -} - let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def gatherd_pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, double const *, _Vector<4, int>, _Vector<2, double>, _Constant char)">; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a03e64fcffde2..9b934753bcc3c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11669,13 +11669,24 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_pmulhuw512: case clang::X86::BI__builtin_ia32_pmulhw128: case clang::X86::BI__builtin_ia32_pmulhw256: - case clang::X86::BI__builtin_ia32_pmulhw512: { + case clang::X86::BI__builtin_ia32_pmulhw512: + case clang::X86::BI__builtin_ia32_psllv2di: + case clang::X86::BI__builtin_ia32_psllv4di: + case clang::X86::BI__builtin_ia32_psllv4si: + 
case clang::X86::BI__builtin_ia32_psllv8si: + case clang::X86::BI__builtin_ia32_psrav4si: + case clang::X86::BI__builtin_ia32_psrav8si: + case clang::X86::BI__builtin_ia32_psrlv2di: + case clang::X86::BI__builtin_ia32_psrlv4di: + case clang::X86::BI__builtin_ia32_psrlv4si: + case clang::X86::BI__builtin_ia32_psrlv8si:{ APValue SourceLHS, SourceRHS; if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) return false; QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); unsigned SourceLen = SourceLHS.getVectorLength(); SmallVector<APValue, 4> ResultElements; ResultElements.reserve(SourceLen); @@ -11687,12 +11698,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case Builtin::BI__builtin_elementwise_add_sat: ResultElements.push_back(APValue( APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS), - DestEltTy->isUnsignedIntegerOrEnumerationType()))); + DestUnsigned))); break; case Builtin::BI__builtin_elementwise_sub_sat: ResultElements.push_back(APValue( APSInt(LHS.isSigned() ? 
LHS.ssub_sat(RHS) : LHS.usub_sat(RHS), - DestEltTy->isUnsignedIntegerOrEnumerationType()))); + DestUnsigned))); break; case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: @@ -11706,6 +11717,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS), /*isUnsigned=*/false))); break; + case clang::X86::BI__builtin_ia32_psllv2di: + case clang::X86::BI__builtin_ia32_psllv4di: + case clang::X86::BI__builtin_ia32_psllv4si: + case clang::X86::BI__builtin_ia32_psllv8si: + if (RHS.uge(RHS.getBitWidth())) { + ResultElements.push_back( + APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); + break; + } + ResultElements.push_back( + APValue(APSInt(LHS.shl(RHS.getZExtValue()), DestUnsigned))); + break; + case clang::X86::BI__builtin_ia32_psrav4si: + case clang::X86::BI__builtin_ia32_psrav8si: + if (RHS.uge(RHS.getBitWidth())) { + ResultElements.push_back( + APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned))); + break; + } + ResultElements.push_back( + APValue(APSInt(LHS.ashr(RHS.getZExtValue()), DestUnsigned))); + break; + case clang::X86::BI__builtin_ia32_psrlv2di: + case clang::X86::BI__builtin_ia32_psrlv4di: + case clang::X86::BI__builtin_ia32_psrlv4si: + case clang::X86::BI__builtin_ia32_psrlv8si: + if (RHS.uge(RHS.getBitWidth())) { + ResultElements.push_back( + APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); + break; + } + ResultElements.push_back( + APValue(APSInt(LHS.lshr(RHS.getZExtValue()), DestUnsigned))); + break; } } diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c7e1c4446e85d..ce5b2b7544d8c 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -3721,7 +3721,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). 
/// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); @@ -3743,7 +3743,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y) /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); @@ -3765,7 +3765,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y) /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y); @@ -3787,7 +3787,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y) /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y); @@ -3810,7 +3810,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y) /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); @@ -3833,7 +3833,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y) /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); @@ -3855,7 +3855,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y) /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); @@ -3877,7 +3877,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y) /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); @@ -3899,7 +3899,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y) /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y); @@ -3921,7 +3921,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y) /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 5b252fa315ef8..29cb3e8860be9 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -327,7 +327,6 @@ __m256i test_mm256_cvtepi8_epi16(__m128i a) { // CHECK: sext <16 x i8> %{{.*}} to <16 x i16> return _mm256_cvtepi8_epi16(a); } - TEST_CONSTEXPR(match_v16hi(_mm256_cvtepi8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)); __m256i test_mm256_cvtepi8_epi32(__m128i a) { @@ -336,7 +335,6 @@ __m256i test_mm256_cvtepi8_epi32(__m128i a) { // CHECK: sext <8 x i8> %{{.*}} to <8 x i32> return _mm256_cvtepi8_epi32(a); } - TEST_CONSTEXPR(match_v8si(_mm256_cvtepi8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4)); __m256i test_mm256_cvtepi8_epi64(__m128i a) { @@ -345,7 +343,6 @@ __m256i test_mm256_cvtepi8_epi64(__m128i a) { // CHECK: sext <4 x i8> %{{.*}} to <4 x i64> return _mm256_cvtepi8_epi64(a); } - TEST_CONSTEXPR(match_v4di(_mm256_cvtepi8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0)); __m256i test_mm256_cvtepi16_epi32(__m128i a) { @@ -353,7 +350,6 @@ __m256i test_mm256_cvtepi16_epi32(__m128i 
a) { // CHECK: sext <8 x i16> %{{.*}} to <8 x i32> return _mm256_cvtepi16_epi32(a); } - TEST_CONSTEXPR(match_v8si(_mm256_cvtepi16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0, 1, -2, 3, -4)); __m256i test_mm256_cvtepi16_epi64(__m128i a) { @@ -362,7 +358,6 @@ __m256i test_mm256_cvtepi16_epi64(__m128i a) { // CHECK: sext <4 x i16> %{{.*}} to <4 x i64> return _mm256_cvtepi16_epi64(a); } - TEST_CONSTEXPR(match_v4di(_mm256_cvtepi16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0)); __m256i test_mm256_cvtepi32_epi64(__m128i a) { @@ -370,7 +365,6 @@ __m256i test_mm256_cvtepi32_epi64(__m128i a) { // CHECK: sext <4 x i32> %{{.*}} to <4 x i64> return _mm256_cvtepi32_epi64(a); } - TEST_CONSTEXPR(match_v4di(_mm256_cvtepi32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), -70000, 2, -1, 0)); __m256i test_mm256_cvtepu8_epi16(__m128i a) { @@ -378,7 +372,6 @@ __m256i test_mm256_cvtepu8_epi16(__m128i a) { // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> return _mm256_cvtepu8_epi16(a); } - TEST_CONSTEXPR(match_v16hi(_mm256_cvtepu8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252, 5, 250, 7, 248, 9, 246, 11, 244)); __m256i test_mm256_cvtepu8_epi32(__m128i a) { @@ -387,7 +380,6 @@ __m256i test_mm256_cvtepu8_epi32(__m128i a) { // CHECK: zext <8 x i8> %{{.*}} to <8 x i32> return _mm256_cvtepu8_epi32(a); } - TEST_CONSTEXPR(match_v8si(_mm256_cvtepu8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252)); __m256i test_mm256_cvtepu8_epi64(__m128i a) { @@ -396,7 +388,6 @@ __m256i test_mm256_cvtepu8_epi64(__m128i a) { // CHECK: zext <4 x i8> %{{.*}} to <4 x i64> return _mm256_cvtepu8_epi64(a); } - TEST_CONSTEXPR(match_v4di(_mm256_cvtepu8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0)); __m256i test_mm256_cvtepu16_epi32(__m128i a) { @@ -404,7 +395,6 @@ __m256i 
test_mm256_cvtepu16_epi32(__m128i a) { // CHECK: zext <8 x i16> {{.*}} to <8 x i32> return _mm256_cvtepu16_epi32(a); } - TEST_CONSTEXPR(match_v8si(_mm256_cvtepu16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532)); __m256i test_mm256_cvtepu16_epi64(__m128i a) { @@ -413,7 +403,6 @@ __m256i test_mm256_cvtepu16_epi64(__m128i a) { // CHECK: zext <4 x i16> %{{.*}} to <4 x i64> return _mm256_cvtepu16_epi64(a); } - TEST_CONSTEXPR(match_v4di(_mm256_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0)); __m256i test_mm256_cvtepu32_epi64(__m128i a) { @@ -421,7 +410,6 @@ __m256i test_mm256_cvtepu32_epi64(__m128i a) { // CHECK: zext <4 x i32> %{{.*}} to <4 x i64> return _mm256_cvtepu32_epi64(a); } - TEST_CONSTEXPR(match_v4di(_mm256_cvtepu32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), 4294897296, 2, 4294967295, 0)); __m128i test0_mm256_extracti128_si256_0(__m256i a) { @@ -1120,24 +1108,28 @@ __m128i test_mm_sllv_epi32(__m128i a, __m128i b) { // CHECK: call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_sllv_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_sllv_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 2, -8, 24, 0)); __m256i test_mm256_sllv_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sllv_epi32 // CHECK: call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_sllv_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_sllv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 2, -8, 24, -64, 0, 0, 0, 0)); __m128i test_mm_sllv_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_sllv_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_sllv_epi64(a, b); } +TEST_CONSTEXPR(match_m128i(_mm_sllv_epi64((__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 256, 0x8000000000000000ULL)); __m256i 
test_mm256_sllv_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sllv_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_sllv_epi64(a, b); } +TEST_CONSTEXPR(match_m256i(_mm256_sllv_epi64((__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 2, -8, 24, 0)); __m256i test_mm256_sra_epi16(__m256i a, __m128i b) { // CHECK-LABEL: test_mm256_sra_epi16 @@ -1180,12 +1172,14 @@ __m128i test_mm_srav_epi32(__m128i a, __m128i b) { // CHECK: call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_srav_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_srav_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, -1, 0, -1)); __m256i test_mm256_srav_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_srav_epi32 // CHECK: call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_srav_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_srav_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, -1, 0, -1, 0, -1, 0, -1)); __m256i test_mm256_srl_epi16(__m256i a, __m128i b) { // CHECK-LABEL: test_mm256_srl_epi16 @@ -1252,24 +1246,28 @@ __m128i test_mm_srlv_epi32(__m128i a, __m128i b) { // CHECK: call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_srlv_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_srlv_epi32((__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, 1073741823, 0, 0)); __m256i test_mm256_srlv_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_srlv_epi32 // CHECK: call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_srlv_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_srlv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, 1073741823, 0, 268435455, 0, 1, 0, 7)); __m128i test_mm_srlv_epi64(__m128i a, __m128i b) { 
// CHECK-LABEL: test_mm_srlv_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_srlv_epi64(a, b); } +TEST_CONSTEXPR(match_m128i(_mm_srlv_epi64((__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 0, 1)); __m256i test_mm256_srlv_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_srlv_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_srlv_epi64(a, b); } +TEST_CONSTEXPR(match_m256i(_mm256_srlv_epi64((__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 0, 0x3FFFFFFFFFFFFFFFULL, 0, 0)); __m256i test_mm256_stream_load_si256(__m256i const *a) { // CHECK-LABEL: test_mm256_stream_load_si256 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits