llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: NagaChaitanya Vellanki (chaitanyav) <details> <summary>Changes</summary> Resolves:#<!-- -->169176 --- Patch is 88.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169276.diff 13 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.td (+24-36) - (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+105) - (modified) clang/lib/AST/ExprConstant.cpp (+106) - (modified) clang/lib/Headers/avx2intrin.h (+16-24) - (modified) clang/lib/Headers/avx512bwintrin.h (+18-27) - (modified) clang/lib/Headers/avx512fintrin.h (+36-54) - (modified) clang/lib/Headers/avx512vlintrin.h (+52-78) - (modified) clang/lib/Headers/emmintrin.h (+16-16) - (modified) clang/test/CodeGen/X86/avx2-builtins.c (+34) - (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+21-6) - (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+42-12) - (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+34-7) - (modified) clang/test/CodeGen/X86/sse2-builtins.c (+16) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 4aa3d51931980..253eb3cbd7ee9 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -214,17 +214,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in { def _mm_pause : X86LibBuiltin<"void()">; } -let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; - def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; - def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; - def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; - def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; -} - let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def movmskpd : X86Builtin<"int(_Vector<2, double>)">; def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">; @@ -265,6 +254,15 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">; def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">; def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">; + + def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; + def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; } let Features = "sse3", Attributes = [NoThrow] in { @@ -585,14 +583,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def psadbw256 : X86Builtin< "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; - def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; - def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; - def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; - def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; - def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; - def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; - def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; - def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; @@ -669,6 +659,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; + + def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; + def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; + def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; + def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; + def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; + def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; + def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; + def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; } let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { @@ -1930,16 +1929,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; -} - let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pmaddubsw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>)">; def pmaddwd512 : X86Builtin<"_Vector<16, int>(_Vector<32, short>, _Vector<32, short>)">; def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; + def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; } let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { @@ -1995,7 +1991,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def psraw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; def psrlw512 @@ -2312,25 +2308,17 @@ let Features = "avx512f", def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def psraq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; -} - -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; -} - -let Features = "avx512vl", - Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">; } -let Features = "avx512vl", - Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 83e40f64fd979..1b1866034b50d 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3468,6 +3468,69 @@ static bool interp__builtin_ia32_shuffle_generic( return true; } +static bool interp__builtin_ia32_shift_with_count( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp, + llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) { + + assert(Call->getNumArgs() == 2); + + const Pointer &Count = S.Stk.pop<Pointer>(); + const Pointer &Source = S.Stk.pop<Pointer>(); + + QualType SourceType = Call->getArg(0)->getType(); + QualType CountType = Call->getArg(1)->getType(); + assert(SourceType->isVectorType() && CountType->isVectorType()); + + const auto *SourceVecT = SourceType->castAs<VectorType>(); + const auto *CountVecT = CountType->castAs<VectorType>(); + PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType()); + PrimType CountElemT = *S.getContext().classify(CountVecT->getElementType()); + + const Pointer &Dst = S.Stk.peek<Pointer>(); + + unsigned DestEltWidth = + S.getASTContext().getTypeSize(SourceVecT->getElementType()); + bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType(); + unsigned DestLen = SourceVecT->getNumElements(); + unsigned CountEltWidth = + S.getASTContext().getTypeSize(CountVecT->getElementType()); + unsigned NumBitsInQWord = 64; + unsigned NumCountElts = NumBitsInQWord / CountEltWidth; + + uint64_t CountLQWord = 0; + for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) { + uint64_t Elt = 0; + INT_TYPE_SWITCH(CountElemT, + { Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); }); + CountLQWord |= (Elt << (EltIdx * CountEltWidth)); + } + + for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) { + APSInt Elt; + INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); }); + + APInt Result; + if (CountLQWord < DestEltWidth) { + Result = ShiftOp(Elt, CountLQWord); + } else { + Result = OverflowOp(Elt, DestEltWidth); + } + if (IsDestUnsigned) { + INT_TYPE_SWITCH(SourceElemT, { + Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue()); + }); + } else { + INT_TYPE_SWITCH(SourceElemT, { + Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue()); + }); + } + } + + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC, const CallExpr *Call) { @@ -4826,6 +4889,48 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_phminposuw128: return interp__builtin_ia32_phminposuw(S, OpPC, Call); + case X86::BI__builtin_ia32_psraq128: + case X86::BI__builtin_ia32_psraq256: + case X86::BI__builtin_ia32_psraq512: + case X86::BI__builtin_ia32_psrad128: + case X86::BI__builtin_ia32_psrad256: + case X86::BI__builtin_ia32_psrad512: + case X86::BI__builtin_ia32_psraw128: + case X86::BI__builtin_ia32_psraw256: + case X86::BI__builtin_ia32_psraw512: + return interp__builtin_ia32_shift_with_count( + S, OpPC, Call, + [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); }, + [](const APInt &Elt, unsigned Width) { return Elt.ashr(Width - 1); }); + + case X86::BI__builtin_ia32_psllq128: + case X86::BI__builtin_ia32_psllq256: + case X86::BI__builtin_ia32_psllq512: + case X86::BI__builtin_ia32_pslld128: + case X86::BI__builtin_ia32_pslld256: + case X86::BI__builtin_ia32_pslld512: + case X86::BI__builtin_ia32_psllw128: + case X86::BI__builtin_ia32_psllw256: + case X86::BI__builtin_ia32_psllw512: + return interp__builtin_ia32_shift_with_count( + S, OpPC, Call, + [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); }, + [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); }); + + case X86::BI__builtin_ia32_psrlq128: + case X86::BI__builtin_ia32_psrlq256: + case X86::BI__builtin_ia32_psrlq512: + case X86::BI__builtin_ia32_psrld128: + case X86::BI__builtin_ia32_psrld256: + case X86::BI__builtin_ia32_psrld512: + case X86::BI__builtin_ia32_psrlw128: + case X86::BI__builtin_ia32_psrlw256: + case X86::BI__builtin_ia32_psrlw512: + return interp__builtin_ia32_shift_with_count( + S, OpPC, Call, + [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); }, + [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); }); + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3b91678f7d400..7e86f1252a23d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12166,6 +12166,52 @@ static bool evalShuffleGeneric( return true; } +static bool evalShiftWithCount( + EvalInfo &Info, const CallExpr *Call, APValue &Out, + llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp, + llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) { + + APValue Source, Count; + if (!EvaluateAsRValue(Info, Call->getArg(0), Source) || + !EvaluateAsRValue(Info, Call->getArg(1), Count)) + return false; + + assert(Call->getNumArgs() == 2); + + QualType SourceTy = Call->getArg(0)->getType(); + QualType CountTy = Call->getArg(1)->getType(); + assert(SourceTy->isVectorType() && CountTy->isVectorType()); + + QualType DestEltTy = SourceTy->castAs<VectorType>()->getElementType(); + unsigned DestEltWidth = Source.getVectorElt(0).getInt().getBitWidth(); + unsigned DestLen = Source.getVectorLength(); + bool IsDestUnsigned = DestEltTy->isUnsignedIntegerType(); + unsigned CountEltWidth = Count.getVectorElt(0).getInt().getBitWidth(); + unsigned NumBitsInQWord = 64; + unsigned NumCountElts = NumBitsInQWord / CountEltWidth; + SmallVector<APValue, 64> Result; + Result.reserve(DestLen); + + uint64_t CountLQWord = 0; + for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) { + uint64_t Elt = Count.getVectorElt(EltIdx).getInt().getZExtValue(); + CountLQWord |= (Elt << (EltIdx * CountEltWidth)); + } + + for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) { + APInt Elt = Source.getVectorElt(EltIdx).getInt(); + if (CountLQWord < DestEltWidth) { + Result.push_back( + APValue(APSInt(ShiftOp(Elt, CountLQWord), IsDestUnsigned))); + } else { + Result.push_back( + APValue(APSInt(OverflowOp(Elt, DestEltWidth), IsDestUnsigned))); + } + } + Out = APValue(Result.data(), Result.size()); + return true; +} + bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!IsConstantEvaluatedBuiltinCall(E)) return ExprEvaluatorBaseTy::VisitCallExpr(E); @@ -13130,6 +13176,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(Result.data(), Result.size()), E); } + case X86::BI__builtin_ia32_psraq128: + case X86::BI__builtin_ia32_psraq256: + case X86::BI__builtin_ia32_psraq512: + case X86::BI__builtin_ia32_psrad128: + case X86::BI__builtin_ia32_psrad256: + case X86::BI__builtin_ia32_psrad512: + case X86::BI__builtin_ia32_psraw128: + case X86::BI__builtin_ia32_psraw256: + case X86::BI__builtin_ia32_psraw512: { + APValue R; + if (!evalShiftWithCount( + Info, E, R, + [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); }, + [](const APInt &Elt, unsigned Width) { + return Elt.ashr(Width - 1); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_psllq128: + case X86::BI__builtin_ia32_psllq256: + case X86::BI__builtin_ia32_psllq512: + case X86::BI__builtin_ia32_pslld128: + case X86::BI__builtin_ia32_pslld256: + case X86::BI__builtin_ia32_pslld512: + case X86::BI__builtin_ia32_psllw128: + case X86::BI__builtin_ia32_psllw256: + case X86::BI__builtin_ia32_psllw512: { + APValue R; + if (!evalShiftWithCount( + Info, E, R, + [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); }, + [](const APInt &Elt, unsigned Width) { + return APInt::getZero(Width); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_psrlq128: + case X86::BI__builtin_ia32_psrlq256: + case X86::BI__builtin_ia32_psrlq512: + case X86::BI__builtin_ia32_psrld128: + case X86::BI__builtin_ia32_psrld256: + case X86::BI__builtin_ia32_psrld512: + case X86::BI__builtin_ia32_psrlw128: + case X86::BI__builtin_ia32_psrlw256: + case X86::BI__builtin_ia32_psrlw512: { + APValue R; + if (!evalShiftWithCount( + Info, E, R, + [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); }, + [](const APInt &Elt, unsigned Width) { + return APInt::getZero(Width); + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 3e3c13d8bd662..d3ceb2327ac62 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -2095,9 +2095,8 @@ _mm256_slli_epi16(__m256i __a, int __count) { /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sll_epi16(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sll_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); } @@ -2134,9 +2133,8 @@ _mm256_slli_epi32(__m256i __a, int __count) { /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sll_epi32(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sll_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); } @@ -2173,9 +2171,8 @@ _mm256_slli_epi64(__m256i __a, int __count) { /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sll_epi64(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sll_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psllq256((__v4di)__a, __count); } @@ -2214,9 +2211,8 @@ _mm256_srai_epi16(__m256i __a, int __count) { /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sra_epi16(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sra_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); } @@ -2255,9 +2251,8 @@ _mm256_srai_epi32(__m256i __a, int __count) { /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/169276 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
