llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Akash Deo (AkashDeoNU) <details> <summary>Changes</summary> This fixes #<!-- -->161340 @<!-- -->RKSimon. Sorry for the delay. Please let me know about any fixes I can make. --- Patch is 92.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/190549.diff 9 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.td (+12-12) - (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+91) - (modified) clang/lib/AST/ExprConstant.cpp (+80) - (modified) clang/lib/Headers/avx512vlvnniintrin.h (+42-48) - (modified) clang/lib/Headers/avx512vnniintrin.h (+37-36) - (modified) clang/lib/Headers/avxvnniintrin.h (+24-24) - (modified) clang/test/CodeGen/X86/avx512vlvnni-builtins.c (+298) - (modified) clang/test/CodeGen/X86/avx512vnni-builtins.c (+156) - (modified) clang/test/CodeGen/X86/avxvnni-builtins.c (+246) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 0cab8c77d465d..342a23e1f2aab 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1075,51 +1075,51 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpdpbusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpdpbusd256 : 
X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">; } -let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpdpbusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpdpbusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">; } -let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpdpbusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpdpwssd256 : 
X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">; } -let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">; } -let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index e7b3ef6ce1510..03c9add584658 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4189,6 +4189,53 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC, + const CallExpr *Call, bool IsDottingWord, + bool IsSaturating) { + const auto *SrcVecT = Call->getArg(0)->getType()->castAs<VectorType>(); + const auto *OpAVecT = Call->getArg(1)->getType()->castAs<VectorType>(); + 
const auto *OpBVecT = Call->getArg(2)->getType()->castAs<VectorType>(); + + PrimType SrcElemT = *S.getContext().classify(SrcVecT->getElementType()); + PrimType OpAElemT = *S.getContext().classify(OpAVecT->getElementType()); + PrimType OpBElemT = *S.getContext().classify(OpBVecT->getElementType()); + + unsigned NumElements = SrcVecT->getNumElements(); + unsigned Iters = IsDottingWord ? 2 : 4; + + const Pointer &OpBPtr = S.Stk.pop<Pointer>(); + const Pointer &OpAPtr = S.Stk.pop<Pointer>(); + const Pointer &SrcPtr = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + for (unsigned I = 0; I < NumElements; ++I) { + APSInt Acc; + INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { Acc = SrcPtr.elem<T>(I).toAPSInt(); }); + Acc = Acc.sext(64); + for (unsigned J = 0; J < Iters; ++J) { + APSInt OpA, OpB; + INT_TYPE_SWITCH_NO_BOOL( + OpAElemT, { OpA = OpAPtr.elem<T>(Iters * I + J).toAPSInt(); }); + INT_TYPE_SWITCH_NO_BOOL( + OpBElemT, { OpB = OpBPtr.elem<T>(Iters * I + J).toAPSInt(); }); + if (IsDottingWord) { + OpA = APSInt(OpA.sext(64), false); + } else { + OpA = APSInt(OpA.zext(64), false); + } + OpB = APSInt(OpB.sext(64), false); + Acc += OpA * OpB; + } + if (IsSaturating) { + Acc = APSInt(Acc.truncSSat(32), false); + } + INT_TYPE_SWITCH_NO_BOOL(SrcElemT, + { Dst.elem<T>(I) = static_cast<T>(Acc); }); + } + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -6049,6 +6096,50 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/false); }, /*IsScalar=*/true); + case X86::BI__builtin_ia32_vpdpwssd128: + case X86::BI__builtin_ia32_vpdpwssd256: + case X86::BI__builtin_ia32_vpdpwssd512: + case X86::BI__builtin_ia32_vpdpwssds128: + case X86::BI__builtin_ia32_vpdpwssds256: + case X86::BI__builtin_ia32_vpdpwssds512: + case 
X86::BI__builtin_ia32_vpdpbusds128: + case X86::BI__builtin_ia32_vpdpbusds256: + case X86::BI__builtin_ia32_vpdpbusds512: + case X86::BI__builtin_ia32_vpdpbusd128: + case X86::BI__builtin_ia32_vpdpbusd256: + case X86::BI__builtin_ia32_vpdpbusd512: { + unsigned BuiltinID = Call->getBuiltinCallee(); + bool IsDottingWord; + bool IsSaturating; + switch (BuiltinID) { + case X86::BI__builtin_ia32_vpdpwssd128: + case X86::BI__builtin_ia32_vpdpwssd256: + case X86::BI__builtin_ia32_vpdpwssd512: + IsDottingWord = true; + IsSaturating = false; + break; + case X86::BI__builtin_ia32_vpdpwssds128: + case X86::BI__builtin_ia32_vpdpwssds256: + case X86::BI__builtin_ia32_vpdpwssds512: + IsDottingWord = true; + IsSaturating = true; + break; + case X86::BI__builtin_ia32_vpdpbusds128: + case X86::BI__builtin_ia32_vpdpbusds256: + case X86::BI__builtin_ia32_vpdpbusds512: + IsDottingWord = false; + IsSaturating = true; + break; + case X86::BI__builtin_ia32_vpdpbusd128: + case X86::BI__builtin_ia32_vpdpbusd256: + case X86::BI__builtin_ia32_vpdpbusd512: + IsDottingWord = false; + IsSaturating = false; + break; + } + return interp__builtin_ia32_vpdp(S, OpPC, Call, IsDottingWord, + IsSaturating); + } default: S.FFDiag(S.Current->getLocation(OpPC), diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 4f45fa728c605..ecbdb8cac301d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14634,6 +14634,86 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; return Success(R, E); } + case X86::BI__builtin_ia32_vpdpwssd128: + case X86::BI__builtin_ia32_vpdpwssd256: + case X86::BI__builtin_ia32_vpdpwssd512: + case X86::BI__builtin_ia32_vpdpwssds128: + case X86::BI__builtin_ia32_vpdpwssds256: + case X86::BI__builtin_ia32_vpdpwssds512: + case X86::BI__builtin_ia32_vpdpbusds128: + case X86::BI__builtin_ia32_vpdpbusds256: + case X86::BI__builtin_ia32_vpdpbusds512: + case X86::BI__builtin_ia32_vpdpbusd128: + case 
X86::BI__builtin_ia32_vpdpbusd256: + case X86::BI__builtin_ia32_vpdpbusd512: { + unsigned BuiltinID = E->getBuiltinCallee(); + bool IsDottingWord = false; + bool IsSaturating = false; + switch (BuiltinID) { + case X86::BI__builtin_ia32_vpdpwssd128: + case X86::BI__builtin_ia32_vpdpwssd256: + case X86::BI__builtin_ia32_vpdpwssd512: + IsDottingWord = true; + IsSaturating = false; + break; + case X86::BI__builtin_ia32_vpdpwssds128: + case X86::BI__builtin_ia32_vpdpwssds256: + case X86::BI__builtin_ia32_vpdpwssds512: + IsDottingWord = true; + IsSaturating = true; + break; + case X86::BI__builtin_ia32_vpdpbusds128: + case X86::BI__builtin_ia32_vpdpbusds256: + case X86::BI__builtin_ia32_vpdpbusds512: + IsDottingWord = false; + IsSaturating = true; + break; + case X86::BI__builtin_ia32_vpdpbusd128: + case X86::BI__builtin_ia32_vpdpbusd256: + case X86::BI__builtin_ia32_vpdpbusd512: + IsDottingWord = false; + IsSaturating = false; + break; + } + + APValue Source, OperandA, OperandB; + if (!EvaluateAsRValue(Info, E->getArg(0), Source) || + !EvaluateAsRValue(Info, E->getArg(1), OperandA) || + !EvaluateAsRValue(Info, E->getArg(2), OperandB)) { + return false; + } + + unsigned NumElements = Source.getVectorLength(); + + SmallVector<APValue, 16> Result; + Result.reserve(NumElements); + unsigned Iters = IsDottingWord ? 
2 : 4; + for (unsigned I = 0; I < NumElements; ++I) { + APSInt DotProduct = Source.getVectorElt(I).getInt(); + DotProduct = DotProduct.sext(64); + for (unsigned J = 0; J < Iters; ++J) { + APSInt OpA; + if (IsDottingWord) { + OpA = APSInt(OperandA.getVectorElt(Iters * I + J).getInt().sext(64), + false); + } else { + OpA = APSInt(OperandA.getVectorElt(Iters * I + J).getInt().zext(64), + false); + } + APSInt OpB = APSInt( + OperandB.getVectorElt(Iters * I + J).getInt().sext(64), false); + DotProduct += OpA * OpB; + } + if (IsSaturating) { + DotProduct = APSInt(DotProduct.truncSSat(32), false); + } else { + DotProduct = APSInt(DotProduct.trunc(32), false); + } + Result.push_back(APValue(DotProduct)); + } + + return Success(APValue(Result.data(), Result.size()), E); + } } } diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h index 4b8a199af32e5..053807032fcb3 100644 --- a/clang/lib/Headers/avx512vlvnniintrin.h +++ b/clang/lib/Headers/avx512vlvnniintrin.h @@ -24,6 +24,14 @@ __target__("avx512vl,avx512vnni"), \ __min_vector_width__(256))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed /// 16-bit results. 
Sum these 4 results with the corresponding 32-bit integer @@ -179,129 +187,115 @@ #define _mm_dpwssds_epi32(S, A, B) \ ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v8hi)(A), (__v8hi)(B))) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_dpwssd_epi32(__m256i __S, 
__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusd_epi32(__S, __A, __B), (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return 
(__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusd_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusds_epi32(__S, __A, __B), (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusds_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssd_epi32(__S, __A, __B), (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssd_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssds_epi32(__S, __A, __B), 
(__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_dpwssds_epi32(__mmask8 __U, __... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/190549 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
