https://github.com/kimsh02 updated https://github.com/llvm/llvm-project/pull/158703
>From e49dc71e548a561db03368317373c6b3f23ec47e Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Mon, 15 Sep 2025 10:58:34 -0700 Subject: [PATCH 1/5] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add AVX512 VPTERNLOGD/VPTERNLOGQ intrinsics to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 16 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 64 ++++- clang/lib/AST/ExprConstant.cpp | 90 +++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 138 ++++++++++ clang/test/CodeGen/X86/avx512vl-builtins.c | 277 +++++++++++++++++++++ 5 files changed, 580 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 1a8645f99e281..a4335256f6deb 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -2398,28 +2398,36 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; +} + +let Features = "avx512f", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; def pternlogq512_maskz : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pternlogd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; def pternlogd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pternlogd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; def pternlogd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pternlogq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; def pternlogq128_maskz : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pternlogq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; def pternlogq256_maskz : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 4461731c25648..4004e9fca86b8 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2903,6 +2903,55 @@ static bool interp__builtin_elementwise_triop( return true; } +static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, + const CallExpr *Call, bool MaskZ) { + assert(Call->getNumArgs() == 5); + + const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned DstLen = VecT->getNumElements(); + PrimType DstElemT = *S.getContext().classify(VecT->getElementType()); + + APSInt U = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(4))); + APSInt Imm = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(3))); + const Pointer &C = S.Stk.pop<Pointer>(); + const Pointer &B = S.Stk.pop<Pointer>(); + const Pointer &A = S.Stk.pop<Pointer>(); + + const Pointer &Dst = S.Stk.peek<Pointer>(); + + for (unsigned I = 0; I < DstLen; ++I) { + APSInt a, b, c; + INT_TYPE_SWITCH(DstElemT, { + a = A.elem<T>(I).toAPSInt(); + b = B.elem<T>(I).toAPSInt(); + c = C.elem<T>(I).toAPSInt(); + }); + + unsigned BitWidth = a.getBitWidth(); + APInt R(BitWidth, 0); + bool DstUnsigned = a.isUnsigned(); + + if (U[I]) { + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]); + R.setBitVal(Bit, Imm[Idx]); + } + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { + Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned)); + }); + } else if (MaskZ) { + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { + Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned)); + }); + } else { + INT_TYPE_SWITCH_NO_BOOL(DstElemT, + { Dst.elem<T>(I) = static_cast<T>(a); }); + } + } + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3543,7 +3592,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpd_256: case X86::BI__builtin_ia32_selectpd_512: return interp__builtin_select(S, OpPC, Call); - + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: + return interp__builtin_pternlog(S, OpPC, Call, false); + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: + return interp__builtin_pternlog(S, OpPC, Call, true); case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5145896930153..1f063308aeca5 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11997,6 +11997,96 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: { + APValue AValue, BValue, CValue, ImmValue, UValue; + if (!EvaluateAsRValue(Info, E->getArg(0), AValue) || + !EvaluateAsRValue(Info, E->getArg(1), BValue) || + !EvaluateAsRValue(Info, E->getArg(2), CValue) || + !EvaluateAsRValue(Info, E->getArg(3), ImmValue) || + !EvaluateAsRValue(Info, E->getArg(4), UValue)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + APInt Imm = ImmValue.getInt(); + APInt U = UValue.getInt(); + unsigned ResultLen = AValue.getVectorLength(); + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(ResultLen); + + for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) { + APInt ALane = AValue.getVectorElt(EltNum).getInt(); + APInt BLane = BValue.getVectorElt(EltNum).getInt(); + APInt CLane = CValue.getVectorElt(EltNum).getInt(); + + if (U[EltNum]) { + unsigned BitWidth = ALane.getBitWidth(); + APInt ResLane(BitWidth, 0); + + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + + unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; + ResLane.setBitVal(Bit, Imm[Idx]); + } + ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned))); + } else { + ResultElements.push_back(APValue(APSInt(ALane, DestUnsigned))); + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: { + APValue AValue, BValue, CValue, ImmValue, UValue; + if (!EvaluateAsRValue(Info, E->getArg(0), AValue) || + !EvaluateAsRValue(Info, E->getArg(1), BValue) || + !EvaluateAsRValue(Info, E->getArg(2), CValue) || + !EvaluateAsRValue(Info, E->getArg(3), ImmValue) || + !EvaluateAsRValue(Info, E->getArg(4), UValue)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + APInt Imm = ImmValue.getInt(); + APInt U = UValue.getInt(); + unsigned ResultLen = AValue.getVectorLength(); + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(ResultLen); + + for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) { + APInt ALane = AValue.getVectorElt(EltNum).getInt(); + APInt BLane = BValue.getVectorElt(EltNum).getInt(); + APInt CLane = CValue.getVectorElt(EltNum).getInt(); + + unsigned BitWidth = ALane.getBitWidth(); + APInt ResLane(BitWidth, 0); + + if (U[EltNum]) { + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + + unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; + ResLane.setBitVal(Bit, Imm[Idx]); + } + } + ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned))); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_ctlz: case Builtin::BI__builtin_elementwise_cttz: { APValue SourceLHS; diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index f93216e546a63..9138bdef7f92b 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -6206,6 +6206,27 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) { // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 240) return _mm512_ternarylogic_epi32(__A, __B, __C, _MM_TERNLOG_A); } +TEST_CONSTEXPR(match_v16si( + _mm512_ternarylogic_epi32( + ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v16si( + _mm512_ternarylogic_epi32( + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v16si( + _mm512_ternarylogic_epi32( + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_mask_ternarylogic_epi32 @@ -6213,6 +6234,30 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, _MM_TERNLOG_B); } +TEST_CONSTEXPR(match_v16si( + _mm512_mask_ternarylogic_epi32( + ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + (__mmask16)0x3333, + ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v16si( + _mm512_mask_ternarylogic_epi32( + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask16)0xCCCC, + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v16si( + _mm512_mask_ternarylogic_epi32( + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask16)0x5555, + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9)); __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32 @@ -6220,12 +6265,57 @@ __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, _MM_TERNLOG_C); } +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_ternarylogic_epi32( + (__mmask16)0x3333, + ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_ternarylogic_epi32( + (__mmask16)0xCCCC, + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_ternarylogic_epi32( + (__mmask16)0x5555, + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 192) return _mm512_ternarylogic_epi64(__A, __B, __C, _MM_TERNLOG_A & _MM_TERNLOG_B); } +TEST_CONSTEXPR(match_v8di( + _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v8di( + _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8di( + _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_mask_ternarylogic_epi64 @@ -6233,6 +6323,30 @@ __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, _MM_TERNLOG_B | _MM_TERNLOG_C); } +TEST_CONSTEXPR(match_v8di( + _mm512_mask_ternarylogic_epi64( + ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + (__mmask8)0x33, + ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v8di( + _mm512_mask_ternarylogic_epi64( + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask8)0xCC, + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8di( + _mm512_mask_ternarylogic_epi64( + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x55, + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9)); __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi64 @@ -6240,6 +6354,30 @@ __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i _ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> zeroinitializer return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, ~_MM_TERNLOG_A | (_MM_TERNLOG_B ^ _MM_TERNLOG_C)); } +TEST_CONSTEXPR(match_v8di( + _mm512_maskz_ternarylogic_epi64( + (__mmask8)0x33, + ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v8di( + _mm512_maskz_ternarylogic_epi64( + (__mmask8)0xCC, + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8di( + _mm512_maskz_ternarylogic_epi64( + (__mmask8)0x55, + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) { // CHECK-LABEL: test_mm512_shuffle_f32x4 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 9daecd0d9875f..b5058d1b516b9 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -8229,6 +8229,27 @@ __m128i test_mm_ternarylogic_epi32(__m128i __A, __m128i __B, __m128i __C) { // CHECK: @llvm.x86.avx512.pternlog.d.128 return _mm_ternarylogic_epi32(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4si( + _mm_ternarylogic_epi32( + ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})), + ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})), + ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v4si( + _mm_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4si( + _mm_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0)); __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_mask_ternarylogic_epi32 @@ -8236,6 +8257,30 @@ __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v4si( + _mm_mask_ternarylogic_epi32( + ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})), + (__mmask8)0x03, + ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})), + ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v4si( + _mm_mask_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x0C, + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4si( + _mm_mask_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x05, + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9)); __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_maskz_ternarylogic_epi32 @@ -8243,12 +8288,57 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x03, + ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})), + ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})), + ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x0C, + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x05, + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0)); __m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_ternarylogic_epi32 // CHECK: @llvm.x86.avx512.pternlog.d.256 return _mm256_ternarylogic_epi32(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v8si( + _mm256_ternarylogic_epi32( + ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v8si( + _mm256_ternarylogic_epi32( + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8si( + _mm256_ternarylogic_epi32( + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_mask_ternarylogic_epi32 @@ -8256,6 +8346,30 @@ __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v8si( + _mm256_mask_ternarylogic_epi32( + ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + (__mmask8)0x33, + ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v8si( + _mm256_mask_ternarylogic_epi32( + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask8)0xCC, + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8si( + _mm256_mask_ternarylogic_epi32( + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x55, + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9)); __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi32 @@ -8263,12 +8377,57 @@ __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i _ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer return _mm256_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_ternarylogic_epi32( + (__mmask8)0x33, + ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_ternarylogic_epi32( + (__mmask8)0xCC, + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_ternarylogic_epi32( + (__mmask8)0x55, + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.128 return _mm_ternarylogic_epi64(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v2di( + _mm_ternarylogic_epi64( + ((__m128i)((__v2di){-0x1, 0x0})), + ((__m128i)((__v2di){0xB, 0xB})), + ((__m128i)((__v2di){0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC)); +TEST_CONSTEXPR(match_v2di( + _mm_ternarylogic_epi64( + ((__m128i)((__v2di){0x9, 0x9})), + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF)); +TEST_CONSTEXPR(match_v2di( + _mm_ternarylogic_epi64( + ((__m128i)((__v2di){0x9, 0x9})), + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0)); __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_mask_ternarylogic_epi64 @@ -8276,6 +8435,30 @@ __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v2di( + _mm_mask_ternarylogic_epi64( + ((__m128i)((__v2di){-0x1, 0x0})), + (__mmask8)0x33, + ((__m128i)((__v2di){0xB, 0xB})), + ((__m128i)((__v2di){0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC)); +TEST_CONSTEXPR(match_v2di( + _mm_mask_ternarylogic_epi64( + ((__m128i)((__v2di){0x9, 0x9})), + (__mmask8)0xCC, + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9)); +TEST_CONSTEXPR(match_v2di( + _mm_mask_ternarylogic_epi64( + ((__m128i)((__v2di){0x9, 0x9})), + (__mmask8)0x55, + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9)); __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_maskz_ternarylogic_epi64 @@ -8283,12 +8466,57 @@ __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> zeroinitializer return _mm_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v2di( + _mm_maskz_ternarylogic_epi64( + (__mmask8)0x03, + ((__m128i)((__v2di){-0x1, 0x0})), + ((__m128i)((__v2di){0xB, 0xB})), + ((__m128i)((__v2di){0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC)); +TEST_CONSTEXPR(match_v2di( + _mm_maskz_ternarylogic_epi64( + (__mmask8)0x0C, + ((__m128i)((__v2di){0x9, 0x9})), + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0)); +TEST_CONSTEXPR(match_v2di( + _mm_maskz_ternarylogic_epi64( + (__mmask8)0x05, + ((__m128i)((__v2di){0x9, 0x9})), + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0)); __m256i test_mm256_ternarylogic_epi64(__m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.256 return _mm256_ternarylogic_epi64(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4di( + _mm256_ternarylogic_epi64( + ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})), + ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v4di( + _mm256_ternarylogic_epi64( + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4di( + _mm256_ternarylogic_epi64( + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0)); __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_mask_ternarylogic_epi64 @@ -8296,6 +8524,30 @@ __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v4di( + _mm256_mask_ternarylogic_epi64( + ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})), + (__mmask8)0x33, + ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v4di( + _mm256_mask_ternarylogic_epi64( + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0xCC, + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4di( + _mm256_mask_ternarylogic_epi64( + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x55, + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9)); __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi64 @@ -8303,6 +8555,31 @@ __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i _ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> zeroinitializer return _mm256_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4di( + _mm256_maskz_ternarylogic_epi64( + (__mmask8)0x33, + ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})), + ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v4di( + _mm256_maskz_ternarylogic_epi64( + (__mmask8)0xCC, + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4di( + _mm256_maskz_ternarylogic_epi64( + (__mmask8)0x55, + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0)); + __m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) { // CHECK-LABEL: test_mm256_shuffle_f32x4 // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> >From 350385d402af5475f5e282066af98fd507583729 Mon Sep 17 00:00:00 2001 From: Shawn <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:05:54 -0700 Subject: [PATCH 2/5] Apply suggestion from @tbaederr Co-authored-by: Timm Baeder <tbae...@redhat.com> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 4004e9fca86b8..b08c64397765e 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2919,7 +2919,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, const Pointer &Dst = S.Stk.peek<Pointer>(); - for (unsigned I = 0; I < DstLen; ++I) { + for (unsigned I = 0; I != DstLen; ++I) { APSInt a, b, c; INT_TYPE_SWITCH(DstElemT, { a = A.elem<T>(I).toAPSInt(); >From 47c3330c3f89049b86bea35cc9d32db400090756 Mon Sep 17 00:00:00 2001 From: Shawn <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:06:30 -0700 Subject: [PATCH 3/5] Apply suggestion from @tbaederr Co-authored-by: Timm Baeder <tbae...@redhat.com> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b08c64397765e..6392c96cca050 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2932,7 +2932,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, bool DstUnsigned = a.isUnsigned(); if (U[I]) { - for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + for (unsigned Bit = 0; Bit != BitWidth; ++Bit) { unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]); R.setBitVal(Bit, Imm[Idx]); } >From c3790d8c7a51e43ea47413c6b5886134d8885b88 Mon Sep 17 00:00:00 2001 From: Shawn <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:09:14 -0700 Subject: [PATCH 4/5] Apply suggestion from @tbaederr Co-authored-by: Timm Baeder <tbae...@redhat.com> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 6392c96cca050..729fdccd9e198 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3598,7 +3598,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_pternlogq128_mask: case X86::BI__builtin_ia32_pternlogq256_mask: case X86::BI__builtin_ia32_pternlogq512_mask: - return interp__builtin_pternlog(S, OpPC, Call, false); + return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/false); case X86::BI__builtin_ia32_pternlogd128_maskz: case X86::BI__builtin_ia32_pternlogd256_maskz: case X86::BI__builtin_ia32_pternlogd512_maskz: >From d80ac109b58e81b8b7fc44c3d66d338a2cbd2d76 Mon Sep 17 00:00:00 2001 From: Shawn <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:09:42 -0700 Subject: [PATCH 5/5] Apply suggestion from @tbaederr Co-authored-by: Timm Baeder <tbae...@redhat.com> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 729fdccd9e198..091950f94d4c0 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3605,7 +3605,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_pternlogq128_maskz: case X86::BI__builtin_ia32_pternlogq256_maskz: case X86::BI__builtin_ia32_pternlogq512_maskz: - return interp__builtin_pternlog(S, OpPC, Call, true); + return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/true); case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits