https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/165513
>From eca52c0e1d9e79911f79d7339926e295fcbcf84f Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Tue, 28 Oct 2025 23:49:56 +0300 Subject: [PATCH 1/5] feat: VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow insertps intrinsic to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 5 ++- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 40 +++++++++++++++++++++--- clang/lib/AST/ExprConstant.cpp | 39 ++++++++++++++++++++--- clang/test/CodeGen/X86/sse41-builtins.c | 10 ++++++ 4 files changed, 85 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 0c85e280e748b..a431fc36b41c1 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -327,8 +327,11 @@ let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorW } } -let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; +} + +let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 8f23001ea5a39..b1f0832860476 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3358,7 +3358,8 @@ static bool interp__builtin_x86_byteshift( static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> - GetSourceIndex) { + GetSourceIndex, + llvm::function_ref<bool(unsigned, unsigned)> ShouldZero = nullptr) { assert(Call->getNumArgs() == 3); unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue(); @@ -3373,9 +3374,20 @@ static bool interp__builtin_ia32_shuffle_generic( const Pointer &Dst = S.Stk.peek<Pointer>(); for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) { - auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const Pointer &Src = (SrcVecIdx == 0) ? A : B; - TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); + if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) { + // Zero out this element + if (ElemT == PT_Float) { + Dst.elem<Floating>(DstIdx) = Floating(S.getASTContext().getFloatTypeSemantics(VecT->getElementType())); + } else { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + Dst.elem<T>(DstIdx) = T::from(0); + }); + } + } else { + auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); + const Pointer &Src = (SrcVecIdx == 0) ? A : B; + TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); + } } Dst.initializeAllElements(); @@ -4348,6 +4360,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; }); + case X86::BI__builtin_ia32_insertps128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, + [](unsigned DstIdx, unsigned Mask) { + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return std::pair<unsigned, unsigned>{1, SrcElem}; + } else { + // Copy from destination vector (A) + return std::pair<unsigned, unsigned>{0, DstIdx}; + } + }, + [](unsigned DstIdx, unsigned Mask) { + // Bits [3:0]: zero mask + return (Mask & (1 << DstIdx)) != 0; + }); case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: case X86::BI__builtin_ia32_pshufb512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 29ee089505125..17c966b8c9f4c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11622,7 +11622,8 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, static bool evalShuffleGeneric( EvalInfo &Info, const CallExpr *Call, APValue &Out, llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> - GetSourceIndex) { + GetSourceIndex, + llvm::function_ref<bool(unsigned, unsigned)> ShouldZero = nullptr) { const auto *VT = Call->getType()->getAs<VectorType>(); if (!VT) @@ -11643,9 +11644,15 @@ static bool evalShuffleGeneric( ResultElements.reserve(NumElts); for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) { - auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const APValue &Src = (SrcVecIdx == 0) ? A : B; - ResultElements.push_back(Src.getVectorElt(SrcIdx)); + if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) { + // Zero out this element + QualType ElemTy = VT->getElementType(); + ResultElements.push_back(APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); + } else { + auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); + const APValue &Src = (SrcVecIdx == 0) ? A : B; + ResultElements.push_back(Src.getVectorElt(SrcIdx)); + } } Out = APValue(ResultElements.data(), ResultElements.size()); @@ -12481,6 +12488,30 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; return Success(R, E); } + case X86::BI__builtin_ia32_insertps128: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, unsigned> { + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return {1, SrcElem}; + } else { + // Copy from destination vector (A) + return {0, DstIdx}; + } + }, + [](unsigned DstIdx, unsigned Mask) -> bool { + // Bits [3:0]: zero mask + return (Mask & (1 << DstIdx)) != 0; + })) + return false; + return Success(R, E); + } case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: case X86::BI__builtin_ia32_pshufb512: { diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 62cd392824bb2..35fa65a99836b 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -307,6 +307,16 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) { return _mm_insert_ps(x, y, 4); } +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x10), 1.0f, 10.0f, 3.0f, 4.0f))); // Insert Y[0] into X[1] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x00), 10.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x20), 1.0f, 2.0f, 10.0f, 4.0f))); // Insert Y[0] into X[2] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x30), 1.0f, 2.0f, 3.0f, 10.0f))); // Insert Y[0] into X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x80), 30.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[2] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x01), 0.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0], zero X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0A), 10.0f, 0.0f, 3.0f, 0.0f))); // Insert Y[0] into X[0], zero X[1] and X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0F), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[0] into X[0], zero all +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0xCF), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[3] into X[0], zero all + __m128i test_mm_max_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi8 // CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) >From 578e7f7152c4ed04c9008d02f17e10b63930840b Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Wed, 29 Oct 2025 10:36:13 +0300 Subject: [PATCH 2/5] chore: apply formatting --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 7 +++---- clang/lib/AST/ExprConstant.cpp | 6 ++++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index c4d92804d9c4a..117b10b1c6c0b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3431,11 +3431,10 @@ static bool interp__builtin_ia32_shuffle_generic( if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) { // Zero out this element if (ElemT == PT_Float) { - Dst.elem<Floating>(DstIdx) = Floating(S.getASTContext().getFloatTypeSemantics(VecT->getElementType())); + Dst.elem<Floating>(DstIdx) = Floating( + S.getASTContext().getFloatTypeSemantics(VecT->getElementType())); } else { - INT_TYPE_SWITCH_NO_BOOL(ElemT, { - Dst.elem<T>(DstIdx) = T::from(0); - }); + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); }); } } else { auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d27e923949c1f..d406ac52f1121 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11647,7 +11647,8 @@ static bool evalShuffleGeneric( if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) { // Zero out this element QualType ElemTy = VT->getElementType(); - ResultElements.push_back(APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); + ResultElements.push_back( + APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); } else { auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); const APValue &Src = (SrcVecIdx == 0) ? A : B; @@ -12492,7 +12493,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { APValue R; if (!evalShuffleGeneric( Info, E, R, - [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, unsigned> { + [](unsigned DstIdx, + unsigned Mask) -> std::pair<unsigned, unsigned> { // Bits [7:6]: select element from source vector Y (0-3) // Bits [5:4]: select destination position (0-3) unsigned SrcElem = (Mask >> 6) & 0x3; >From 11e06ea5c30034288dd2ec84f6a178523b5c5202 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Thu, 30 Oct 2025 16:24:23 +0300 Subject: [PATCH 3/5] chore: PR Feedback --- clang/include/clang/Basic/BuiltinsX86.td | 5 +---- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++++------------ 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b6cb475e25ab1..d9e9c91b8141b 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -327,10 +327,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorW } } -let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { - def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; -} - let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; @@ -345,6 +341,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; def ptestc128 diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 117b10b1c6c0b..a0f0a1c11607d 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3411,9 +3411,8 @@ static bool interp__builtin_x86_byteshift( static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, - llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> - GetSourceIndex, - llvm::function_ref<bool(unsigned, unsigned)> ShouldZero = nullptr) { + llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)> + GetSourceIndex) { assert(Call->getNumArgs() == 3); unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue(); @@ -3428,7 +3427,9 @@ static bool interp__builtin_ia32_shuffle_generic( const Pointer &Dst = S.Stk.peek<Pointer>(); for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) { - if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) { + auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); + + if (SrcIdx < 0) { // Zero out this element if (ElemT == PT_Float) { Dst.elem<Floating>(DstIdx) = Floating( @@ -3437,7 +3438,6 @@ static bool interp__builtin_ia32_shuffle_generic( INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); }); } } else { - auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); const Pointer &Src = (SrcVecIdx == 0) ? A : B; TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); } @@ -4393,7 +4393,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; + return std::pair<unsigned, int>{SrcIdx, static_cast<int>(LaneOffset + Index)}; }); case X86::BI__builtin_ia32_shufpd: case X86::BI__builtin_ia32_shufpd256: @@ -4411,27 +4411,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; + return std::pair<unsigned, int>{SrcIdx, static_cast<int>(LaneOffset + Index)}; }); case X86::BI__builtin_ia32_insertps128: return interp__builtin_ia32_shuffle_generic( S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return std::pair<unsigned, int>{0, -1}; + } // Bits [7:6]: select element from source vector Y (0-3) // Bits [5:4]: select destination position (0-3) unsigned SrcElem = (Mask >> 6) & 0x3; unsigned DstElem = (Mask >> 4) & 0x3; if (DstIdx == DstElem) { // Insert element from source vector (B) at this position - return std::pair<unsigned, unsigned>{1, SrcElem}; + return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)}; } else { // Copy from destination vector (A) - return std::pair<unsigned, unsigned>{0, DstIdx}; + return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)}; } - }, - [](unsigned DstIdx, unsigned Mask) { - // Bits [3:0]: zero mask - return (Mask & (1 << DstIdx)) != 0; }); case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: >From b1b98bd0ab4f54ab533b6db9f1d90a69977776eb Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Thu, 30 Oct 2025 16:30:40 +0300 Subject: [PATCH 4/5] chore: Format files --- clang/include/clang/Basic/BuiltinsX86.td | 3 ++- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index d9e9c91b8141b..9e877b92eac68 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -341,7 +341,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { - def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; + def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, " + "_Vector<4, float>, _Constant char)">; def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; def ptestc128 diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a0f0a1c11607d..31b48172401f9 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3428,7 +3428,7 @@ static bool interp__builtin_ia32_shuffle_generic( for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) { auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - + if (SrcIdx < 0) { // Zero out this element if (ElemT == PT_Float) { @@ -4393,7 +4393,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, int>{SrcIdx, static_cast<int>(LaneOffset + Index)}; + return std::pair<unsigned, int>{SrcIdx, + static_cast<int>(LaneOffset + Index)}; }); case X86::BI__builtin_ia32_shufpd: case X86::BI__builtin_ia32_shufpd256: @@ -4411,12 +4412,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, int>{SrcIdx, static_cast<int>(LaneOffset + Index)}; + return std::pair<unsigned, int>{SrcIdx, + static_cast<int>(LaneOffset + Index)}; }); case X86::BI__builtin_ia32_insertps128: return interp__builtin_ia32_shuffle_generic( - S, OpPC, Call, - [](unsigned DstIdx, unsigned Mask) { + S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) { // Bits [3:0]: zero mask - if bit is set, zero this element if ((Mask & (1 << DstIdx)) != 0) { return std::pair<unsigned, int>{0, -1}; >From d0d22764405032d89c142da31e2feee300b8af05 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Thu, 30 Oct 2025 19:11:27 +0300 Subject: [PATCH 5/5] chore: PR Feedback --- clang/lib/AST/ExprConstant.cpp | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d406ac52f1121..97eeba8b9d6cc 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11621,9 +11621,8 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, static bool evalShuffleGeneric( EvalInfo &Info, const CallExpr *Call, APValue &Out, - llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> - GetSourceIndex, - llvm::function_ref<bool(unsigned, unsigned)> ShouldZero = nullptr) { + llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)> + GetSourceIndex) { const auto *VT = Call->getType()->getAs<VectorType>(); if (!VT) @@ -11644,13 +11643,14 @@ static bool evalShuffleGeneric( ResultElements.reserve(NumElts); for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) { - if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) { + auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); + + if (SrcIdx < 0) { // Zero out this element QualType ElemTy = VT->getElementType(); ResultElements.push_back( APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); } else { - auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); const APValue &Src = (SrcVecIdx == 0) ? A : B; ResultElements.push_back(Src.getVectorElt(SrcIdx)); } @@ -12446,7 +12446,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + unsigned ShuffleMask) -> std::pair<unsigned, int> { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 32; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12459,7 +12459,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast<int>(LaneOffset + Index)}; })) return false; return Success(R, E); @@ -12471,7 +12471,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + unsigned ShuffleMask) -> std::pair<unsigned, int> { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 64; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12484,7 +12484,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast<int>(LaneOffset + Index)}; })) return false; return Success(R, E); @@ -12493,23 +12493,22 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { APValue R; if (!evalShuffleGeneric( Info, E, R, - [](unsigned DstIdx, - unsigned Mask) -> std::pair<unsigned, unsigned> { + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return {0, -1}; + } // Bits [7:6]: select element from source vector Y (0-3) // Bits [5:4]: select destination position (0-3) unsigned SrcElem = (Mask >> 6) & 0x3; unsigned DstElem = (Mask >> 4) & 0x3; if (DstIdx == DstElem) { // Insert element from source vector (B) at this position - return {1, SrcElem}; + return {1, static_cast<int>(SrcElem)}; } else { // Copy from destination vector (A) - return {0, DstIdx}; + return {0, static_cast<int>(DstIdx)}; } - }, - [](unsigned DstIdx, unsigned Mask) -> bool { - // Bits [3:0]: zero mask - return (Mask & (1 << DstIdx)) != 0; })) return false; return Success(R, E); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
