https://github.com/pierluigilenoci updated https://github.com/llvm/llvm-project/pull/188887
>From fa3f5ac7567fde45327eeaa6fa429bcfd4150592 Mon Sep 17 00:00:00 2001
From: Pierluigi Lenoci <[email protected]>
Date: Fri, 27 Mar 2026 01:43:36 +0100
Subject: [PATCH 1/3] [Clang] Allow VDBPSADBW intrinsics to be used in constexpr

Add constexpr evaluation support for the VDBPSADBW (Double Block Packed
Sum-Absolute-Differences) intrinsics (__builtin_ia32_dbpsadbw128/256/512)
in both the tree-based constant evaluator (ExprConstant.cpp) and the
bytecode constexpr interpreter (InterpBuiltin.cpp).

The VDBPSADBW instruction computes the sum of absolute differences of
groups of 4 unsigned bytes from the second source against two 4-byte
reference blocks selected from the first source by the immediate operand.
Per 128-bit lane, imm8[1:0] selects blockA and imm8[3:2] selects blockB
from the first source. For each group of 4 bytes in the second source,
two SAD values are computed (one against each block), producing 8 result
words per 128-bit lane.

Care is taken to treat input bytes as unsigned (the builtin signature
uses signed char vectors) by extracting via getZExtValue() and casting to
uint8_t before computing absolute differences.

Fixes #188747 Signed-off-by: Pierluigi Lenoci <[email protected]> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 64 ++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 57 +++++++++++++++++ clang/test/CodeGen/X86/avx512bw-builtins.c | 23 +++++-- clang/test/CodeGen/X86/avx512vlbw-builtins.c | 58 +++++++++++++++--- 4 files changed, 190 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 214013396e885..d9a14f84e4a8a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2820,6 +2820,65 @@ static bool interp__builtin_ia32_pmul( return true; } +static bool interp__builtin_ia32_dbpsadbw(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 3); + QualType Arg2Type = Call->getArg(2)->getType(); + APSInt ImmVal = popToAPSInt(S, Arg2Type); + unsigned Imm = ImmVal.getZExtValue(); + + const Pointer &Src2 = S.Stk.pop<Pointer>(); + const Pointer &Src1 = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + const auto *SrcVT = Call->getArg(0)->getType()->castAs<VectorType>(); + PrimType SrcElemT = *S.getContext().classify(SrcVT->getElementType()); + unsigned SourceLen = SrcVT->getNumElements(); + + const auto *DestVT = Call->getType()->castAs<VectorType>(); + PrimType DestElemT = *S.getContext().classify(DestVT->getElementType()); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + unsigned LaneSize = 16; // 128-bit lane = 16 bytes + unsigned NumLanes = SourceLen / LaneSize; + unsigned BlockOffsetA = (Imm & 0x3) * 4; + unsigned BlockOffsetB = ((Imm >> 2) & 0x3) * 4; + + unsigned DstIdx = 0; + for (unsigned Lane = 0; Lane < NumLanes; ++Lane) { + unsigned LaneStart = Lane * LaneSize; + + for (unsigned J = 0; J < 4; ++J) { + unsigned SadA = 0; + unsigned SadB = 0; + for (unsigned K = 0; K < 4; ++K) { + unsigned A1Val, A2Val, BVal; + INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { + // 
Treat as unsigned bytes + A1Val = static_cast<uint8_t>( + Src1.elem<T>(LaneStart + BlockOffsetA + K).toAPSInt().getZExtValue()); + A2Val = static_cast<uint8_t>( + Src1.elem<T>(LaneStart + BlockOffsetB + K).toAPSInt().getZExtValue()); + BVal = static_cast<uint8_t>( + Src2.elem<T>(LaneStart + 4 * J + K).toAPSInt().getZExtValue()); + }); + SadA += (BVal > A1Val) ? (BVal - A1Val) : (A1Val - BVal); + SadB += (BVal > A2Val) ? (BVal - A2Val) : (A2Val - BVal); + } + INT_TYPE_SWITCH_NO_BOOL(DestElemT, { + Dst.elem<T>(DstIdx) = + static_cast<T>(APSInt(APInt(16, SadA), DestUnsigned)); + Dst.elem<T>(DstIdx + 1) = + static_cast<T>(APSInt(APInt(16, SadB), DestUnsigned)); + }); + DstIdx += 2; + } + } + + Dst.initializeAllElements(); + return true; +} + static bool interp_builtin_horizontal_int_binop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) { @@ -4861,6 +4920,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, (HiLHS.sext(BitWidth) * HiRHS.sext(BitWidth)); }); + case clang::X86::BI__builtin_ia32_dbpsadbw128: + case clang::X86::BI__builtin_ia32_dbpsadbw256: + case clang::X86::BI__builtin_ia32_dbpsadbw512: + return interp__builtin_ia32_dbpsadbw(S, OpPC, Call); + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 4f45fa728c605..fc4a4834b462a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12564,6 +12564,63 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_dbpsadbw128: + case clang::X86::BI__builtin_ia32_dbpsadbw256: + case clang::X86::BI__builtin_ia32_dbpsadbw512: { + APValue SourceA, SourceB, SourceImm; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceA) 
|| + !EvaluateAsRValue(Info, E->getArg(1), SourceB) || + !EvaluateAsRValue(Info, E->getArg(2), SourceImm)) + return false; + + unsigned SourceLen = SourceA.getVectorLength(); + unsigned LaneSize = 16; // 128-bit lane = 16 bytes + unsigned NumLanes = SourceLen / LaneSize; + unsigned Imm = SourceImm.getInt().getZExtValue(); + unsigned BlockOffsetA = (Imm & 0x3) * 4; + unsigned BlockOffsetB = ((Imm >> 2) & 0x3) * 4; + + auto *DestTy = E->getType()->castAs<VectorType>(); + QualType DestEltTy = DestTy->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(SourceLen / 2); + + for (unsigned Lane = 0; Lane < NumLanes; ++Lane) { + unsigned LaneStart = Lane * LaneSize; + + for (unsigned J = 0; J < 4; ++J) { + // Compute SAD of SourceB[4*J..4*J+3] vs blockA from SourceA + unsigned SadA = 0; + unsigned SadB = 0; + for (unsigned K = 0; K < 4; ++K) { + // Treat input bytes as unsigned + unsigned A = static_cast<uint8_t>( + SourceA.getVectorElt(LaneStart + BlockOffsetA + K) + .getInt() + .getZExtValue()); + unsigned B = static_cast<uint8_t>( + SourceB.getVectorElt(LaneStart + 4 * J + K) + .getInt() + .getZExtValue()); + SadA += (B > A) ? (B - A) : (A - B); + + unsigned A2 = static_cast<uint8_t>( + SourceA.getVectorElt(LaneStart + BlockOffsetB + K) + .getInt() + .getZExtValue()); + SadB += (B > A2) ? 
(B - A2) : (A2 - B); + } + ResultElements.push_back( + APValue(APSInt(APInt(16, SadA), DestUnsigned))); + ResultElements.push_back( + APValue(APSInt(APInt(16, SadB), DestUnsigned))); + } + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 2020b72a649ae..488146e740db4 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -3230,21 +3230,36 @@ TEST_CONSTEXPR(match_v64qi(_mm512_maskz_alignr_epi8((__mmask64)0x000000000000000 __m512i test_mm512_mm_dbsad_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mm_dbsad_epu8 // CHECK: @llvm.x86.avx512.dbpsadbw.512 - return _mm512_dbsad_epu8(__A, __B, 170); -} + return _mm512_dbsad_epu8(__A, __B, 170); +} +// 512-bit: 4 lanes, imm8=0: blockA=blockB=lane[0..3] for each lane +// Each lane behaves the same as the 128-bit case with matching data +TEST_CONSTEXPR(match_v32hu(_mm512_dbsad_epu8( + ((__m512i)(__v64qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + ((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), + 0), 4, 4, 20, 20, 36, 36, 52, 52, + 4, 4, 20, 20, 36, 36, 52, 52, + 4, 4, 20, 20, 36, 36, 52, 52, + 4, 4, 20, 20, 36, 36, 52, 52)); __m512i test_mm512_mm_mask_dbsad_epu8(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mm_mask_dbsad_epu8 // CHECK: @llvm.x86.avx512.dbpsadbw.512 //CHECK: 
select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_mask_dbsad_epu8(__W, __U, __A, __B, 170); + return _mm512_mask_dbsad_epu8(__W, __U, __A, __B, 170); } __m512i test_mm512_mm_maskz_dbsad_epu8(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mm_maskz_dbsad_epu8 // CHECK: @llvm.x86.avx512.dbpsadbw.512 //CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_maskz_dbsad_epu8(__U, __A, __B, 170); + return _mm512_maskz_dbsad_epu8(__U, __A, __B, 170); } __m512i test_mm512_sad_epu8(__m512i __A, __m512i __B) { diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index 0ee14909ae805..098ee29b1989e 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3676,41 +3676,83 @@ TEST_CONSTEXPR(match_v32qi(_mm256_maskz_alignr_epi8((__mmask32)0xf000000f, ((__m __m128i test_mm_dbsad_epu8(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dbsad_epu8 // CHECK: @llvm.x86.avx512.dbpsadbw.128 - return _mm_dbsad_epu8(__A, __B, 170); -} + return _mm_dbsad_epu8(__A, __B, 170); +} +// imm8=4: blockA=A[0..3]={0,1,2,3}, blockB=A[4..7]={4,5,6,7} +// J=0: B[0..3]={1,2,3,4} vs blockA=4, vs blockB=12 +// J=1: B[4..7]={5,6,7,8} vs blockA=20, vs blockB=4 +// J=2: B[8..11]={9,10,11,12} vs blockA=36, vs blockB=20 +// J=3: B[12..15]={13,14,15,16} vs blockA=52, vs blockB=36 +TEST_CONSTEXPR(match_v8hu(_mm_dbsad_epu8( + ((__m128i)(__v16qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + ((__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), + 4), 4, 12, 20, 4, 36, 20, 52, 36)); +// imm8=0: blockA=blockB=A[0..3]={0,1,2,3} +TEST_CONSTEXPR(match_v8hu(_mm_dbsad_epu8( + ((__m128i)(__v16qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + ((__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), + 0), 4, 4, 20, 20, 36, 36, 52, 52)); +// Test with unsigned values 
> 127 (signed overflow territory) +// A[0..3]={200,100,50,25}, B[0..3]={180,120,40,30} +// imm8=0: blockA=blockB=A[0..3] +// SAD = |180-200|+|120-100|+|40-50|+|30-25| = 20+20+10+5 = 55 +TEST_CONSTEXPR(match_v8hu(_mm_dbsad_epu8( + ((__m128i)(__v16qu){200, 100, 50, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), + ((__m128i)(__v16qu){180, 120, 40, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), + 0), 55, 55, 375, 375, 375, 375, 375, 375)); __m128i test_mm_mask_dbsad_epu8(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_dbsad_epu8 // CHECK: @llvm.x86.avx512.dbpsadbw.128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_mask_dbsad_epu8(__W, __U, __A, __B, 170); + return _mm_mask_dbsad_epu8(__W, __U, __A, __B, 170); } +// Test masked version: mask=0x55 (keep even elements, passthrough odd) +TEST_CONSTEXPR(match_v8hu(_mm_mask_dbsad_epu8( + ((__m128i)(__v8hu){99, 99, 99, 99, 99, 99, 99, 99}), (__mmask8)0x55, + ((__m128i)(__v16qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + ((__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), + 4), 4, 99, 20, 99, 36, 99, 52, 99)); __m128i test_mm_maskz_dbsad_epu8(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_dbsad_epu8 // CHECK: @llvm.x86.avx512.dbpsadbw.128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_maskz_dbsad_epu8(__U, __A, __B, 170); + return _mm_maskz_dbsad_epu8(__U, __A, __B, 170); } +// Test zero-masked version: mask=0xAA (keep odd elements, zero even) +TEST_CONSTEXPR(match_v8hu(_mm_maskz_dbsad_epu8((__mmask8)0xAA, + ((__m128i)(__v16qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + ((__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), + 4), 0, 12, 0, 4, 0, 20, 0, 36)); __m256i test_mm256_dbsad_epu8(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dbsad_epu8 // CHECK: @llvm.x86.avx512.dbpsadbw.256 - return _mm256_dbsad_epu8(__A, 
__B, 170); -} + return _mm256_dbsad_epu8(__A, __B, 170); +} +// 256-bit: 2 lanes, imm8=0: blockA=blockB=lane[0..3] +// Lane 0: same as 128-bit test above +// Lane 1: A[16..19]={16,17,18,19}, B[16..19]={17,18,19,20} -> SAD=4 +TEST_CONSTEXPR(match_v16hu(_mm256_dbsad_epu8( + ((__m256i)(__v32qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), + ((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), + 0), 4, 4, 20, 20, 36, 36, 52, 52, 4, 4, 20, 20, 36, 36, 52, 52)); __m256i test_mm256_mask_dbsad_epu8(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_dbsad_epu8 // CHECK: @llvm.x86.avx512.dbpsadbw.256 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_mask_dbsad_epu8(__W, __U, __A, __B, 170); + return _mm256_mask_dbsad_epu8(__W, __U, __A, __B, 170); } __m256i test_mm256_maskz_dbsad_epu8(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_dbsad_epu8 // CHECK: @llvm.x86.avx512.dbpsadbw.256 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_maskz_dbsad_epu8(__U, __A, __B, 170); + return _mm256_maskz_dbsad_epu8(__U, __A, __B, 170); } __mmask8 test_mm_movepi16_mask(__m128i __A) { // CHECK-LABEL: test_mm_movepi16_mask >From 8b292ead3eb31c002f6deb2e7179b1b208f6076d Mon Sep 17 00:00:00 2001 From: Pierluigi Lenoci <[email protected]> Date: Fri, 27 Mar 2026 18:43:08 +0100 Subject: [PATCH 2/3] fix: address reviewer feedback for constexpr VDBPSADBW - Add Constexpr tags to BuiltinsX86.td for VDBPSADBW builtins - Update InterpBuiltin.cpp per tbaederr's suggestions: - Use popToUInt64 instead of popToAPSInt for immediate value - Use != instead of < in loop comparison - Simplify element access by removing unnecessary toAPSInt().getZExtValue() - Apply clang-format fix in ExprConstant.cpp 
Signed-off-by: Pierluigi Lenoci <[email protected]> --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 +++++------- clang/lib/AST/ExprConstant.cpp | 8 ++++---- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index f47532a63de04..e54f8d66843bf 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3197,15 +3197,15 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant int)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def dbpsadbw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>, _Constant int)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def dbpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def dbpsadbw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>, _Constant int)">; def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, char>, _Vector<64, char>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index d9a14f84e4a8a..5d46b2c595b1f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2823,9 +2823,7 @@ static bool 
interp__builtin_ia32_pmul( static bool interp__builtin_ia32_dbpsadbw(InterpState &S, CodePtr OpPC, const CallExpr *Call) { assert(Call->getNumArgs() == 3); - QualType Arg2Type = Call->getArg(2)->getType(); - APSInt ImmVal = popToAPSInt(S, Arg2Type); - unsigned Imm = ImmVal.getZExtValue(); + unsigned Imm = popToUInt64(S, Call->getArg(2)); const Pointer &Src2 = S.Stk.pop<Pointer>(); const Pointer &Src1 = S.Stk.pop<Pointer>(); @@ -2845,7 +2843,7 @@ static bool interp__builtin_ia32_dbpsadbw(InterpState &S, CodePtr OpPC, unsigned BlockOffsetB = ((Imm >> 2) & 0x3) * 4; unsigned DstIdx = 0; - for (unsigned Lane = 0; Lane < NumLanes; ++Lane) { + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { unsigned LaneStart = Lane * LaneSize; for (unsigned J = 0; J < 4; ++J) { @@ -2856,11 +2854,11 @@ static bool interp__builtin_ia32_dbpsadbw(InterpState &S, CodePtr OpPC, INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { // Treat as unsigned bytes A1Val = static_cast<uint8_t>( - Src1.elem<T>(LaneStart + BlockOffsetA + K).toAPSInt().getZExtValue()); + Src1.elem<T>(LaneStart + BlockOffsetA + K)); A2Val = static_cast<uint8_t>( - Src1.elem<T>(LaneStart + BlockOffsetB + K).toAPSInt().getZExtValue()); + Src1.elem<T>(LaneStart + BlockOffsetB + K)); BVal = static_cast<uint8_t>( - Src2.elem<T>(LaneStart + 4 * J + K).toAPSInt().getZExtValue()); + Src2.elem<T>(LaneStart + 4 * J + K)); }); SadA += (BVal > A1Val) ? (BVal - A1Val) : (A1Val - BVal); SadB += (BVal > A2Val) ? 
(BVal - A2Val) : (A2Val - BVal); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index fc4a4834b462a..2a6e1713fba4d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12599,10 +12599,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { SourceA.getVectorElt(LaneStart + BlockOffsetA + K) .getInt() .getZExtValue()); - unsigned B = static_cast<uint8_t>( - SourceB.getVectorElt(LaneStart + 4 * J + K) - .getInt() - .getZExtValue()); + unsigned B = + static_cast<uint8_t>(SourceB.getVectorElt(LaneStart + 4 * J + K) + .getInt() + .getZExtValue()); SadA += (B > A) ? (B - A) : (A - B); unsigned A2 = static_cast<uint8_t>( >From b2ef04823423b38720746a63a0a661a54852de90 Mon Sep 17 00:00:00 2001 From: Pierluigi Lenoci <[email protected]> Date: Sat, 28 Mar 2026 16:50:42 +0100 Subject: [PATCH 3/3] style: apply clang-format to modified files Signed-off-by: Pierluigi Lenoci <[email protected]> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 5d46b2c595b1f..4ba611cf68013 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2853,12 +2853,11 @@ static bool interp__builtin_ia32_dbpsadbw(InterpState &S, CodePtr OpPC, unsigned A1Val, A2Val, BVal; INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { // Treat as unsigned bytes - A1Val = static_cast<uint8_t>( - Src1.elem<T>(LaneStart + BlockOffsetA + K)); - A2Val = static_cast<uint8_t>( - Src1.elem<T>(LaneStart + BlockOffsetB + K)); - BVal = static_cast<uint8_t>( - Src2.elem<T>(LaneStart + 4 * J + K)); + A1Val = + static_cast<uint8_t>(Src1.elem<T>(LaneStart + BlockOffsetA + K)); + A2Val = + static_cast<uint8_t>(Src1.elem<T>(LaneStart + BlockOffsetB + K)); + BVal = static_cast<uint8_t>(Src2.elem<T>(LaneStart + 4 * J + K)); }); SadA += (BVal > A1Val) ? 
(BVal - A1Val) : (A1Val - BVal); SadB += (BVal > A2Val) ? (BVal - A2Val) : (A2Val - BVal); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
