https://github.com/Mohxen updated https://github.com/llvm/llvm-project/pull/169253
>From 3a8bdd4672407e9b10ca4c26393c0ca331f4f69a Mon Sep 17 00:00:00 2001 From: Mohxen <[email protected]> Date: Mon, 4 May 2026 13:27:35 -0400 Subject: [PATCH 1/3] [Clang] Add constexpr support for x86 PSADBW intrinsics --- clang/include/clang/Basic/BuiltinsX86.td | 15 ++------ clang/lib/AST/ByteCode/InterpBuiltin.cpp | 44 ++++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 32 ++++++++++++++++ clang/lib/Headers/avx2intrin.h | 2 +- clang/lib/Headers/avx512bwintrin.h | 2 +- clang/lib/Headers/emmintrin.h | 4 +- clang/lib/Headers/xmmintrin.h | 2 +- clang/test/CodeGen/X86/avx2-builtins.c | 9 +++++ clang/test/CodeGen/X86/avx512bw-builtins.c | 22 ++++++++++- clang/test/CodeGen/X86/mmx-builtins.c | 3 ++ clang/test/CodeGen/X86/sse2-builtins.c | 5 +++ 11 files changed, 122 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b0f95d98b8471..a4aaac773623d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -173,13 +173,13 @@ let Features = "sse2", Attributes = [NoThrow] in { let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">; def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">; + def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; } let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">; } let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; def cvtpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">; def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">; def cvtsd2si : X86Builtin<"int(_Vector<2, double>)">; @@ -574,15 +574,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">; } -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def mpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; def psadbw256 : X86Builtin< "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; -} - -let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { - def mpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; - def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long " @@ -3178,6 +3174,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant int)">; + def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, char>, _Vector<64, char>)">; } let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { @@ -3192,10 +3189,6 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect def dbpsadbw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>, _Constant int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, char>, _Vector<64, char>)">; -} - let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def compressdf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">; def compressdi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index e59d14db896a2..67083241e238e 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2949,6 +2949,45 @@ static bool interp__builtin_ia32_pmul( return true; } +static bool interp__builtin_ia32_psadbw(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 2); + + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + const auto *SrcVT = Call->getArg(0)->getType()->castAs<VectorType>(); + PrimType SrcElemT = *S.getContext().classify(SrcVT->getElementType()); + unsigned SourceLen = SrcVT->getNumElements(); + assert((SourceLen % 8) == 0); + + const auto *DestVT = Call->getType()->castAs<VectorType>(); + PrimType DestElemT = *S.getContext().classify(DestVT->getElementType()); + bool DestUnsigned = + DestVT->getElementType()->isUnsignedIntegerOrEnumerationType(); + + unsigned DstElem = 0; + for (unsigned Lane = 0; Lane != SourceLen; Lane += 8) { + APInt Sum(64, 0); + for (unsigned I = 0; I != 8; ++I) { + INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { + APSInt L = LHS.elem<T>(Lane + I).toAPSInt(); + APSInt R = RHS.elem<T>(Lane + I).toAPSInt(); + Sum += llvm::APIntOps::abdu(L.extOrTrunc(8), R.extOrTrunc(8)).zext(64); + }); + } + + INT_TYPE_SWITCH_NO_BOOL(DestElemT, { + Dst.elem<T>(DstElem) = static_cast<T>(APSInt(Sum, DestUnsigned)); + }); + ++DstElem; + } + + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_ia32_dbpsadbw(InterpState &S, CodePtr OpPC, const CallExpr *Call) { assert(Call->getNumArgs() == 3); @@ -5422,6 +5461,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, (HiLHS.sext(BitWidth) * HiRHS.sext(BitWidth)); }); + case clang::X86::BI__builtin_ia32_psadbw128: + case clang::X86::BI__builtin_ia32_psadbw256: + case clang::X86::BI__builtin_ia32_psadbw512: + return interp__builtin_ia32_psadbw(S, OpPC, Call); + case clang::X86::BI__builtin_ia32_dbpsadbw128: case clang::X86::BI__builtin_ia32_dbpsadbw256: case clang::X86::BI__builtin_ia32_dbpsadbw512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 8efceff7e8c31..692b696df7e7a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12629,6 +12629,38 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { .extractBits(16, 1); }); + case clang::X86::BI__builtin_ia32_psadbw128: + case clang::X86::BI__builtin_ia32_psadbw256: + case clang::X86::BI__builtin_ia32_psadbw512: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + + assert(SourceLHS.isVector() && SourceRHS.isVector()); + unsigned SourceLen = SourceLHS.getVectorLength(); + assert(SourceLen == SourceRHS.getVectorLength()); + assert((SourceLen % 8) == 0); + + auto *DestTy = E->getType()->castAs<VectorType>(); + QualType DestEltTy = DestTy->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + SmallVector<APValue, 8> ResultElements; + ResultElements.reserve(SourceLen / 8); + + for (unsigned Lane = 0; Lane != SourceLen; Lane += 8) { + APInt Sum(64, 0); + for (unsigned I = 0; I != 8; ++I) { + APInt LHS = SourceLHS.getVectorElt(Lane + I).getInt().extOrTrunc(8); + APInt RHS = SourceRHS.getVectorElt(Lane + I).getInt().extOrTrunc(8); + Sum += llvm::APIntOps::abdu(LHS, RHS).zext(64); + } + ResultElements.push_back(APValue(APSInt(Sum, DestUnsigned))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_pmaddubsw128: case clang::X86::BI__builtin_ia32_pmaddubsw256: case clang::X86::BI__builtin_ia32_pmaddubsw512: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index d3ceb2327ac62..2c91258253041 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1810,7 +1810,7 @@ _mm256_or_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sad_epu8(__m256i __a, __m256i __b) { return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 83cabc0dfb5ac..d5314782517b8 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1872,7 +1872,7 @@ _mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A, (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ (__v32hi)_mm512_setzero_si512())) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sad_epu8 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 43c93263f015a..940868e4570cc 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2481,8 +2481,8 @@ _mm_mul_epu32(__m128i __a, __m128i __b) { /// A 128-bit integer vector containing one of the source operands. /// \returns A [2 x i64] vector containing the sums of the sets of absolute /// differences between both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sad_epu8(__m128i __a, __m128i __b) { return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index efc0e6ce47e7d..143b4ea37216d 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2571,7 +2571,7 @@ _mm_avg_pu16(__m64 __a, __m64 __b) { /// \returns A 64-bit integer vector whose lower 16 bits contain the sums of the /// sets of absolute differences between both operands. The upper bits are /// cleared. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_sad_pu8(__m64 __a, __m64 __b) { return __trunc64(__builtin_ia32_psadbw128((__v16qi)__zext128(__a), diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index cb14d1aafedde..a8df1a279ec37 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1185,6 +1185,15 @@ __m256i test_mm256_sad_epu8(__m256i x, __m256i y) { // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_sad_epu8(x, y); } +TEST_CONSTEXPR(match_m256i(_mm256_sad_epu8((__m256i)(__v32qu){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31}, + (__m256i)(__v32qu){31, 30, 29, 28, 27, 26, 25, 24, + 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0}), + 192ULL, 64ULL, 64ULL, 192ULL)); __m256i test_mm256_shuffle_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_shuffle_epi8 diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 945789b18dd8a..427efbbb99af8 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -3295,8 +3295,26 @@ TEST_CONSTEXPR(match_v32hu(_mm512_maskz_dbsad_epu8((__mmask32)0xAAAAAAAA, __m512i test_mm512_sad_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_sad_epu8 // CHECK: @llvm.x86.avx512.psad.bw.512 - return _mm512_sad_epu8(__A, __B); -} + return _mm512_sad_epu8(__A, __B); +} +TEST_CONSTEXPR(match_m512i(_mm512_sad_epu8((__m512i)(__v64qu){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63}, + (__m512i)(__v64qu){63, 62, 61, 60, 59, 58, 57, 56, + 55, 54, 53, 52, 51, 50, 49, 48, + 47, 46, 45, 44, 43, 42, 41, 40, + 39, 38, 37, 36, 35, 34, 33, 32, + 31, 30, 29, 28, 27, 26, 25, 24, + 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0}), + 448ULL, 320ULL, 192ULL, 64ULL, + 64ULL, 192ULL, 320ULL, 448ULL)); __mmask32 test_mm512_movepi16_mask(__m512i __A) { // CHECK-LABEL: test_mm512_movepi16_mask diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 37d6306ecdb7d..90b42ba3cf099 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -486,6 +486,9 @@ __m64 test_mm_sad_pu8(__m64 a, __m64 b) { // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> return _mm_sad_pu8(a, b); } +TEST_CONSTEXPR(match_m64(_mm_sad_pu8((__m64)(__v8qu){0, 1, 2, 3, 4, 5, 6, 7}, + (__m64)(__v8qu){7, 6, 5, 4, 3, 2, 1, 0}), + 32ULL)); __m64 test_mm_set_pi8(char a, char b, char c, char d, char e, char f, char g, char h) { // CHECK-LABEL: test_mm_set_pi8 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 2993b8bb719d6..3e36c0047baf0 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1075,6 +1075,11 @@ __m128i test_mm_sad_epu8(__m128i A, __m128i B) { // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_sad_epu8(A, B); } +TEST_CONSTEXPR(match_m128i(_mm_sad_epu8((__m128i)(__v16qu){0, 1, 2, 3, 4, 5, 6, 7, + 255, 254, 253, 252, 251, 250, 249, 248}, + (__m128i)(__v16qu){7, 6, 5, 4, 3, 2, 1, 0, + 248, 249, 250, 251, 252, 253, 254, 255}), + 32ULL, 32ULL)); __m128i test_mm_set_epi8(char A, char B, char C, char D, char E, char F, char G, char H, >From 23a50bee391268964fa4e15edd2e29d3434ab79d Mon Sep 17 00:00:00 2001 From: Mohxen <[email protected]> Date: Tue, 19 May 2026 08:38:10 -0400 Subject: [PATCH 2/3] Address PSADBW unsigned byte builtin signatures --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/Headers/avx2intrin.h | 5 ++--- clang/lib/Headers/avx512bwintrin.h | 8 +++----- clang/lib/Headers/emmintrin.h | 2 +- clang/lib/Headers/xmmintrin.h | 7 +++---- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index a4aaac773623d..a7d78f1b6d826 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -173,7 +173,7 @@ let Features = "sse2", Attributes = [NoThrow] in { let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">; def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">; - def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; + def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">; } let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">; @@ -578,7 +578,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def mpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; def psadbw256 : X86Builtin< - "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; + "_Vector<4, long long int>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long " @@ -3174,7 +3174,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant int)">; - def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, char>, _Vector<64, char>)">; + def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, unsigned char>, _Vector<64, unsigned char>)">; } let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 2c91258253041..8eb22fe519e7d 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1811,9 +1811,8 @@ _mm256_or_si256(__m256i __a, __m256i __b) /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_sad_epu8(__m256i __a, __m256i __b) -{ - return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); +_mm256_sad_epu8(__m256i __a, __m256i __b) { + return __builtin_ia32_psadbw256((__v32qu)__a, (__v32qu)__b); } /// Shuffles 8-bit integers in the 256-bit integer vector \a __a according diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index d5314782517b8..5917eeaaa38fe 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1872,11 +1872,9 @@ _mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A, (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ (__v32hi)_mm512_setzero_si512())) -static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sad_epu8 (__m512i __A, __m512i __B) -{ - return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, - (__v64qi) __B); +static __inline__ __m512i + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sad_epu8(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_psadbw512((__v64qu)__A, (__v64qu)__B); } #undef __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 940868e4570cc..3f039edf87e81 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2483,7 +2483,7 @@ _mm_mul_epu32(__m128i __a, __m128i __b) { /// differences between both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sad_epu8(__m128i __a, __m128i __b) { - return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); + return __builtin_ia32_psadbw128((__v16qu)__a, (__v16qu)__b); } /// Subtracts the corresponding 8-bit integer values in the operands. diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 143b4ea37216d..73eab9e460ca5 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2572,10 +2572,9 @@ _mm_avg_pu16(__m64 __a, __m64 __b) { /// sets of absolute differences between both operands. The upper bits are /// cleared. static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_sad_pu8(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psadbw128((__v16qi)__zext128(__a), - (__v16qi)__zext128(__b))); +_mm_sad_pu8(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_psadbw128((__v16qu)__zext128(__a), + (__v16qu)__zext128(__b))); } #if defined(__cplusplus) >From 053d28159370d88fcfc8307beda8c13a1eb9a4ef Mon Sep 17 00:00:00 2001 From: Mohxen <[email protected]> Date: Tue, 23 Jun 2026 21:34:10 -0400 Subject: [PATCH 3/3] [clang][X86] Fix psadbw builtin smoke test argument type --- clang/test/CodeGen/builtins-x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index 0f66d8c4e3120..f67ad13d97208 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -21,6 +21,7 @@ typedef float V2f __attribute__((vector_size(8))); // 128-bit typedef char V16c __attribute__((vector_size(16))); +typedef unsigned char V16Uc __attribute__((vector_size(16))); typedef signed short V8s __attribute__((vector_size(16))); typedef unsigned short V8u __attribute__((vector_size(16))); typedef signed int V4i __attribute__((vector_size(16))); @@ -289,7 +290,7 @@ void f0(void) { #ifdef USE_64 (void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi); #endif - tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c); + tmp_V2LLi = __builtin_ia32_psadbw128((V16Uc)tmp_V16c, (V16Uc)tmp_V16c); tmp_V4i = __builtin_ia32_cvtpd2dq(tmp_V2d); tmp_V4f = __builtin_ia32_cvtpd2ps(tmp_V2d); tmp_V4i = __builtin_ia32_cvttpd2dq(tmp_V2d); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
