https://github.com/Mohxen updated 
https://github.com/llvm/llvm-project/pull/169253

>From 3a8bdd4672407e9b10ca4c26393c0ca331f4f69a Mon Sep 17 00:00:00 2001
From: Mohxen <[email protected]>
Date: Mon, 4 May 2026 13:27:35 -0400
Subject: [PATCH 1/3] [Clang] Add constexpr support for x86 PSADBW intrinsics

---
 clang/include/clang/Basic/BuiltinsX86.td   | 15 ++------
 clang/lib/AST/ByteCode/InterpBuiltin.cpp   | 44 ++++++++++++++++++++++
 clang/lib/AST/ExprConstant.cpp             | 32 ++++++++++++++++
 clang/lib/Headers/avx2intrin.h             |  2 +-
 clang/lib/Headers/avx512bwintrin.h         |  2 +-
 clang/lib/Headers/emmintrin.h              |  4 +-
 clang/lib/Headers/xmmintrin.h              |  2 +-
 clang/test/CodeGen/X86/avx2-builtins.c     |  9 +++++
 clang/test/CodeGen/X86/avx512bw-builtins.c | 22 ++++++++++-
 clang/test/CodeGen/X86/mmx-builtins.c      |  3 ++
 clang/test/CodeGen/X86/sse2-builtins.c     |  5 +++
 11 files changed, 122 insertions(+), 18 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b0f95d98b8471..a4aaac773623d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -173,13 +173,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
 let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
   def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, 
double>)">;
+  def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, 
_Vector<16, char>)">;
 }
 let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, 
_Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
 }
 
 let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
-  def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, 
_Vector<16, char>)">;
   def cvtpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
   def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
   def cvtsd2si : X86Builtin<"int(_Vector<2, double>)">;
@@ -574,15 +574,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
   def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, 
_Constant int)">;
 }
 
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
+  def mpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, 
_Vector<32, char>, _Constant char)">;
   def psadbw256
       : X86Builtin<
             "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
-  def mpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, 
_Vector<32, char>, _Constant char)">;
-
   def permdf256
       : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
   def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long "
@@ -3178,6 +3174,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, 
_Vector<64, char>, _Constant int)">;
+  def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, char>, 
_Vector<64, char>)">;
 }
 
 let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
@@ -3192,10 +3189,6 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect
   def dbpsadbw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, 
_Vector<64, char>, _Constant int)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, char>, 
_Vector<64, char>)">;
-}
-
 let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def compressdf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, double>, unsigned char)">;
   def compressdi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, 
long long int>, _Vector<8, long long int>, unsigned char)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index e59d14db896a2..67083241e238e 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2949,6 +2949,45 @@ static bool interp__builtin_ia32_pmul(
   return true;
 }
 
+static bool interp__builtin_ia32_psadbw(InterpState &S, CodePtr OpPC,
+                                        const CallExpr *Call) {
+  assert(Call->getNumArgs() == 2);
+
+  const Pointer &RHS = S.Stk.pop<Pointer>(); // popped before LHS
+  const Pointer &LHS = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>(); // result vector, left on stack
+
+  const auto *SrcVT = Call->getArg(0)->getType()->castAs<VectorType>();
+  PrimType SrcElemT = *S.getContext().classify(SrcVT->getElementType());
+  unsigned SourceLen = SrcVT->getNumElements();
+  assert((SourceLen % 8) == 0); // whole groups of 8 elements only
+
+  const auto *DestVT = Call->getType()->castAs<VectorType>();
+  PrimType DestElemT = *S.getContext().classify(DestVT->getElementType());
+  bool DestUnsigned =
+      DestVT->getElementType()->isUnsignedIntegerOrEnumerationType();
+
+  unsigned DstElem = 0; // index of the next result element
+  for (unsigned Lane = 0; Lane != SourceLen; Lane += 8) {
+    APInt Sum(64, 0); // 64-bit running total for this group of 8
+    for (unsigned I = 0; I != 8; ++I) {
+      INT_TYPE_SWITCH_NO_BOOL(SrcElemT, {
+        APSInt L = LHS.elem<T>(Lane + I).toAPSInt();
+        APSInt R = RHS.elem<T>(Lane + I).toAPSInt();
+        Sum += llvm::APIntOps::abdu(L.extOrTrunc(8), R.extOrTrunc(8)).zext(64);
+      });
+    }
+
+    INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
+      Dst.elem<T>(DstElem) = static_cast<T>(APSInt(Sum, DestUnsigned));
+    });
+    ++DstElem; // one result element per group
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
 static bool interp__builtin_ia32_dbpsadbw(InterpState &S, CodePtr OpPC,
                                           const CallExpr *Call) {
   assert(Call->getNumArgs() == 3);
@@ -5422,6 +5461,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                  (HiLHS.sext(BitWidth) * HiRHS.sext(BitWidth));
         });
 
+  case clang::X86::BI__builtin_ia32_psadbw128:
+  case clang::X86::BI__builtin_ia32_psadbw256:
+  case clang::X86::BI__builtin_ia32_psadbw512:
+    return interp__builtin_ia32_psadbw(S, OpPC, Call);
+
   case clang::X86::BI__builtin_ia32_dbpsadbw128:
   case clang::X86::BI__builtin_ia32_dbpsadbw256:
   case clang::X86::BI__builtin_ia32_dbpsadbw512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 8efceff7e8c31..692b696df7e7a 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12629,6 +12629,38 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
           .extractBits(16, 1);
     });
 
+  case clang::X86::BI__builtin_ia32_psadbw128:
+  case clang::X86::BI__builtin_ia32_psadbw256:
+  case clang::X86::BI__builtin_ia32_psadbw512: {
+    APValue SourceLHS, SourceRHS;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+      return false;
+
+    assert(SourceLHS.isVector() && SourceRHS.isVector());
+    unsigned SourceLen = SourceLHS.getVectorLength();
+    assert(SourceLen == SourceRHS.getVectorLength());
+    assert((SourceLen % 8) == 0);
+
+    auto *DestTy = E->getType()->castAs<VectorType>();
+    QualType DestEltTy = DestTy->getElementType();
+    bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+    SmallVector<APValue, 8> ResultElements;
+    ResultElements.reserve(SourceLen / 8);
+
+    for (unsigned Lane = 0; Lane != SourceLen; Lane += 8) {
+      APInt Sum(64, 0);
+      for (unsigned I = 0; I != 8; ++I) {
+        APInt LHS = SourceLHS.getVectorElt(Lane + I).getInt().extOrTrunc(8);
+        APInt RHS = SourceRHS.getVectorElt(Lane + I).getInt().extOrTrunc(8);
+        Sum += llvm::APIntOps::abdu(LHS, RHS).zext(64);
+      }
+      ResultElements.push_back(APValue(APSInt(Sum, DestUnsigned)));
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+
   case clang::X86::BI__builtin_ia32_pmaddubsw128:
   case clang::X86::BI__builtin_ia32_pmaddubsw256:
   case clang::X86::BI__builtin_ia32_pmaddubsw512:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index d3ceb2327ac62..2c91258253041 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1810,7 +1810,7 @@ _mm256_or_si256(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sad_epu8(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 83cabc0dfb5ac..d5314782517b8 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1872,7 +1872,7 @@ _mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A,
                                   (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), 
\
                                   (__v32hi)_mm512_setzero_si512()))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_sad_epu8 (__m512i __A, __m512i __B)
 {
  return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 43c93263f015a..940868e4570cc 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2481,8 +2481,8 @@ _mm_mul_epu32(__m128i __a, __m128i __b) {
 ///    A 128-bit integer vector containing one of the source operands.
 /// \returns A [2 x i64] vector containing the sums of the sets of absolute
 ///    differences between both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sad_epu8(__m128i __a, __m128i __b) {
   return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
 }
 
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index efc0e6ce47e7d..143b4ea37216d 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2571,7 +2571,7 @@ _mm_avg_pu16(__m64 __a, __m64 __b) {
 /// \returns A 64-bit integer vector whose lower 16 bits contain the sums of 
the
 ///    sets of absolute differences between both operands. The upper bits are
 ///    cleared.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_sad_pu8(__m64 __a, __m64 __b)
 {
   return __trunc64(__builtin_ia32_psadbw128((__v16qi)__zext128(__a),
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index cb14d1aafedde..a8df1a279ec37 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1185,6 +1185,15 @@ __m256i test_mm256_sad_epu8(__m256i x, __m256i y) {
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %{{.*}}, <32 
x i8> %{{.*}})
   return _mm256_sad_epu8(x, y);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_sad_epu8((__m256i)(__v32qu){0, 1, 2, 3, 4, 
5, 6, 7,
+                                                             8, 9, 10, 11, 12, 
13, 14, 15,
+                                                             16, 17, 18, 19, 
20, 21, 22, 23,
+                                                             24, 25, 26, 27, 
28, 29, 30, 31},
+                                           (__m256i)(__v32qu){31, 30, 29, 28, 
27, 26, 25, 24,
+                                                             23, 22, 21, 20, 
19, 18, 17, 16,
+                                                             15, 14, 13, 12, 
11, 10, 9, 8,
+                                                             7, 6, 5, 4, 3, 2, 
1, 0}),
+                            192ULL, 64ULL, 64ULL, 192ULL));
 
 __m256i test_mm256_shuffle_epi8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_shuffle_epi8
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 945789b18dd8a..427efbbb99af8 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -3295,8 +3295,26 @@ TEST_CONSTEXPR(match_v32hu(_mm512_maskz_dbsad_epu8((__mmask32)0xAAAAAAAA,
 __m512i test_mm512_sad_epu8(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_sad_epu8
   // CHECK: @llvm.x86.avx512.psad.bw.512
-  return _mm512_sad_epu8(__A, __B); 
-}
+  return _mm512_sad_epu8(__A, __B);
+}
+TEST_CONSTEXPR(match_m512i(_mm512_sad_epu8((__m512i)(__v64qu){0, 1, 2, 3, 4, 
5, 6, 7,
+                                                             8, 9, 10, 11, 12, 
13, 14, 15,
+                                                             16, 17, 18, 19, 
20, 21, 22, 23,
+                                                             24, 25, 26, 27, 
28, 29, 30, 31,
+                                                             32, 33, 34, 35, 
36, 37, 38, 39,
+                                                             40, 41, 42, 43, 
44, 45, 46, 47,
+                                                             48, 49, 50, 51, 
52, 53, 54, 55,
+                                                             56, 57, 58, 59, 
60, 61, 62, 63},
+                                           (__m512i)(__v64qu){63, 62, 61, 60, 
59, 58, 57, 56,
+                                                             55, 54, 53, 52, 
51, 50, 49, 48,
+                                                             47, 46, 45, 44, 
43, 42, 41, 40,
+                                                             39, 38, 37, 36, 
35, 34, 33, 32,
+                                                             31, 30, 29, 28, 
27, 26, 25, 24,
+                                                             23, 22, 21, 20, 
19, 18, 17, 16,
+                                                             15, 14, 13, 12, 
11, 10, 9, 8,
+                                                             7, 6, 5, 4, 3, 2, 
1, 0}),
+                            448ULL, 320ULL, 192ULL, 64ULL,
+                            64ULL, 192ULL, 320ULL, 448ULL));
 
 __mmask32 test_mm512_movepi16_mask(__m512i __A) {
   // CHECK-LABEL: test_mm512_movepi16_mask
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 37d6306ecdb7d..90b42ba3cf099 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -486,6 +486,9 @@ __m64 test_mm_sad_pu8(__m64 a, __m64 b) {
   // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>
   return _mm_sad_pu8(a, b);
 }
+TEST_CONSTEXPR(match_m64(_mm_sad_pu8((__m64)(__v8qu){0, 1, 2, 3, 4, 5, 6, 7},
+                                      (__m64)(__v8qu){7, 6, 5, 4, 3, 2, 1, 0}),
+                         32ULL));
 
 __m64 test_mm_set_pi8(char a, char b, char c, char d, char e, char f, char g, 
char h) {
   // CHECK-LABEL: test_mm_set_pi8
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 2993b8bb719d6..3e36c0047baf0 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1075,6 +1075,11 @@ __m128i test_mm_sad_epu8(__m128i A, __m128i B) {
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 
x i8> %{{.*}})
   return _mm_sad_epu8(A, B);
 }
+TEST_CONSTEXPR(match_m128i(_mm_sad_epu8((__m128i)(__v16qu){0, 1, 2, 3, 4, 5, 
6, 7,
+                                                          255, 254, 253, 252, 
251, 250, 249, 248},
+                                        (__m128i)(__v16qu){7, 6, 5, 4, 3, 2, 
1, 0,
+                                                          248, 249, 250, 251, 
252, 253, 254, 255}),
+                           32ULL, 32ULL));
 
 __m128i test_mm_set_epi8(char A, char B, char C, char D,
                          char E, char F, char G, char H,

>From 23a50bee391268964fa4e15edd2e29d3434ab79d Mon Sep 17 00:00:00 2001
From: Mohxen <[email protected]>
Date: Tue, 19 May 2026 08:38:10 -0400
Subject: [PATCH 2/3] Address PSADBW unsigned byte builtin signatures

---
 clang/include/clang/Basic/BuiltinsX86.td | 6 +++---
 clang/lib/Headers/avx2intrin.h           | 5 ++---
 clang/lib/Headers/avx512bwintrin.h       | 8 +++-----
 clang/lib/Headers/emmintrin.h            | 2 +-
 clang/lib/Headers/xmmintrin.h            | 7 +++----
 5 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index a4aaac773623d..a7d78f1b6d826 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -173,7 +173,7 @@ let Features = "sse2", Attributes = [NoThrow] in {
 let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
   def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, 
double>)">;
-  def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, 
_Vector<16, char>)">;
+  def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, unsigned 
char>, _Vector<16, unsigned char>)">;
 }
 let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, 
_Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
@@ -578,7 +578,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
   def mpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, 
_Vector<32, char>, _Constant char)">;
   def psadbw256
       : X86Builtin<
-            "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
+            "_Vector<4, long long int>(_Vector<32, unsigned char>, _Vector<32, 
unsigned char>)">;
   def permdf256
       : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
   def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long "
@@ -3174,7 +3174,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, 
_Vector<64, char>, _Constant int)">;
-  def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, char>, 
_Vector<64, char>)">;
+  def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, unsigned 
char>, _Vector<64, unsigned char>)">;
 }
 
 let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 2c91258253041..8eb22fe519e7d 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1811,9 +1811,8 @@ _mm256_or_si256(__m256i __a, __m256i __b)
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sad_epu8(__m256i __a, __m256i __b)
-{
-  return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
+_mm256_sad_epu8(__m256i __a, __m256i __b) {
+  return __builtin_ia32_psadbw256((__v32qu)__a, (__v32qu)__b);
 }
 
 /// Shuffles 8-bit integers in the 256-bit integer vector \a __a according
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index d5314782517b8..5917eeaaa38fe 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1872,11 +1872,9 @@ _mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A,
                                   (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), 
\
                                   (__v32hi)_mm512_setzero_si512()))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sad_epu8 (__m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
-               (__v64qi) __B);
+static __inline__ __m512i
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sad_epu8(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_psadbw512((__v64qu)__A, (__v64qu)__B);
 }
 
 #undef __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 940868e4570cc..3f039edf87e81 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2483,7 +2483,7 @@ _mm_mul_epu32(__m128i __a, __m128i __b) {
 ///    differences between both operands.
 static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_sad_epu8(__m128i __a, __m128i __b) {
-  return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
+  return __builtin_ia32_psadbw128((__v16qu)__a, (__v16qu)__b);
 }
 
 /// Subtracts the corresponding 8-bit integer values in the operands.
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 143b4ea37216d..73eab9e460ca5 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2572,10 +2572,9 @@ _mm_avg_pu16(__m64 __a, __m64 __b) {
 ///    sets of absolute differences between both operands. The upper bits are
 ///    cleared.
 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_sad_pu8(__m64 __a, __m64 __b)
-{
-  return __trunc64(__builtin_ia32_psadbw128((__v16qi)__zext128(__a),
-                                            (__v16qi)__zext128(__b)));
+_mm_sad_pu8(__m64 __a, __m64 __b) {
+  return __trunc64(__builtin_ia32_psadbw128((__v16qu)__zext128(__a),
+                                            (__v16qu)__zext128(__b)));
 }
 
 #if defined(__cplusplus)

>From 053d28159370d88fcfc8307beda8c13a1eb9a4ef Mon Sep 17 00:00:00 2001
From: Mohxen <[email protected]>
Date: Tue, 23 Jun 2026 21:34:10 -0400
Subject: [PATCH 3/3] [clang][X86] Fix psadbw builtin smoke test argument type

---
 clang/test/CodeGen/builtins-x86.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 0f66d8c4e3120..f67ad13d97208 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -21,6 +21,7 @@ typedef float V2f __attribute__((vector_size(8)));
 
 // 128-bit
 typedef char V16c __attribute__((vector_size(16)));
+typedef unsigned char V16Uc __attribute__((vector_size(16)));
 typedef signed short V8s __attribute__((vector_size(16)));
 typedef unsigned short V8u __attribute__((vector_size(16)));
 typedef signed int V4i __attribute__((vector_size(16)));
@@ -289,7 +290,7 @@ void f0(void) {
 #ifdef USE_64
   (void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi);
 #endif
-  tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);
+  tmp_V2LLi = __builtin_ia32_psadbw128((V16Uc)tmp_V16c, (V16Uc)tmp_V16c);
   tmp_V4i = __builtin_ia32_cvtpd2dq(tmp_V2d);
   tmp_V4f = __builtin_ia32_cvtpd2ps(tmp_V2d);
   tmp_V4i = __builtin_ia32_cvttpd2dq(tmp_V2d);

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to