https://github.com/chaitanyav updated https://github.com/llvm/llvm-project/pull/168100

>From 71dbbff93b2abafd5bccf64479522f5aaa1ff7f6 Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <[email protected]>
Date: Mon, 10 Nov 2025 22:59:08 -0800
Subject: [PATCH 1/2] [Clang] VectorExprEvaluator::VisitCallExpr /
 InterpretBuiltin - Allow AVX512 VPSHUFBITQMB intrinsics to be used in
 constexpr

Resolves: #161337
---
 clang/include/clang/Basic/BuiltinsX86.td      |  6 +-
 clang/lib/AST/ByteCode/InterpBuiltin.cpp      | 72 +++++++++++++++++++
 clang/lib/AST/ExprConstant.cpp                | 46 ++++++++++++
 clang/lib/Headers/avx512bitalgintrin.h        | 27 ++++---
 clang/lib/Headers/avx512vlbitalgintrin.h      | 46 ++++++------
 .../test/CodeGen/X86/avx512bitalg-builtins.c  | 10 +++
 .../CodeGen/X86/avx512vlbitalg-builtins.c     | 14 ++++
 7 files changed, 179 insertions(+), 42 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b760c3e06b8f7..d9059fc730b5b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1363,15 +1363,15 @@ let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVect
   def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
 }
 
-let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWidth<128>] in {
   def vpshufbitqmb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, 
_Vector<16, char>, unsigned short)">;
 }
 
-let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWidth<256>] in {
   def vpshufbitqmb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, 
_Vector<32, char>, unsigned int)">;
 }
 
-let Features = "avx512bitalg", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+let Features = "avx512bitalg", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, 
char>, _Vector<64, char>, unsigned long long int)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index eba71d66bc4d6..b36599101d5f1 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3468,6 +3468,72 @@ static bool interp__builtin_ia32_shuffle_generic(
   return true;
 }
 
+static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
+                                                 const CallExpr *Call) {
+
+  assert(Call->getNumArgs() == 3);
+
+  QualType SourceType = Call->getArg(0)->getType();
+  QualType ShuffleMaskType = Call->getArg(1)->getType();
+  QualType ZeroMaskType = Call->getArg(2)->getType();
+  if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() ||
+      !ZeroMaskType->isIntegerType()) {
+    return false;
+  }
+
+  Pointer Source, ShuffleMask;
+  APSInt ZeroMask = popToAPSInt(S, Call->getArg(2));
+  ShuffleMask = S.Stk.pop<Pointer>();
+  Source = S.Stk.pop<Pointer>();
+
+  const auto *SourceVecT = SourceType->castAs<VectorType>();
+  const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>();
+  assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements());
+  assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements());
+
+  PrimType SourceElemT = 
*S.getContext().classify(SourceVecT->getElementType());
+  PrimType ShuffleMaskElemT =
+      *S.getContext().classify(ShuffleMaskVecT->getElementType());
+
+  const unsigned NumBytesInQWord = 8;
+  const unsigned NumBitsInByte = 8;
+  const unsigned NumBytes = SourceVecT->getNumElements();
+  const unsigned NumQWords = NumBytes / NumBytesInQWord;
+  const unsigned RetWidth = ZeroMask.getBitWidth();
+  APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
+
+  for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
+
+    APInt SourceQWord(64, 0);
+    for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
+         ++ByteInQWord) {
+      uint64_t Byte = 0;
+      INT_TYPE_SWITCH(SourceElemT, {
+        Byte = static_cast<uint64_t>(
+            Source.elem<T>(QWordId * NumBytesInQWord + ByteInQWord));
+      });
+      SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte);
+    }
+
+    for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
+         ++ByteInQWord) {
+      unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord;
+      unsigned M = 0;
+      INT_TYPE_SWITCH(ShuffleMaskElemT, {
+        M = static_cast<unsigned>(ShuffleMask.elem<T>(ByteIdx)) & 0x3F;
+      });
+
+      if (ZeroMask[ByteIdx]) {
+        RetMask.setBitVal(ByteIdx, SourceQWord[M]);
+      }
+    }
+  }
+
+  pushInteger(S, RetMask, Call->getType());
+
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                       uint32_t BuiltinID) {
   if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4844,6 +4910,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_ucmpq512_mask:
     return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
                                          /*IsUnsigned=*/true);
+
+  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+  case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
+    return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call);
+
   case X86::BI__builtin_ia32_pslldqi128_byteshift:
   case X86::BI__builtin_ia32_pslldqi256_byteshift:
   case X86::BI__builtin_ia32_pslldqi512_byteshift:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ce5301f17b3e7..962d4b91c1d45 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16706,6 +16706,52 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
 
     return Success(APValue(RetMask), E);
   }
+  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
+    assert(E->getNumArgs() == 3);
+
+    APValue Source, ShuffleMask;
+    APSInt ZeroMask;
+    if (!EvaluateVector(E->getArg(0), Source, Info) ||
+        !EvaluateVector(E->getArg(1), ShuffleMask, Info) ||
+        !EvaluateInteger(E->getArg(2), ZeroMask, Info))
+      return false;
+
+    assert(Source.getVectorLength() == ShuffleMask.getVectorLength());
+    assert(ZeroMask.getBitWidth() == Source.getVectorLength());
+
+    unsigned NumBytesInQWord = 8;
+    unsigned NumBitsInByte = 8;
+    unsigned NumBytes = Source.getVectorLength();
+    unsigned NumQWords = NumBytes / NumBytesInQWord;
+    unsigned RetWidth = ZeroMask.getBitWidth();
+    APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
+
+    for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
+
+      APInt SourceQWord(64, 0);
+      for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
+           ++ByteInQWord) {
+        uint64_t Byte =
+            Source.getVectorElt(QWordId * NumBytesInQWord + ByteInQWord)
+                .getInt()
+                .getZExtValue();
+        SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte);
+      }
+
+      for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
+           ++ByteInQWord) {
+        unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord;
+        unsigned M =
+            ShuffleMask.getVectorElt(ByteIdx).getInt().getZExtValue() & 0x3F;
+        if (ZeroMask[ByteIdx]) {
+          RetMask.setBitVal(ByteIdx, SourceQWord[M]);
+        }
+      }
+    }
+    return Success(APValue(RetMask), E);
+  }
   }
 }
 
diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h
index 98197e468370d..6a3e47814cb93 100644
--- a/clang/lib/Headers/avx512bitalgintrin.h
+++ b/clang/lib/Headers/avx512bitalgintrin.h
@@ -15,44 +15,43 @@
 #define __AVX512BITALGINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS                                                     
\
+  constexpr                                                                    
\
+      __attribute__((__always_inline__, __nodebug__,                           
\
+                     __target__("avx512bitalg"), __min_vector_width__(512)))
+#else
 #define __DEFAULT_FN_ATTRS                                                     
\
   __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"),   
\
                  __min_vector_width__(512)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
-#else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_popcnt_epi16(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512(
       (__mmask32)__U, (__v32hi)_mm512_popcnt_epi16(__B), (__v32hi)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
   return _mm512_mask_popcnt_epi16((__m512i)_mm512_setzero_si512(), __U, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_popcnt_epi8(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi8(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512(
       (__mmask64)__U, (__v64qi)_mm512_popcnt_epi8(__B), (__v64qi)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
   return _mm512_mask_popcnt_epi8((__m512i)_mm512_setzero_si512(), __U, __B);
 }
@@ -74,6 +73,4 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
 }
 
 #undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_CONSTEXPR
-
 #endif
diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h
index 1874adce4ea6e..624630f76e484 100644
--- a/clang/lib/Headers/avx512vlbitalgintrin.h
+++ b/clang/lib/Headers/avx512vlbitalgintrin.h
@@ -15,6 +15,16 @@
 #define __AVX512VLBITALGINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128                                                  
\
+  constexpr __attribute__((__always_inline__, __nodebug__,                     
\
+                           __target__("avx512vl,avx512bitalg"),                
\
+                           __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  
\
+  constexpr __attribute__((__always_inline__, __nodebug__,                     
\
+                           __target__("avx512vl,avx512bitalg"),                
\
+                           __min_vector_width__(256)))
+#else
 #define __DEFAULT_FN_ATTRS128                                                  
\
   __attribute__((__always_inline__, __nodebug__,                               
\
                  __target__("avx512vl,avx512bitalg"),                          
\
@@ -23,75 +33,66 @@
   __attribute__((__always_inline__, __nodebug__,                               
\
                  __target__("avx512vl,avx512bitalg"),                          
\
                  __min_vector_width__(256)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
-#else
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
 #endif
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_popcnt_epi16(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256(
       (__mmask16)__U, (__v16hi)_mm256_popcnt_epi16(__B), (__v16hi)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) {
   return _mm256_mask_popcnt_epi16((__m256i)_mm256_setzero_si256(), __U, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_popcnt_epi16(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128(
       (__mmask8)__U, (__v8hi)_mm_popcnt_epi16(__B), (__v8hi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) {
   return _mm_mask_popcnt_epi16((__m128i)_mm_setzero_si128(), __U, __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_popcnt_epi8(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256(
       (__mmask32)__U, (__v32qi)_mm256_popcnt_epi8(__B), (__v32qi)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) {
   return _mm256_mask_popcnt_epi8((__m256i)_mm256_setzero_si256(), __U, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_popcnt_epi8(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128(
       (__mmask16)__U, (__v16qi)_mm_popcnt_epi8(__B), (__v16qi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) {
   return _mm_mask_popcnt_epi8((__m128i)_mm_setzero_si128(), __U, __B);
 }
@@ -131,7 +132,4 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
-#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
-#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
-
 #endif
diff --git a/clang/test/CodeGen/X86/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
index 3ac8674421d93..7d524ab156500 100644
--- a/clang/test/CodeGen/X86/avx512bitalg-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
@@ -70,4 +70,14 @@ __mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
   // CHECK: @llvm.x86.avx512.vpshufbitqmb.512
   return _mm512_bitshuffle_epi64_mask(__A, __B);
 }
+TEST_CONSTEXPR(_mm512_bitshuffle_epi64_mask(
+  (__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 
85,85,85,85,85,85,85,85,
+                     1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 
85,85,85,85,85,85,85,85},
+  (__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 
0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
+                     0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 
0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010155ff0101ULL);
 
+TEST_CONSTEXPR(_mm512_mask_bitshuffle_epi64_mask(0xFFFFFFFF00000000ULL,
+  (__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 
85,85,85,85,85,85,85,85,
+                     1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 
85,85,85,85,85,85,85,85},
+  (__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 
0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
+                     0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 
0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010100000000ULL);
diff --git a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
index e0b55c6fde81a..d19d2c28f3649 100644
--- a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
@@ -116,6 +116,13 @@ __mmask32 test_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
   // CHECK: @llvm.x86.avx512.vpshufbitqmb.256
   return _mm256_bitshuffle_epi64_mask(__A, __B);
 }
+TEST_CONSTEXPR(_mm256_bitshuffle_epi64_mask(
+  (__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 
85,85,85,85,85,85,85,85},
+  (__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 
0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0101);
+
+TEST_CONSTEXPR(_mm256_mask_bitshuffle_epi64_mask(0xFFFF0000,
+  (__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 
85,85,85,85,85,85,85,85},
+  (__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 
0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0000);
 
 __mmask16 test_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, 
__m128i __B) {
   // CHECK-LABEL: test_mm_mask_bitshuffle_epi64_mask
@@ -129,4 +136,11 @@ __mmask16 test_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
   // CHECK: @llvm.x86.avx512.vpshufbitqmb.128
   return _mm_bitshuffle_epi64_mask(__A, __B);
 }
+TEST_CONSTEXPR(_mm_bitshuffle_epi64_mask(
+  (__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
+  (__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0101);
+
+TEST_CONSTEXPR(_mm_mask_bitshuffle_epi64_mask(0xFF00,
+  (__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
+  (__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0100);
 

>From 457e5481c9b54d35d352e851f4b61764bc2621f2 Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <[email protected]>
Date: Thu, 20 Nov 2025 09:45:55 -0800
Subject: [PATCH 2/2] Address review feedback

- Rename ByteInQWord to ByteIdx for loop counter within qword
- Add SelIdx for absolute byte index used in mask selection
- Use APInt::insertBits() instead of manual shift/OR for building SourceQWord
- Remove unnecessary const from local variables
- Move constexpr after __attribute__ in header macros
- Minor formatting cleanup
---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 29 +++++++++++-------------
 clang/lib/AST/ExprConstant.cpp           | 24 ++++++++------------
 clang/lib/Headers/avx512bitalgintrin.h   |  5 ++--
 clang/lib/Headers/avx512vlbitalgintrin.h | 12 +++++-----
 4 files changed, 31 insertions(+), 39 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b36599101d5f1..831c05768f3ec 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3495,36 +3495,33 @@ static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
   PrimType ShuffleMaskElemT =
       *S.getContext().classify(ShuffleMaskVecT->getElementType());
 
-  const unsigned NumBytesInQWord = 8;
-  const unsigned NumBitsInByte = 8;
-  const unsigned NumBytes = SourceVecT->getNumElements();
-  const unsigned NumQWords = NumBytes / NumBytesInQWord;
-  const unsigned RetWidth = ZeroMask.getBitWidth();
+  unsigned NumBytesInQWord = 8;
+  unsigned NumBitsInByte = 8;
+  unsigned NumBytes = SourceVecT->getNumElements();
+  unsigned NumQWords = NumBytes / NumBytesInQWord;
+  unsigned RetWidth = ZeroMask.getBitWidth();
   APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
 
   for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
-
     APInt SourceQWord(64, 0);
-    for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
-         ++ByteInQWord) {
+    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
       uint64_t Byte = 0;
       INT_TYPE_SWITCH(SourceElemT, {
         Byte = static_cast<uint64_t>(
-            Source.elem<T>(QWordId * NumBytesInQWord + ByteInQWord));
+            Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx));
       });
-      SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte);
+      SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
     }
 
-    for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
-         ++ByteInQWord) {
-      unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord;
+    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+      unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
       unsigned M = 0;
       INT_TYPE_SWITCH(ShuffleMaskElemT, {
-        M = static_cast<unsigned>(ShuffleMask.elem<T>(ByteIdx)) & 0x3F;
+        M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F;
       });
 
-      if (ZeroMask[ByteIdx]) {
-        RetMask.setBitVal(ByteIdx, SourceQWord[M]);
+      if (ZeroMask[SelIdx]) {
+        RetMask.setBitVal(SelIdx, SourceQWord[M]);
       }
     }
   }
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 962d4b91c1d45..60f3245483d29 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16729,24 +16729,20 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
 
     for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
-
       APInt SourceQWord(64, 0);
-      for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
-           ++ByteInQWord) {
-        uint64_t Byte =
-            Source.getVectorElt(QWordId * NumBytesInQWord + ByteInQWord)
-                .getInt()
-                .getZExtValue();
-        SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte);
+      for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+        uint64_t Byte = Source.getVectorElt(QWordId * NumBytesInQWord + 
ByteIdx)
+                            .getInt()
+                            .getZExtValue();
+        SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
       }
 
-      for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
-           ++ByteInQWord) {
-        unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord;
+      for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+        unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
         unsigned M =
-            ShuffleMask.getVectorElt(ByteIdx).getInt().getZExtValue() & 0x3F;
-        if (ZeroMask[ByteIdx]) {
-          RetMask.setBitVal(ByteIdx, SourceQWord[M]);
+            ShuffleMask.getVectorElt(SelIdx).getInt().getZExtValue() & 0x3F;
+        if (ZeroMask[SelIdx]) {
+          RetMask.setBitVal(SelIdx, SourceQWord[M]);
         }
       }
     }
diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h
index 6a3e47814cb93..f5e9b1a84fcfe 100644
--- a/clang/lib/Headers/avx512bitalgintrin.h
+++ b/clang/lib/Headers/avx512bitalgintrin.h
@@ -17,9 +17,8 @@
 /* Define the default attributes for the functions in this file. */
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS                                                     
\
-  constexpr                                                                    
\
-      __attribute__((__always_inline__, __nodebug__,                           
\
-                     __target__("avx512bitalg"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"),   
\
+                 __min_vector_width__(512))) constexpr
 #else
 #define __DEFAULT_FN_ATTRS                                                     
\
   __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"),   
\
diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h
index 624630f76e484..edfb9c1e1f241 100644
--- a/clang/lib/Headers/avx512vlbitalgintrin.h
+++ b/clang/lib/Headers/avx512vlbitalgintrin.h
@@ -17,13 +17,13 @@
 /* Define the default attributes for the functions in this file. */
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS128                                                  
\
-  constexpr __attribute__((__always_inline__, __nodebug__,                     
\
-                           __target__("avx512vl,avx512bitalg"),                
\
-                           __min_vector_width__(128)))
+  __attribute__((__always_inline__, __nodebug__,                               
\
+                 __target__("avx512vl,avx512bitalg"),                          
\
+                 __min_vector_width__(128))) constexpr
 #define __DEFAULT_FN_ATTRS256                                                  
\
-  constexpr __attribute__((__always_inline__, __nodebug__,                     
\
-                           __target__("avx512vl,avx512bitalg"),                
\
-                           __min_vector_width__(256)))
+  __attribute__((__always_inline__, __nodebug__,                               
\
+                 __target__("avx512vl,avx512bitalg"),                          
\
+                 __min_vector_width__(256))) constexpr
 #else
 #define __DEFAULT_FN_ATTRS128                                                  
\
   __attribute__((__always_inline__, __nodebug__,                               
\

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to