https://github.com/TelGome updated 
https://github.com/llvm/llvm-project/pull/167236

>From 7dce6c64d62258feb5ff347e3af28e0d247299aa Mon Sep 17 00:00:00 2001
From: TelGome <[email protected]>
Date: Sun, 9 Nov 2025 22:29:36 +0800
Subject: [PATCH 1/2] Generalize interp__builtin_ia32_shuffle_generic to handle
 single op permute shuffles

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 221 +++++++----------
 clang/lib/AST/ExprConstant.cpp           | 289 ++++++++++-------------
 2 files changed, 215 insertions(+), 295 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0ef130c0a55df..0d5d07a8f9e2b 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2841,76 +2841,6 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC,
-                                        const CallExpr *Call) {
-  assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
-  const Pointer &Control = S.Stk.pop<Pointer>();
-  const Pointer &Src = S.Stk.pop<Pointer>();
-  const Pointer &Dst = S.Stk.peek<Pointer>();
-
-  unsigned NumElems = Dst.getNumElems();
-  assert(NumElems == Control.getNumElems());
-  assert(NumElems == Dst.getNumElems());
-
-  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
-    uint8_t Ctlb = static_cast<uint8_t>(Control.elem<int8_t>(Idx));
-
-    if (Ctlb & 0x80) {
-      Dst.elem<int8_t>(Idx) = 0;
-    } else {
-      unsigned LaneBase = (Idx / 16) * 16;
-      unsigned SrcOffset = Ctlb & 0x0F;
-      unsigned SrcIdx = LaneBase + SrcOffset;
-
-      Dst.elem<int8_t>(Idx) = Src.elem<int8_t>(SrcIdx);
-    }
-  }
-  Dst.initializeAllElements();
-  return true;
-}
-
-static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
-                                       const CallExpr *Call, bool IsShufHW) {
-  assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
-  APSInt ControlImm = popToAPSInt(S, Call->getArg(1));
-  const Pointer &Src = S.Stk.pop<Pointer>();
-  const Pointer &Dst = S.Stk.peek<Pointer>();
-
-  unsigned NumElems = Dst.getNumElems();
-  PrimType ElemT = Dst.getFieldDesc()->getPrimType();
-
-  unsigned ElemBits = static_cast<unsigned>(primSize(ElemT) * 8);
-  if (ElemBits != 16 && ElemBits != 32)
-    return false;
-
-  unsigned LaneElts = 128u / ElemBits;
-  assert(LaneElts && (NumElems % LaneElts == 0));
-
-  uint8_t Ctl = static_cast<uint8_t>(ControlImm.getZExtValue());
-
-  for (unsigned Idx = 0; Idx != NumElems; Idx++) {
-    unsigned LaneBase = (Idx / LaneElts) * LaneElts;
-    unsigned LaneIdx = Idx % LaneElts;
-    unsigned SrcIdx = Idx;
-    unsigned Sel = (Ctl >> (2 * (LaneIdx & 0x3))) & 0x3;
-    if (ElemBits == 32) {
-      SrcIdx = LaneBase + Sel;
-    } else {
-      constexpr unsigned HalfSize = 4;
-      bool InHigh = LaneIdx >= HalfSize;
-      if (!IsShufHW && !InHigh) {
-        SrcIdx = LaneBase + Sel;
-      } else if (IsShufHW && InHigh) {
-        SrcIdx = LaneBase + HalfSize + Sel;
-      }
-    }
-
-    INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(Idx) = Src.elem<T>(SrcIdx); });
-  }
-  Dst.initializeAllElements();
-  return true;
-}
-
 static bool interp__builtin_ia32_test_op(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
     llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
@@ -3377,61 +3307,46 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-static bool interp__builtin_x86_byteshift(
-    InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID,
-    llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I,
-                             unsigned Shift)>
-        Fn) {
-  assert(Call->getNumArgs() == 2);
-
-  APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
-  uint64_t Shift = ImmAPS.getZExtValue() & 0xff;
-
-  const Pointer &Src = S.Stk.pop<Pointer>();
-  if (!Src.getFieldDesc()->isPrimitiveArray())
-    return false;
-
-  unsigned NumElems = Src.getNumElems();
-  const Pointer &Dst = S.Stk.peek<Pointer>();
-  PrimType ElemT = Src.getFieldDesc()->getPrimType();
-
-  for (unsigned Lane = 0; Lane != NumElems; Lane += 16) {
-    for (unsigned I = 0; I != 16; ++I) {
-      unsigned Base = Lane + I;
-      APSInt Result = APSInt(Fn(Src, Lane, I, Shift));
-      INT_TYPE_SWITCH_NO_BOOL(ElemT,
-                              { Dst.elem<T>(Base) = static_cast<T>(Result); });
-    }
-  }
-
-  Dst.initializeAllElements();
-
-  return true;
-}
-
 static bool interp__builtin_ia32_shuffle_generic(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
     llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
         GetSourceIndex) {
 
-  assert(Call->getNumArgs() == 3);
+  assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);
 
   unsigned ShuffleMask = 0;
   Pointer A, MaskVector, B;
-
-  QualType Arg2Type = Call->getArg(2)->getType();
   bool IsVectorMask = false;
-  if (Arg2Type->isVectorType()) {
-    IsVectorMask = true;
-    B = S.Stk.pop<Pointer>();
-    MaskVector = S.Stk.pop<Pointer>();
-    A = S.Stk.pop<Pointer>();
-  } else if (Arg2Type->isIntegerType()) {
-    ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
-    B = S.Stk.pop<Pointer>();
-    A = S.Stk.pop<Pointer>();
+  bool IsSingleOperand = (Call->getNumArgs() == 2);
+
+  if (IsSingleOperand) {
+    QualType MaskType = Call->getArg(1)->getType();
+    if (MaskType->isVectorType()) {
+      IsVectorMask = true;
+      MaskVector = S.Stk.pop<Pointer>();
+      A = S.Stk.pop<Pointer>();
+      B = A;
+    } else if (MaskType->isIntegerType()) {
+      ShuffleMask = popToAPSInt(S, Call->getArg(1)).getZExtValue();
+      A = S.Stk.pop<Pointer>();
+      B = A;
+    } else {
+      return false;
+    }
   } else {
-    return false;
+    QualType Arg2Type = Call->getArg(2)->getType();
+    if (Arg2Type->isVectorType()) {
+      IsVectorMask = true;
+      B = S.Stk.pop<Pointer>();
+      MaskVector = S.Stk.pop<Pointer>();
+      A = S.Stk.pop<Pointer>();
+    } else if (Arg2Type->isIntegerType()) {
+      ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
+      B = S.Stk.pop<Pointer>();
+      A = S.Stk.pop<Pointer>();
+    } else {
+      return false;
+    }
   }
 
   QualType Arg0Type = Call->getArg(0)->getType();
@@ -3455,6 +3370,7 @@ static bool interp__builtin_ia32_shuffle_generic(
         ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx));
       });
     }
+
     auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
 
     if (SrcIdx < 0) {
@@ -4555,22 +4471,59 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_pshufb128:
   case X86::BI__builtin_ia32_pshufb256:
   case X86::BI__builtin_ia32_pshufb512:
-    return interp__builtin_ia32_pshufb(S, OpPC, Call);
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+          uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
+          if (Ctlb & 0x80) {
+            return std::make_pair(0, -1);
+          } else {
+            unsigned LaneBase = (DstIdx / 16) * 16;
+            unsigned SrcOffset = Ctlb & 0x0F;
+            unsigned SrcIdx = LaneBase + SrcOffset;
+            return std::make_pair(0, static_cast<int>(SrcIdx));
+            }
+        });
 
   case X86::BI__builtin_ia32_pshuflw:
   case X86::BI__builtin_ia32_pshuflw256:
   case X86::BI__builtin_ia32_pshuflw512:
-    return interp__builtin_ia32_pshuf(S, OpPC, Call, false);
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+          unsigned LaneBase = (DstIdx / 8) * 8;
+          unsigned LaneIdx = DstIdx % 8;
+          if (LaneIdx < 4) {
+            unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
+            return std::make_pair(0, static_cast<int>(LaneBase + Sel));
+          } else {
+            return std::make_pair(0, static_cast<int>(DstIdx));
+          }
+        });
 
   case X86::BI__builtin_ia32_pshufhw:
   case X86::BI__builtin_ia32_pshufhw256:
   case X86::BI__builtin_ia32_pshufhw512:
-    return interp__builtin_ia32_pshuf(S, OpPC, Call, true);
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+          unsigned LaneBase = (DstIdx / 8) * 8;
+          unsigned LaneIdx = DstIdx % 8;
+          if (LaneIdx >= 4) {
+            unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3;
+            return std::make_pair(0, static_cast<int>(LaneBase + 4 + Sel));
+          } else {
+            return std::make_pair(0, static_cast<int>(DstIdx));
+          }
+        });
 
   case X86::BI__builtin_ia32_pshufd:
   case X86::BI__builtin_ia32_pshufd256:
   case X86::BI__builtin_ia32_pshufd512:
-    return interp__builtin_ia32_pshuf(S, OpPC, Call, false);
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+          unsigned LaneBase = (DstIdx / 4) * 4;
+          unsigned LaneIdx = DstIdx % 4;
+          unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
+          return std::make_pair(0, static_cast<int>(LaneBase + Sel));
+        });
 
   case X86::BI__builtin_ia32_kandqi:
   case X86::BI__builtin_ia32_kandhi:
@@ -4728,13 +4681,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
     // The lane width is hardcoded to 16 to match the SIMD register size,
     // but the algorithm processes one byte per iteration,
     // so APInt(8, ...) is correct and intentional.
-    return interp__builtin_x86_byteshift(
-        S, OpPC, Call, BuiltinID,
-        [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) {
-          if (I < Shift) {
-            return APInt(8, 0);
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call,
+        [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
+          unsigned LaneBase = (DstIdx / 16) * 16;
+          unsigned LaneIdx = DstIdx % 16;
+          if (LaneIdx < Shift) {
+            return std::make_pair(0, -1);
           }
-          return APInt(8, Src.elem<uint8_t>(Lane + I - Shift));
+
+            return std::make_pair(0, static_cast<int>(LaneBase + LaneIdx - Shift));
+
         });
 
   case X86::BI__builtin_ia32_psrldqi128_byteshift:
@@ -4744,14 +4701,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
     // The lane width is hardcoded to 16 to match the SIMD register size,
     // but the algorithm processes one byte per iteration,
     // so APInt(8, ...) is correct and intentional.
-    return interp__builtin_x86_byteshift(
-        S, OpPC, Call, BuiltinID,
-        [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) {
-          if (I + Shift < 16) {
-            return APInt(8, Src.elem<uint8_t>(Lane + I + Shift));
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call,
+        [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
+          unsigned LaneBase = (DstIdx / 16) * 16;
+          unsigned LaneIdx = DstIdx % 16;
+          if (LaneIdx + Shift < 16) {
+            return std::make_pair(0, static_cast<int>(LaneBase + LaneIdx + Shift));
           }
 
-          return APInt(8, 0);
+          return std::make_pair(0, -1);
         });
 
   default:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 972d9fe3b5e4f..1541356d298da 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12090,24 +12090,46 @@ static bool evalShuffleGeneric(
   unsigned ShuffleMask = 0;
   APValue A, MaskVector, B;
   bool IsVectorMask = false;
-
-  QualType Arg2Type = Call->getArg(2)->getType();
-  if (Arg2Type->isVectorType()) {
-    IsVectorMask = true;
-    if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
-        !EvaluateAsRValue(Info, Call->getArg(1), MaskVector) ||
-        !EvaluateAsRValue(Info, Call->getArg(2), B))
-      return false;
-  } else if (Arg2Type->isIntegerType()) {
-    APSInt MaskImm;
-    if (!EvaluateInteger(Call->getArg(2), MaskImm, Info))
-      return false;
-    ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
-    if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
-        !EvaluateAsRValue(Info, Call->getArg(1), B))
+  bool IsSingleOperand = (Call->getNumArgs() == 2);
+
+  if (IsSingleOperand) {
+    QualType MaskType = Call->getArg(1)->getType();
+    if (MaskType->isVectorType()) {
+      IsVectorMask = true;
+      if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
+          !EvaluateAsRValue(Info, Call->getArg(1), MaskVector))
+        return false;
+      B = A;
+    } else if (MaskType->isIntegerType()) {
+      APSInt MaskImm;
+      if (!EvaluateInteger(Call->getArg(1), MaskImm, Info))
+        return false;
+      ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
+      if (!EvaluateAsRValue(Info, Call->getArg(0), A))
+        return false;
+      B = A;
+    } else {
       return false;
+    }
   } else {
-    return false;
+    QualType Arg2Type = Call->getArg(2)->getType();
+    if (Arg2Type->isVectorType()) {
+      IsVectorMask = true;
+      if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
+          !EvaluateAsRValue(Info, Call->getArg(1), MaskVector) ||
+          !EvaluateAsRValue(Info, Call->getArg(2), B))
+        return false;
+    } else if (Arg2Type->isIntegerType()) {
+      APSInt MaskImm;
+      if (!EvaluateInteger(Call->getArg(2), MaskImm, Info))
+        return false;
+      ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
+      if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
+          !EvaluateAsRValue(Info, Call->getArg(1), B))
+        return false;
+    } else {
+      return false;
+    }
   }
 
   unsigned NumElts = VT->getNumElements();
@@ -12124,8 +12146,12 @@ static bool evalShuffleGeneric(
     if (SrcIdx < 0) {
       // Zero out this element
       QualType ElemTy = VT->getElementType();
-      ResultElements.push_back(
-          APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy))));
+      if (ElemTy->isFloatingType()) {
+        ResultElements.push_back(
+            APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy))));
+      } else {
+        ResultElements.push_back(APValue(Info.Ctx.MakeIntValue(0, ElemTy)));
+      }
     } else {
       const APValue &Src = (SrcVecIdx == 0) ? A : B;
       ResultElements.push_back(Src.getVectorElt(SrcIdx));
@@ -12136,98 +12162,6 @@ static bool evalShuffleGeneric(
   return true;
 }
 
-static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call,
-                              APValue &Out) {
-  APValue SrcVec, ControlVec;
-  if (!EvaluateAsRValue(Info, Call->getArg(0), SrcVec))
-    return false;
-  if (!EvaluateAsRValue(Info, Call->getArg(1), ControlVec))
-    return false;
-
-  const auto *VT = Call->getType()->getAs<VectorType>();
-  if (!VT)
-    return false;
-
-  QualType ElemT = VT->getElementType();
-  unsigned NumElts = VT->getNumElements();
-
-  SmallVector<APValue, 64> ResultElements;
-  ResultElements.reserve(NumElts);
-
-  for (unsigned Idx = 0; Idx != NumElts; ++Idx) {
-    APValue CtlVal = ControlVec.getVectorElt(Idx);
-    APSInt CtlByte = CtlVal.getInt();
-    uint8_t Ctl = static_cast<uint8_t>(CtlByte.getZExtValue());
-
-    if (Ctl & 0x80) {
-      APValue Zero(Info.Ctx.MakeIntValue(0, ElemT));
-      ResultElements.push_back(Zero);
-    } else {
-      unsigned LaneBase = (Idx / 16) * 16;
-      unsigned SrcOffset = Ctl & 0x0F;
-      unsigned SrcIdx = LaneBase + SrcOffset;
-
-      ResultElements.push_back(SrcVec.getVectorElt(SrcIdx));
-    }
-  }
-  Out = APValue(ResultElements.data(), ResultElements.size());
-  return true;
-}
-
-static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call,
-                             bool IsShufHW, APValue &Out) {
-  APValue Vec;
-  APSInt Imm;
-  if (!EvaluateAsRValue(Info, Call->getArg(0), Vec))
-    return false;
-  if (!EvaluateInteger(Call->getArg(1), Imm, Info))
-    return false;
-
-  const auto *VT = Call->getType()->getAs<VectorType>();
-  if (!VT)
-    return false;
-
-  QualType ElemT = VT->getElementType();
-  unsigned ElemBits = Info.Ctx.getTypeSize(ElemT);
-  unsigned NumElts = VT->getNumElements();
-
-  unsigned LaneBits = 128u;
-  unsigned LaneElts = LaneBits / ElemBits;
-  if (!LaneElts || (NumElts % LaneElts) != 0)
-    return false;
-
-  uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
-
-  SmallVector<APValue, 32> ResultElements;
-  ResultElements.reserve(NumElts);
-
-  for (unsigned Idx = 0; Idx != NumElts; Idx++) {
-    unsigned LaneBase = (Idx / LaneElts) * LaneElts;
-    unsigned LaneIdx = Idx % LaneElts;
-    unsigned SrcIdx = Idx;
-    unsigned Sel = (Ctl >> (2 * LaneIdx)) & 0x3;
-
-    if (ElemBits == 32) {
-      SrcIdx = LaneBase + Sel;
-    } else {
-      constexpr unsigned HalfSize = 4;
-      bool InHigh = LaneIdx >= HalfSize;
-      if (!IsShufHW && !InHigh) {
-        SrcIdx = LaneBase + Sel;
-      } else if (IsShufHW && InHigh) {
-        unsigned Rel = LaneIdx - HalfSize;
-        Sel = (Ctl >> (2 * Rel)) & 0x3;
-        SrcIdx = LaneBase + HalfSize + Sel;
-      }
-    }
-
-    ResultElements.push_back(Vec.getVectorElt(SrcIdx));
-  }
-
-  Out = APValue(ResultElements.data(), ResultElements.size());
-  return true;
-}
-
 bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   if (!IsConstantEvaluatedBuiltinCall(E))
     return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -12993,7 +12927,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case X86::BI__builtin_ia32_pshufb256:
   case X86::BI__builtin_ia32_pshufb512: {
     APValue R;
-    if (!evalPshufbBuiltin(Info, E, R))
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned ShuffleMask) -> std::pair<unsigned, int> {
+              uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
+              if (Ctlb & 0x80) {
+                return std::make_pair(0, -1);
+              } else {
+                unsigned LaneBase = (DstIdx / 16) * 16;
+                unsigned SrcOffset = Ctlb & 0x0F;
+                unsigned SrcIdx = LaneBase + SrcOffset;
+                return std::make_pair(0, static_cast<int>(SrcIdx));
+              }
+            }))
       return false;
     return Success(R, E);
   }
@@ -13002,7 +12948,21 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case X86::BI__builtin_ia32_pshuflw256:
   case X86::BI__builtin_ia32_pshuflw512: {
     APValue R;
-    if (!evalPshufBuiltin(Info, E, false, R))
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+              constexpr unsigned LaneBits = 128u;
+              constexpr unsigned ElemBits = 16u;
+              constexpr unsigned LaneElts = LaneBits / ElemBits;
+              constexpr unsigned HalfSize = 4;
+              unsigned LaneBase = (DstIdx / LaneElts) * LaneElts;
+              unsigned LaneIdx = DstIdx % LaneElts;
+              if (LaneIdx < HalfSize) {
+                unsigned Sel = (Mask >> (2 * LaneIdx)) & 0x3;
+                return std::make_pair(0, static_cast<int>(LaneBase + Sel));
+              }
+                return std::make_pair(0, static_cast<int>(DstIdx));
+            }))
       return false;
     return Success(R, E);
   }
@@ -13011,7 +12971,22 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case X86::BI__builtin_ia32_pshufhw256:
   case X86::BI__builtin_ia32_pshufhw512: {
     APValue R;
-    if (!evalPshufBuiltin(Info, E, true, R))
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+              constexpr unsigned LaneBits = 128u;
+              constexpr unsigned ElemBits = 16u;
+              constexpr unsigned LaneElts = LaneBits / ElemBits;
+              constexpr unsigned HalfSize = 4;
+              unsigned LaneBase = (DstIdx / LaneElts) * LaneElts;
+              unsigned LaneIdx = DstIdx % LaneElts;
+              if (LaneIdx >= HalfSize) {
+                unsigned Rel = LaneIdx - HalfSize;
+                unsigned Sel = (Mask >> (2 * Rel)) & 0x3;
+                return std::make_pair(0, static_cast<int>(LaneBase + HalfSize + Sel));
+              }
+                return std::make_pair(0, static_cast<int>(DstIdx));
+            }))
       return false;
     return Success(R, E);
   }
@@ -13020,7 +12995,17 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case X86::BI__builtin_ia32_pshufd256:
   case X86::BI__builtin_ia32_pshufd512: {
     APValue R;
-    if (!evalPshufBuiltin(Info, E, false, R))
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+              constexpr unsigned LaneBits = 128u;
+              constexpr unsigned ElemBits = 32u;
+              constexpr unsigned LaneElts = LaneBits / ElemBits;
+              unsigned LaneBase = (DstIdx / LaneElts) * LaneElts;
+              unsigned LaneIdx = DstIdx % LaneElts;
+              unsigned Sel = (Mask >> (2 * LaneIdx)) & 0x3;
+              return std::make_pair(0, static_cast<int>(LaneBase + Sel));
+            }))
       return false;
     return Success(R, E);
   }
@@ -13500,62 +13485,38 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case X86::BI__builtin_ia32_pslldqi128_byteshift:
   case X86::BI__builtin_ia32_pslldqi256_byteshift:
   case X86::BI__builtin_ia32_pslldqi512_byteshift: {
-    assert(E->getNumArgs() == 2);
-
-    APValue Src;
-    APSInt Imm;
-    if (!EvaluateAsRValue(Info, E->getArg(0), Src) ||
-        !EvaluateInteger(E->getArg(1), Imm, Info))
+    APValue R;
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
+              unsigned LaneBase = (DstIdx / 16) * 16;
+              unsigned LaneIdx = DstIdx % 16;
+              if (LaneIdx < Shift) {
+                return std::make_pair(0, -1);
+              }
+                return std::make_pair(0, static_cast<int>(LaneBase + LaneIdx - Shift));
+            }))
       return false;
-
-    unsigned VecLen = Src.getVectorLength();
-    unsigned Shift = Imm.getZExtValue() & 0xff;
-
-    SmallVector<APValue> ResultElements;
-    for (unsigned Lane = 0; Lane != VecLen; Lane += 16) {
-      for (unsigned I = 0; I != 16; ++I) {
-        if (I < Shift) {
-          APSInt Zero(8, /*isUnsigned=*/true);
-          Zero = 0;
-          ResultElements.push_back(APValue(Zero));
-        } else {
-          ResultElements.push_back(Src.getVectorElt(Lane + I - Shift));
-        }
-      }
-    }
-
-    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
-  }
+    return Success(R, E);
+}
 
   case X86::BI__builtin_ia32_psrldqi128_byteshift:
   case X86::BI__builtin_ia32_psrldqi256_byteshift:
   case X86::BI__builtin_ia32_psrldqi512_byteshift: {
-    assert(E->getNumArgs() == 2);
-
-    APValue Src;
-    APSInt Imm;
-    if (!EvaluateAsRValue(Info, E->getArg(0), Src) ||
-        !EvaluateInteger(E->getArg(1), Imm, Info))
+    APValue R;
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
+              unsigned LaneBase = (DstIdx / 16) * 16;
+              unsigned LaneIdx = DstIdx % 16;
+              if (LaneIdx + Shift < 16) {
+                return std::make_pair(0, static_cast<int>(LaneBase + LaneIdx + Shift));
+              }
+                return std::make_pair(0, -1);
+            }))
       return false;
-
-    unsigned VecLen = Src.getVectorLength();
-    unsigned Shift = Imm.getZExtValue() & 0xff;
-
-    SmallVector<APValue> ResultElements;
-    for (unsigned Lane = 0; Lane != VecLen; Lane += 16) {
-      for (unsigned I = 0; I != 16; ++I) {
-        if (I + Shift < 16) {
-          ResultElements.push_back(Src.getVectorElt(Lane + I + Shift));
-        } else {
-          APSInt Zero(8, /*isUnsigned=*/true);
-          Zero = 0;
-          ResultElements.push_back(APValue(Zero));
-        }
-      }
-    }
-
-    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
-  }
+    return Success(R, E);
+}
   case X86::BI__builtin_ia32_vpermi2varq128:
   case X86::BI__builtin_ia32_vpermi2varpd128: {
     APValue R;

>From 1b4f4288d3adbd62f9da63427a802845c25601fb Mon Sep 17 00:00:00 2001
From: TelGome <[email protected]>
Date: Tue, 11 Nov 2025 19:18:38 +0800
Subject: [PATCH 2/2] Fix style and format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 34 +++++++++----------
 clang/lib/AST/ExprConstant.cpp           | 43 +++++++++++++-----------
 2 files changed, 39 insertions(+), 38 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0d5d07a8f9e2b..d2dad1dc9bab3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4474,14 +4474,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
     return interp__builtin_ia32_shuffle_generic(
         S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
           uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
-          if (Ctlb & 0x80) {
+          if (Ctlb & 0x80)
             return std::make_pair(0, -1);
-          } else {
-            unsigned LaneBase = (DstIdx / 16) * 16;
-            unsigned SrcOffset = Ctlb & 0x0F;
-            unsigned SrcIdx = LaneBase + SrcOffset;
-            return std::make_pair(0, static_cast<int>(SrcIdx));
-            }
+
+          unsigned LaneBase = (DstIdx / 16) * 16;
+          unsigned SrcOffset = Ctlb & 0x0F;
+          unsigned SrcIdx = LaneBase + SrcOffset;
+          return std::make_pair(0, static_cast<int>(SrcIdx));
         });
 
   case X86::BI__builtin_ia32_pshuflw:
@@ -4494,9 +4493,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           if (LaneIdx < 4) {
             unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
             return std::make_pair(0, static_cast<int>(LaneBase + Sel));
-          } else {
-            return std::make_pair(0, static_cast<int>(DstIdx));
           }
+
+          return std::make_pair(0, static_cast<int>(DstIdx));
         });
 
   case X86::BI__builtin_ia32_pshufhw:
@@ -4509,9 +4508,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           if (LaneIdx >= 4) {
             unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3;
             return std::make_pair(0, static_cast<int>(LaneBase + 4 + Sel));
-          } else {
-            return std::make_pair(0, static_cast<int>(DstIdx));
           }
+
+          return std::make_pair(0, static_cast<int>(DstIdx));
         });
 
   case X86::BI__builtin_ia32_pshufd:
@@ -4686,12 +4685,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
         [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
           unsigned LaneBase = (DstIdx / 16) * 16;
           unsigned LaneIdx = DstIdx % 16;
-          if (LaneIdx < Shift) {
+          if (LaneIdx < Shift)
             return std::make_pair(0, -1);
-          }
-
-            return std::make_pair(0, static_cast<int>(LaneBase + LaneIdx - Shift));
 
+          return std::make_pair(0,
+                                static_cast<int>(LaneBase + LaneIdx - Shift));
         });
 
   case X86::BI__builtin_ia32_psrldqi128_byteshift:
@@ -4706,9 +4704,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
         [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
           unsigned LaneBase = (DstIdx / 16) * 16;
           unsigned LaneIdx = DstIdx % 16;
-          if (LaneIdx + Shift < 16) {
-            return std::make_pair(0, static_cast<int>(LaneBase + LaneIdx + Shift));
-          }
+          if (LaneIdx + Shift < 16)
+            return std::make_pair(0,
+                                  static_cast<int>(LaneBase + LaneIdx + Shift));
 
           return std::make_pair(0, -1);
         });
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1541356d298da..d93fa6c6954cb 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12929,16 +12929,16 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     APValue R;
     if (!evalShuffleGeneric(
             Info, E, R,
-            [](unsigned DstIdx, unsigned ShuffleMask) -> std::pair<unsigned, int> {
+            [](unsigned DstIdx,
+               unsigned ShuffleMask) -> std::pair<unsigned, int> {
               uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
-              if (Ctlb & 0x80) {
+              if (Ctlb & 0x80)
                 return std::make_pair(0, -1);
-              } else {
-                unsigned LaneBase = (DstIdx / 16) * 16;
-                unsigned SrcOffset = Ctlb & 0x0F;
-                unsigned SrcIdx = LaneBase + SrcOffset;
-                return std::make_pair(0, static_cast<int>(SrcIdx));
-              }
+
+              unsigned LaneBase = (DstIdx / 16) * 16;
+              unsigned SrcOffset = Ctlb & 0x0F;
+              unsigned SrcIdx = LaneBase + SrcOffset;
+              return std::make_pair(0, static_cast<int>(SrcIdx));
             }))
       return false;
     return Success(R, E);
@@ -12961,7 +12961,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
                 unsigned Sel = (Mask >> (2 * LaneIdx)) & 0x3;
                 return std::make_pair(0, static_cast<int>(LaneBase + Sel));
               }
-                return std::make_pair(0, static_cast<int>(DstIdx));
+              return std::make_pair(0, static_cast<int>(DstIdx));
             }))
       return false;
     return Success(R, E);
@@ -12983,9 +12983,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
               if (LaneIdx >= HalfSize) {
                 unsigned Rel = LaneIdx - HalfSize;
                 unsigned Sel = (Mask >> (2 * Rel)) & 0x3;
-                return std::make_pair(0, static_cast<int>(LaneBase + HalfSize + Sel));
+                return std::make_pair(
+                    0, static_cast<int>(LaneBase + HalfSize + Sel));
               }
-                return std::make_pair(0, static_cast<int>(DstIdx));
+              return std::make_pair(0, static_cast<int>(DstIdx));
             }))
       return false;
     return Success(R, E);
@@ -13491,14 +13492,15 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
             [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
               unsigned LaneBase = (DstIdx / 16) * 16;
               unsigned LaneIdx = DstIdx % 16;
-              if (LaneIdx < Shift) {
+              if (LaneIdx < Shift)
                 return std::make_pair(0, -1);
-              }
-                return std::make_pair(0, static_cast<int>(LaneBase + LaneIdx - Shift));
+
+              return std::make_pair(
+                  0, static_cast<int>(LaneBase + LaneIdx - Shift));
             }))
       return false;
     return Success(R, E);
-}
+  }
 
   case X86::BI__builtin_ia32_psrldqi128_byteshift:
   case X86::BI__builtin_ia32_psrldqi256_byteshift:
@@ -13509,14 +13511,15 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
             [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
               unsigned LaneBase = (DstIdx / 16) * 16;
               unsigned LaneIdx = DstIdx % 16;
-              if (LaneIdx + Shift < 16) {
-                return std::make_pair(0, static_cast<int>(LaneBase + LaneIdx + Shift));
-              }
-                return std::make_pair(0, -1);
+              if (LaneIdx + Shift < 16)
+                return std::make_pair(
+                    0, static_cast<int>(LaneBase + LaneIdx + Shift));
+
+              return std::make_pair(0, -1);
             }))
       return false;
     return Success(R, E);
-}
+  }
   case X86::BI__builtin_ia32_vpermi2varq128:
   case X86::BI__builtin_ia32_vpermi2varpd128: {
     APValue R;

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to