llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Nagraj Gaonkar (NagrajMG)

<details>
<summary>Changes</summary>

## [Headers][X86] Allow PSHUFD/PSHUFLW/PSHUFHW/PSHUFW shuffle intrinsics to be used in `constexpr`



### PSHUFW — shuffle 4×i16 in MMX (64-bit)
| Intrinsic | X86 Builtin            | CPUID Flags | Header        |
|-----------|------------------------|------------|---------------|
| `_mm_shuffle_pi16` | `__builtin_ia32_pshufw` | MMX | `mmintrin.h` |

---

### PSHUFLW — shuffle low 4×i16 per 128-bit lane
| Intrinsics | X86 Builtins | CPUID Flags | Header |
|-----------|--------------|------------|-------|
| `_mm_shufflelo_epi16`            | `__builtin_ia32_pshuflw`             | SSE2            | `emmintrin.h` |
| `_mm256_shufflelo_epi16`         | `__builtin_ia32_pshuflw256`          | AVX2            | `avx2intrin.h` |
| `_mm512_shufflelo_epi16`         | `__builtin_ia32_pshuflw512`          | AVX-512BW       | `avx512bwintrin.h` |
| `_mm_mask_shufflelo_epi16`       | `__builtin_ia32_pshuflw128_mask`     | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm256_mask_shufflelo_epi16`    | `__builtin_ia32_pshuflw256_mask`     | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm512_mask_shufflelo_epi16`    | `__builtin_ia32_pshuflw512_mask`     | AVX-512BW       | `avx512bwintrin.h` |
| `_mm_maskz_shufflelo_epi16`      | `__builtin_ia32_pshuflw128_maskz`    | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm256_maskz_shufflelo_epi16`   | `__builtin_ia32_pshuflw256_maskz`    | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm512_maskz_shufflelo_epi16`   | `__builtin_ia32_pshuflw512_maskz`    | AVX-512BW       | `avx512bwintrin.h` |

---

### PSHUFHW — shuffle high 4×i16 per 128-bit lane
| Intrinsics | X86 Builtins | CPUID Flags | Header |
|-----------|--------------|------------|-------|
| `_mm_shufflehi_epi16`            | `__builtin_ia32_pshufhw`             | SSE2            | `emmintrin.h` |
| `_mm256_shufflehi_epi16`         | `__builtin_ia32_pshufhw256`          | AVX2            | `avx2intrin.h` |
| `_mm512_shufflehi_epi16`         | `__builtin_ia32_pshufhw512`          | AVX-512BW       | `avx512bwintrin.h` |
| `_mm_mask_shufflehi_epi16`       | `__builtin_ia32_pshufhw128_mask`     | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm256_mask_shufflehi_epi16`    | `__builtin_ia32_pshufhw256_mask`     | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm512_mask_shufflehi_epi16`    | `__builtin_ia32_pshufhw512_mask`     | AVX-512BW       | `avx512bwintrin.h` |
| `_mm_maskz_shufflehi_epi16`      | `__builtin_ia32_pshufhw128_maskz`    | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm256_maskz_shufflehi_epi16`   | `__builtin_ia32_pshufhw256_maskz`    | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm512_maskz_shufflehi_epi16`   | `__builtin_ia32_pshufhw512_maskz`    | AVX-512BW       | `avx512bwintrin.h` |

---

### PSHUFD — shuffle 4×i32 per 128-bit lane
| Intrinsics | X86 Builtins | CPUID Flags | Header |
|-----------|--------------|------------|-------|
| `_mm_shuffle_epi32`            | `__builtin_ia32_pshufd`             | SSE2         | `emmintrin.h` |
| `_mm256_shuffle_epi32`         | `__builtin_ia32_pshufd256`          | AVX2         | `avx2intrin.h` |
| `_mm512_shuffle_epi32`         | `__builtin_ia32_pshufd512`          | AVX-512F     | `avx512fintrin.h` |
| `_mm_mask_shuffle_epi32`       | `__builtin_ia32_pshufd128_mask`     | AVX-512VL    | `avx512vlintrin.h` |
| `_mm256_mask_shuffle_epi32`    | `__builtin_ia32_pshufd256_mask`     | AVX-512VL    | `avx512vlintrin.h` |
| `_mm512_mask_shuffle_epi32`    | `__builtin_ia32_pshufd512_mask`     | AVX-512F     | `avx512fintrin.h` |
| `_mm_maskz_shuffle_epi32`      | `__builtin_ia32_pshufd128_maskz`    | AVX-512VL    | `avx512vlintrin.h` |
| `_mm256_maskz_shuffle_epi32`   | `__builtin_ia32_pshufd256_maskz`    | AVX-512VL    | `avx512vlintrin.h` |
| `_mm512_maskz_shuffle_epi32`   | `__builtin_ia32_pshufd512_maskz`    | AVX-512F     | `avx512fintrin.h` |

---

Fixes **#<!-- -->156611**

Adds constexpr evaluation for these intrinsics in both the **ExprConstant** 
evaluator and the **Bytecode Interpreter**, with tests for the unmasked, 
merge-masked (`_mask`), and zero-masked (`_maskz`) variants across the MMX, 
128-bit, 256-bit, and 512-bit widths.

---

Patch is 57.80 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/161210.diff


11 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsX86.td (+35-10) 
- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+245) 
- (modified) clang/lib/AST/ExprConstant.cpp (+287) 
- (modified) clang/lib/Headers/mmintrin.h (+5) 
- (modified) clang/test/CodeGen/X86/avx2-builtins.c (+3-3) 
- (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+10-1) 
- (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+7-2) 
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+17) 
- (modified) clang/test/CodeGen/X86/avx512vlbw-builtins.c (+50) 
- (modified) clang/test/CodeGen/X86/mmx-builtins.c (+1-1) 
- (modified) clang/test/CodeGen/X86/sse2-builtins.c (+3-3) 


``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 77e599587edc3..e70691a30627a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -145,6 +145,10 @@ let Features = "mmx", Header = "mmintrin.h", Attributes = 
[NoThrow, Const] in {
   def _m_prefetch : X86LibBuiltin<"void(void *)">;
 }
 
+let Features = "mmx", Attributes = [NoThrow, Const, Constexpr] in {
+  def pshufw : X86Builtin<"_Vector<4, short>(_Vector<4, short>, _Constant 
int)">;
+}
+
 // PRFCHW
 let Features = "prfchw", Header = "intrin.h", Attributes = [NoThrow, Const] in 
{
   def _m_prefetchw : X86LibBuiltin<"void(void volatile const *)">;
@@ -217,10 +221,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
   def movnti : X86Builtin<"void(int *, int)">;
 }
 
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
-  def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant 
int)">;
+  def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
   def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant 
int)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
   def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, 
_Vector<16, char>)">;
   def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
   def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -584,9 +591,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] i
   def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Vector<16, short>)">;
   def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, 
_Vector<32, char>)">;
   def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, 
char>)">;
-  def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant 
int)">;
-  def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int)">;
-  def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int)">;
   def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, 
char>)">;
   def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Vector<16, short>)">;
   def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
@@ -647,6 +651,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWi
   def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, 
_Vector<16, short>)">;
   def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, 
int>)">;
   def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, 
_Vector<16, short>)">;
+
+  def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int)">;
+  def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int)">;
+  def pshufd256  : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant 
int)">;
 }
 
 let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
@@ -1990,13 +1998,13 @@ let Features = "avx512vl", Attributes = [NoThrow, 
Const, Constexpr, RequiredVect
 }
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int)">;
-  def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int)">;
   def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, 
short>)">;
 }
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Vector<32, short>)">;
+  def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int)">;
+  def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int)">;
 }
 
 let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
@@ -2016,21 +2024,35 @@ let Features = "avx512f",
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def psrlv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Vector<32, short>)">;
+  def pshuflw512_mask  : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int, _Vector<32, short>, unsigned int)">;
+  def pshuflw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int, unsigned int)">;
+  def pshufhw512_mask  : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int, _Vector<32, short>, unsigned int)">;
+  def pshufhw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int, unsigned int)">;
 }
 
 let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
   def psrlv16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Vector<16, short>)">;
+  def pshuflw256_mask  : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int, _Vector<16, short>, unsigned short)">;
+  def pshuflw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int, unsigned short)">;
+  def pshufhw256_mask  : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int, _Vector<16, short>, unsigned short)">;
+  def pshufhw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int, unsigned short)">;
 }
 
 let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, 
short>)">;
+  def pshuflw128_mask  : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Constant int, _Vector<8, short>, unsigned char)">;
+  def pshuflw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Constant int, unsigned char)">;
+  def pshufhw128_mask  : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Constant int, _Vector<8, short>, unsigned char)">;
+  def pshufhw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Constant int, unsigned char)">;
 }
 
-let Features = "avx512f",
-    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
   def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
   def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long 
int>, int)">;
+  def pshufd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, 
_Constant int, _Vector<16, int>, unsigned short)">;
+  def pshufd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, 
_Constant int, unsigned short)">;
+  def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant 
int)">;
 }
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
@@ -2047,10 +2069,14 @@ let Features = "avx512bw,avx512vl", Attributes = 
[NoThrow, Const, Constexpr, Req
 
 let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def psravq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, _Vector<2, long long int>)">;
+  def pshufd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant 
int, _Vector<4, int>, unsigned char)">;
+  def pshufd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant 
int, unsigned char)">;
 }
 
 let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
   def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<4, long long int>)">;
+  def pshufd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant 
int, _Vector<8, int>, unsigned char)">;
+  def pshufd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant 
int, unsigned char)">;
 }
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
@@ -3266,7 +3292,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>
 }
 
 let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant 
int)">;
   def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, double>, unsigned char)">;
   def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long 
long int>, _Vector<8, long long int>, unsigned char)">;
 }
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 891344d4e6ed0..e0bd5d531db34 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2862,6 +2862,218 @@ static bool interp__builtin_blend(InterpState &S, 
CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_ia32_pshuflw_common(InterpState &S, CodePtr OpPC,
+                                                const CallExpr *Call) {
+  const unsigned NumArgs = Call->getNumArgs();
+  assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+  APSInt K;
+  Pointer SrcPT;
+  const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+  const bool IsMaskZ = (NumArgs == 3);
+  if (NumArgs == 4) {
+    K = popToAPSInt(S, Call->getArg(3));
+    SrcPT = S.Stk.pop<Pointer>();
+  } else if (NumArgs == 3) {
+    K = popToAPSInt(S, Call->getArg(2));
+  }
+
+  APSInt Imm = popToAPSInt(S, Call->getArg(1));
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+  const unsigned NumElems = Dst.getNumElems();
+  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+  const unsigned ElemBits = 16;
+  const unsigned LaneElems = 128u / ElemBits;
+  const unsigned Half = 4;
+  assert(NumElems % LaneElems == 0 && "pshuflw expects 128-bit lanes");
+  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+  for (unsigned i = 0; i != NumElems; ++i) {
+    const unsigned laneBase = (i / LaneElems) * LaneElems;
+    const unsigned inLane = i % LaneElems;
+
+    unsigned srcIdx;
+    if (inLane < Half) {
+      const unsigned pos = inLane;
+      const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+      srcIdx = laneBase + sel;
+    } else {
+      srcIdx = i;
+    }
+
+    APSInt Chosen;
+    INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+    if (!HasMask) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+      continue;
+    }
+
+    const bool Keep =
+        (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+
+    if (Keep) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+    } else if (IsMaskZ) {
+      APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+      Zero.setIsSigned(Chosen.isSigned());
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Zero); });
+    } else {
+      APSInt PT;
+      INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+      INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
+static bool interp__builtin_ia32_pshufhw_common(InterpState &S, CodePtr OpPC,
+                                                const CallExpr *Call) {
+  (void)OpPC;
+  const unsigned NumArgs = Call->getNumArgs();
+  assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+
+  APSInt K;
+  Pointer SrcPT;
+  const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+  const bool IsMaskZ = (NumArgs == 3);
+
+  if (NumArgs == 4) {
+    K = popToAPSInt(S, Call->getArg(3));
+    SrcPT = S.Stk.pop<Pointer>();
+  } else if (NumArgs == 3) {
+    K = popToAPSInt(S, Call->getArg(2));
+  }
+
+  APSInt Imm = popToAPSInt(S, Call->getArg(1));
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  const unsigned NumElems = Dst.getNumElems();
+  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+
+  const unsigned ElemBits = 16;
+  const unsigned LaneElems = 128u / ElemBits;
+  const unsigned HalfBase = 4;
+  assert(NumElems % LaneElems == 0);
+
+  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+  for (unsigned i = 0; i != NumElems; ++i) {
+    const unsigned laneBase = (i / LaneElems) * LaneElems;
+    const unsigned inLane = i % LaneElems;
+
+    unsigned srcIdx;
+    if (inLane >= HalfBase) {
+      const unsigned pos = inLane - HalfBase;
+      const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+      srcIdx = laneBase + HalfBase + sel;
+    } else {
+      srcIdx = i;
+    }
+
+    APSInt Chosen;
+    INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+    if (!HasMask) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+      continue;
+    }
+
+    const bool Keep =
+        (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+    if (Keep) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+    } else if (IsMaskZ) {
+      APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+      Zero.setIsSigned(Chosen.isSigned());
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Zero); });
+    } else {
+      APSInt PT;
+      INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+      INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
+static bool interp__builtin_ia32_pshufd_common(InterpState &S, CodePtr OpPC,
+                                              const CallExpr *Call) {
+  (void)OpPC;
+  const unsigned NumArgs = Call->getNumArgs();
+  assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+
+  APSInt K;
+  Pointer SrcPT;
+  const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+  const bool IsMaskZ = (NumArgs == 3);
+
+  if (NumArgs == 4) {
+    K = popToAPSInt(S, Call->getArg(3));
+    SrcPT = S.Stk.pop<Pointer>();
+  } else if (NumArgs == 3) {
+    K = popToAPSInt(S, Call->getArg(2));
+  }
+
+  APSInt Imm = popToAPSInt(S, Call->getArg(1));
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  const unsigned NumElems = Dst.getNumElems();
+  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+
+  const unsigned ElemBits = 32;
+  const unsigned LaneElems = 128u / ElemBits;
+  assert(NumElems % LaneElems == 0);
+
+  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+  for (unsigned i = 0; i != NumElems; ++i) {
+    const unsigned laneBase = (i / LaneElems) * LaneElems;
+    const unsigned inLane = i % LaneElems;
+    const unsigned sel = (Ctl >> (2 * inLane)) & 0x3;
+    const unsigned srcIdx = laneBase + sel;
+
+    APSInt Chosen;
+    INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+    if (!HasMask) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+      continue;
+    }
+
+    const bool Keep =
+        (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+    if (Keep) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+    } else if (IsMaskZ) {
+      APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+      Zero.setIsSigned(Chosen.isSigned());
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Zero); });
+    } else {
+      APSInt PT;
+      INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+      INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
 static bool interp__builtin_elementwise_triop(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
     llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3417,6 +3629,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
     return interp__builtin_elementwise_int_binop(S, OpPC, Call,
                                                  llvm::APIntOps::mulhs);
 
+  case clang::X86::BI__builtin_ia32_pshuflw:
+  case clang::X86::BI__builtin_ia32_pshuflw256:
+  case clang::X86::BI__builtin_ia32_pshuflw512:
+  case clang::X86::BI__builtin_ia32_pshuflw128_mask:
+  case clang::X86::BI__builtin_ia32_pshuflw256_mask:
+  case clang::X86::BI__builtin_ia32_pshuflw512_mask:
+  case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
+  case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
+  case clang::X86::BI__builtin_ia32_pshuflw512_maskz:
+    return interp__builtin_ia32_pshuflw_common(S, OpPC, Call);
+
+  case clang::X86::BI__builtin_ia32_pshufhw:
+  case clang::X86::BI__builtin_ia32_pshufhw256:
+  case clang::X86::BI__builtin_ia32_pshufhw512:
+  case clang::X86::BI__builtin_ia32_pshufhw128_mask:
+  case clang::X86::BI__builtin_ia32_pshufhw256_mask:
+  case clang::X86::BI__builtin_ia32_pshufhw512_mask:
+  case clang::X86::BI__builtin_ia32_pshufhw128_maskz:
+  case clang::X86::BI__builtin_ia32_pshufhw256_maskz:
+  case clang::X86::BI__builtin_ia32_pshufhw512_maskz:
+    return interp__builtin_ia32_pshufhw_common(S, OpPC, Call);
+
+  case clang::X86::BI__builtin_ia32_pshufd:
+  case clang::X86::BI__builtin_ia32_pshufd256:
+  case clang::X86::BI__builtin_ia32_pshufd512:
+  case clang::X86::BI__builtin_ia32_pshufd128_mask:
+  case clang::X86::BI__builtin_ia32_pshufd256_mask:
+  case clang::X86::BI__builtin_ia32_pshufd512_mask:
+  case clang::X86::BI__builtin_ia32_pshufd128_maskz:
+  case clang::X86::BI__builtin_ia32_pshufd256_maskz:
+  case clang::X86::BI__builtin_ia32_pshufd512_maskz:
+    return interp__builtin_ia32_pshufd_common(S, OpPC, Call);
+
   case clang::X86::BI__builtin_ia32_psllv2di:
   case clang::X86::BI__builtin_ia32_psllv4di:
   case clang::X86::BI__builtin_ia32_psllv4si:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..1ce601d37e0d6 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11869,6 +11869,293 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
+  case X86::BI__builtin_ia32_pshufw: {
+    APValue Src;
+    APSInt Imm;
+    if (!EvaluateAsRValue(Info, E->getArg(0), Src)) return false;
+    if (!EvaluateInteger(E->getArg(1), Imm, Info))  return false;
+
+    unsigned N = Src.getVectorLength(); 
+    SmallVector<APValue, 4> ResultElements;
+    ResultElements.reserve(N);
+
+    uint8_t C = static_cast<uint8_t>(Imm.getZExtValue());
+    for (unsigned i = 0; i != N; ++i) {
+      unsigned sel = (C >> (2 * i)) & 0x3;
+      ResultElements.push_back(Src.getVectorElt(sel));
+    }
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+
+  case clang::X86::BI__builtin_ia32_pshuflw:
+  case clang::X86::BI__builtin_ia32_pshuflw256:
+  case clang::X86::BI__builtin_ia32_pshuflw512:
+  case clang::X86::BI__builtin_ia32_pshuflw128_mask:
+  case clang::X86::BI__builtin_ia32_pshuflw256_mask:
+  case clang::X86::BI__builtin_ia32_pshuflw512_mask:
+  case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
+  case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
+  case clang::X86::BI__builtin_ia32_pshuflw512_maskz: {
+    const unsigned BID = E->getBuiltinCallee();
+
+    const bool IsMask =
+        BID == clang::X86::BI__builtin_ia32_pshuflw128_mask  ||
+        BID == clang::X86::BI__builtin_ia32_pshuflw256_mask  ||
+        BID == clang::X86::BI__builtin_ia32_pshuflw512_mask;
+
+    const bool IsMaskZ =
+        BID == clang::X86::BI__builtin_ia32_pshuflw128_maskz ||
+        BID == clang::X86::BI__builtin_ia32_pshuflw256_maskz ||
+        BID == clang::X86::BI__builtin_ia32_pshuflw512_maskz;
+
+    const unsigned AIdx  = 0, ImmIdx = 1;
+    const unsigned SrcIdx = 2;
+    const unsigned KI...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/161210
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to