llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Nagraj Gaonkar (NagrajMG)

<details>
<summary>Changes</summary>

## [Headers][X86] Allow PSHUFD/PSHUFLW/PSHUFW shuffle intrinsics to be used in `constexpr`



### PSHUFW — shuffle 4×i16 in MMX (64-bit)
| Intrinsic | X86 Builtin            | CPUID Flags | Header        |
|-----------|------------------------|------------|---------------|
| `_mm_shuffle_pi16` | `__builtin_ia32_pshufw` | MMX | `mmintrin.h` |

---

### PSHUFLW — shuffle low 4×i16 per 128-bit lane
| Intrinsics | X86 Builtins | CPUID Flags | Header |
|-----------|--------------|------------|-------|
| `_mm_shufflelo_epi16`            | `__builtin_ia32_pshuflw`             | SSE2            | `emmintrin.h` |
| `_mm256_shufflelo_epi16`         | `__builtin_ia32_pshuflw256`          | AVX2            | `avx2intrin.h` |
| `_mm512_shufflelo_epi16`         | `__builtin_ia32_pshuflw512`          | AVX-512BW       | `avx512bwintrin.h` |
| `_mm_mask_shufflelo_epi16`       | `__builtin_ia32_pshuflw128_mask`     | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm256_mask_shufflelo_epi16`    | `__builtin_ia32_pshuflw256_mask`     | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm512_mask_shufflelo_epi16`    | `__builtin_ia32_pshuflw512_mask`     | AVX-512BW       | `avx512bwintrin.h` |
| `_mm_maskz_shufflelo_epi16`      | `__builtin_ia32_pshuflw128_maskz`    | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm256_maskz_shufflelo_epi16`   | `__builtin_ia32_pshuflw256_maskz`    | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm512_maskz_shufflelo_epi16`   | `__builtin_ia32_pshuflw512_maskz`    | AVX-512BW       | `avx512bwintrin.h` |

---

### PSHUFHW — shuffle high 4×i16 per 128-bit lane
| Intrinsics | X86 Builtins | CPUID Flags | Header |
|-----------|--------------|------------|-------|
| `_mm_shufflehi_epi16`            | `__builtin_ia32_pshufhw`             | SSE2            | `emmintrin.h` |
| `_mm256_shufflehi_epi16`         | `__builtin_ia32_pshufhw256`          | AVX2            | `avx2intrin.h` |
| `_mm512_shufflehi_epi16`         | `__builtin_ia32_pshufhw512`          | AVX-512BW       | `avx512bwintrin.h` |
| `_mm_mask_shufflehi_epi16`       | `__builtin_ia32_pshufhw128_mask`     | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm256_mask_shufflehi_epi16`    | `__builtin_ia32_pshufhw256_mask`     | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm512_mask_shufflehi_epi16`    | `__builtin_ia32_pshufhw512_mask`     | AVX-512BW       | `avx512bwintrin.h` |
| `_mm_maskz_shufflehi_epi16`      | `__builtin_ia32_pshufhw128_maskz`    | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm256_maskz_shufflehi_epi16`   | `__builtin_ia32_pshufhw256_maskz`    | AVX-512VL+BW    | `avx512vlbwintrin.h` |
| `_mm512_maskz_shufflehi_epi16`   | `__builtin_ia32_pshufhw512_maskz`    | AVX-512BW       | `avx512bwintrin.h` |

---

### PSHUFD — shuffle 4×i32 per 128-bit lane
| Intrinsics | X86 Builtins | CPUID Flags | Header |
|-----------|--------------|------------|-------|
| `_mm_shuffle_epi32`            | `__builtin_ia32_pshufd`             | SSE2         | `emmintrin.h` |
| `_mm256_shuffle_epi32`         | `__builtin_ia32_pshufd256`          | AVX2         | `avx2intrin.h` |
| `_mm512_shuffle_epi32`         | `__builtin_ia32_pshufd512`          | AVX-512F     | `avx512fintrin.h` |
| `_mm_mask_shuffle_epi32`       | `__builtin_ia32_pshufd128_mask`     | AVX-512VL    | `avx512vlintrin.h` |
| `_mm256_mask_shuffle_epi32`    | `__builtin_ia32_pshufd256_mask`     | AVX-512VL    | `avx512vlintrin.h` |
| `_mm512_mask_shuffle_epi32`    | `__builtin_ia32_pshufd512_mask`     | AVX-512F     | `avx512fintrin.h` |
| `_mm_maskz_shuffle_epi32`      | `__builtin_ia32_pshufd128_maskz`    | AVX-512VL    | `avx512vlintrin.h` |
| `_mm256_maskz_shuffle_epi32`   | `__builtin_ia32_pshufd256_maskz`    | AVX-512VL    | `avx512vlintrin.h` |
| `_mm512_maskz_shuffle_epi32`   | `__builtin_ia32_pshufd512_maskz`    | AVX-512F     | `avx512fintrin.h` |

---

Fixes **#<!-- -->156611**

Adds constexpr evaluation to these intrinsics in both the **ExprConstant** 
evaluator and the **Bytecode Interpreter**, with tests for all unmasked, 
masked, and mask-zero variants across MMX, 128-bit, 256-bit, and 512-bit widths.

---

Patch is 49.10 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/161094.diff


11 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsX86.td (+57-7) 
- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+246) 
- (modified) clang/lib/AST/ExprConstant.cpp (+287) 
- (modified) clang/lib/Headers/mmintrin.h (+6) 
- (modified) clang/test/CodeGen/X86/avx2-builtins.c (+5) 
- (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+11-1) 
- (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+14) 
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+20) 
- (modified) clang/test/CodeGen/X86/avx512vlbw-builtins.c (+60-1) 
- (modified) clang/test/CodeGen/X86/mmx-builtins.c (+2-1) 
- (modified) clang/test/CodeGen/X86/sse2-builtins.c (+10-6) 


``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 77e599587edc3..08b82b03b7865 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -145,6 +145,10 @@ let Features = "mmx", Header = "mmintrin.h", Attributes = 
[NoThrow, Const] in {
   def _m_prefetch : X86LibBuiltin<"void(void *)">;
 }
 
+let Features = "mmx", Attributes = [NoThrow, Const, Constexpr] in {
+  def pshufw : X86Builtin<"_Vector<4, short>(_Vector<4, short>, _Constant 
int)">;
+}
+
 // PRFCHW
 let Features = "prfchw", Header = "intrin.h", Attributes = [NoThrow, Const] in 
{
   def _m_prefetchw : X86LibBuiltin<"void(void volatile const *)">;
@@ -217,10 +221,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
   def movnti : X86Builtin<"void(int *, int)">;
 }
 
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
-  def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant 
int)">;
+  def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
   def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant 
int)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
   def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, 
_Vector<16, char>)">;
   def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
   def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -569,6 +576,12 @@ let Features = "avx", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in
   def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, 
_Constant int)">;
 }
 
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
+  def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int)">;
+  def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int)">;
+  def pshufd256  : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant 
int)">;
+}
+
 let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
   def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, 
_Vector<32, char>, _Constant char)">;
   def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, 
_Vector<32, char>, _Constant int)">;
@@ -584,9 +597,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] i
   def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Vector<16, short>)">;
   def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, 
_Vector<32, char>)">;
   def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, 
char>)">;
-  def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant 
int)">;
-  def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int)">;
-  def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int)">;
   def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, 
char>)">;
   def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Vector<16, short>)">;
   def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
@@ -1989,9 +1999,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, 
Constexpr, RequiredVect
   def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Constant int)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int)">;
   def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
+  def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant 
int)">;
+  def pshufd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, 
_Constant int, _Vector<16, int>, unsigned short)">;
+  def pshufd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, 
_Constant int, unsigned short)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
+  def pshufd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant 
int, _Vector<8, int>, unsigned char)">;
+  def pshufd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant 
int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
+  def pshufd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant 
int, _Vector<4, int>, unsigned char)">;
+  def pshufd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant 
int, unsigned char)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
   def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, 
short>)">;
 }
 
@@ -3266,7 +3295,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>
 }
 
 let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant 
int)">;
   def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, double>, unsigned char)">;
   def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long 
long int>, _Vector<8, long long int>, unsigned char)">;
 }
@@ -5114,3 +5142,25 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>
 let Features = "avx10.2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
   def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
 }
+
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
+  def pshuflw512_mask  : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int, _Vector<32, short>, unsigned int)">;
+  def pshuflw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int, unsigned int)">;
+  def pshufhw512_mask  : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int, _Vector<32, short>, unsigned int)">;
+  def pshufhw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, 
_Constant int, unsigned int)">;
+}
+
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
+  def pshuflw256_mask  : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int, _Vector<16, short>, unsigned short)">;
+  def pshuflw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int, unsigned short)">;
+  def pshufhw256_mask  : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int, _Vector<16, short>, unsigned short)">;
+  def pshufhw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Constant int, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
+  def pshuflw128_mask  : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Constant int, _Vector<8, short>, unsigned char)">;
+  def pshuflw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Constant int, unsigned char)">;
+  def pshufhw128_mask  : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Constant int, _Vector<8, short>, unsigned char)">;
+  def pshufhw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, 
_Constant int, unsigned char)">;
+}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 891344d4e6ed0..1156626a30c8a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2862,6 +2862,218 @@ static bool interp__builtin_blend(InterpState &S, 
CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_ia32_pshuflw_common(InterpState &S, CodePtr OpPC,
+                                                const CallExpr *Call) {
+  const unsigned NumArgs = Call->getNumArgs();
+  assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+  APSInt K;
+  Pointer SrcPT;
+  const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+  const bool IsMaskZ = (NumArgs == 3);
+  if (NumArgs == 4) {
+    K = popToAPSInt(S, Call->getArg(3));
+    SrcPT = S.Stk.pop<Pointer>();
+  } else if (NumArgs == 3) {
+    K = popToAPSInt(S, Call->getArg(2));
+  }
+
+  APSInt Imm = popToAPSInt(S, Call->getArg(1));
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+  const unsigned NumElems = Dst.getNumElems();
+  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+  const unsigned ElemBits = 16;
+  const unsigned LaneElems = 128u / ElemBits;
+  const unsigned Half = 4;
+  assert(NumElems % LaneElems == 0 && "pshuflw expects 128-bit lanes");
+  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+  for (unsigned i = 0; i != NumElems; ++i) {
+    const unsigned laneBase = (i / LaneElems) * LaneElems;
+    const unsigned inLane = i % LaneElems;
+
+    unsigned srcIdx;
+    if (inLane < Half) {
+      const unsigned pos = inLane;
+      const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+      srcIdx = laneBase + sel;
+    } else {
+      srcIdx = i;
+    }
+
+    APSInt Chosen;
+    INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+    if (!HasMask) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+      continue;
+    }
+
+    const bool Keep =
+        (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+
+    if (Keep) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+    } else if (IsMaskZ) {
+      APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+      Zero.setIsSigned(Chosen.isSigned());
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Zero); });
+    } else {
+      APSInt PT;
+      INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+      INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
+static bool interp__builtin_ia32_pshufhw_common(InterpState &S, CodePtr OpPC,
+                                                const CallExpr *Call) {
+  (void)OpPC;
+  const unsigned NumArgs = Call->getNumArgs();
+  assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+
+  APSInt K;
+  Pointer SrcPT;
+  const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+  const bool IsMaskZ = (NumArgs == 3);
+
+  if (NumArgs == 4) {
+    K = popToAPSInt(S, Call->getArg(3));
+    SrcPT = S.Stk.pop<Pointer>();
+  } else if (NumArgs == 3) {
+    K = popToAPSInt(S, Call->getArg(2));
+  }
+
+  APSInt Imm = popToAPSInt(S, Call->getArg(1));
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  const unsigned NumElems = Dst.getNumElems();
+  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+
+  const unsigned ElemBits = 16;
+  const unsigned LaneElems = 128u / ElemBits;
+  const unsigned HalfBase = 4;
+  assert(NumElems % LaneElems == 0);
+
+  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+  for (unsigned i = 0; i != NumElems; ++i) {
+    const unsigned laneBase = (i / LaneElems) * LaneElems;
+    const unsigned inLane = i % LaneElems;
+
+    unsigned srcIdx;
+    if (inLane >= HalfBase) {
+      const unsigned pos = inLane - HalfBase;
+      const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+      srcIdx = laneBase + HalfBase + sel;
+    } else {
+      srcIdx = i;
+    }
+
+    APSInt Chosen;
+    INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+    if (!HasMask) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+      continue;
+    }
+
+    const bool Keep =
+        (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+    if (Keep) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+    } else if (IsMaskZ) {
+      APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+      Zero.setIsSigned(Chosen.isSigned());
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Zero); });
+    } else {
+      APSInt PT;
+      INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+      INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
+static bool interp__builtin_ia32_pshufd_common(InterpState &S, CodePtr OpPC,
+                                               const CallExpr *Call) {
+  (void)OpPC;
+  const unsigned NumArgs = Call->getNumArgs();
+  assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+
+  APSInt K;
+  Pointer SrcPT;
+  const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+  const bool IsMaskZ = (NumArgs == 3);
+
+  if (NumArgs == 4) {
+    K = popToAPSInt(S, Call->getArg(3));
+    SrcPT = S.Stk.pop<Pointer>();
+  } else if (NumArgs == 3) {
+    K = popToAPSInt(S, Call->getArg(2));
+  }
+
+  APSInt Imm = popToAPSInt(S, Call->getArg(1));
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  const unsigned NumElems = Dst.getNumElems();
+  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+
+  const unsigned ElemBits = 32;
+  const unsigned LaneElems = 128u / ElemBits;
+  assert(NumElems % LaneElems == 0);
+
+  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+  for (unsigned i = 0; i != NumElems; ++i) {
+    const unsigned laneBase = (i / LaneElems) * LaneElems;
+    const unsigned inLane = i % LaneElems;
+    const unsigned sel = (Ctl >> (2 * inLane)) & 0x3;
+    const unsigned srcIdx = laneBase + sel;
+
+    APSInt Chosen;
+    INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+    if (!HasMask) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+      continue;
+    }
+
+    const bool Keep =
+        (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+    if (Keep) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+    } else if (IsMaskZ) {
+      APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+      Zero.setIsSigned(Chosen.isSigned());
+      INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                              { Dst.elem<T>(i) = static_cast<T>(Zero); });
+    } else {
+      APSInt PT;
+      INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+      INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
 static bool interp__builtin_elementwise_triop(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
     llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -2967,6 +3179,7 @@ static bool 
interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                       uint32_t BuiltinID) {
   if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3417,6 +3630,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
     return interp__builtin_elementwise_int_binop(S, OpPC, Call,
                                                  llvm::APIntOps::mulhs);
 
+  case clang::X86::BI__builtin_ia32_pshuflw:
+  case clang::X86::BI__builtin_ia32_pshuflw256:
+  case clang::X86::BI__builtin_ia32_pshuflw512:
+  case clang::X86::BI__builtin_ia32_pshuflw128_mask:
+  case clang::X86::BI__builtin_ia32_pshuflw256_mask:
+  case clang::X86::BI__builtin_ia32_pshuflw512_mask:
+  case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
+  case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
+  case clang::X86::BI__builtin_ia32_pshuflw512_maskz:
+    return interp__builtin_ia32_pshuflw_common(S, OpPC, Call);
+
+  case clang::X86::BI__builtin_ia32_pshufhw:
+  case clang::X86::BI__builtin_ia32_pshufhw256:
+  case clang::X86::BI__builtin_ia32_pshufhw512:
+  case clang::X86::BI__builtin_ia32_pshufhw128_mask:
+  case clang::X86::BI__builtin_ia32_pshufhw256_mask:
+  case clang::X86::BI__builtin_ia32_pshufhw512_mask:
+  case clang::X86::BI__builtin_ia32_pshufhw128_maskz:
+  case clang::X86::BI__builtin_ia32_pshufhw256_maskz:
+  case clang::X86::BI__builtin_ia32_pshufhw512_maskz:
+    return interp__builtin_ia32_pshufhw_common(S, OpPC, Call);
+
+  case clang::X86::BI__builtin_ia32_pshufd:
+  case clang::X86::BI__builtin_ia32_pshufd256:
+  case clang::X86::BI__builtin_ia32_pshufd512:
+  case clang::X86::BI__builtin_ia32_pshufd128_mask:
+  case clang::X86::BI__builtin_ia32_pshufd256_mask:
+  case clang::X86::BI__builtin_ia32_pshufd512_mask:
+  case clang::X86::BI__builtin_ia32_pshufd128_maskz:
+  case clang::X86::BI__builtin_ia32_pshufd256_maskz:
+  case clang::X86::BI__builtin_ia32_pshufd512_maskz:
+    return interp__builtin_ia32_pshufd_common(S, OpPC, Call);
+
   case clang::X86::BI__builtin_ia32_psllv2di:
   case clang::X86::BI__builtin_ia32_psllv4di:
   case clang::X86::BI__builtin_ia32_psllv4si:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..3fee702120abc 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11868,6 +11868,292 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+case X86::BI__builtin_ia32_pshufw: {
+  APValue Src;
+  APSInt Imm;
+  if (!EvaluateAsRValue(Info, E->getArg(0), Src)) return false;
+  if (!EvaluateInteger(E->getArg(1), Imm, Info))  return false;
+
+  unsigned N = Src.getVectorLength(); 
+  SmallVector<APValue, 4> ResultElements;
+  ResultElements.reserve(N);
+
+  uint8_t C = static_cast<uint8_t>(Imm.getZExtValue());
+  for (unsigned i = 0; i != N; ++i) {
+    unsigned sel = (C >> (2 * i)) & 0x3;
+    ResultElements.push_back(Src.getVectorElt(sel));
+  }
+  return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+}
+
+case clang::X86::BI__builtin_ia32_pshuflw:
+case clang::X86::BI__builtin_ia32_pshuflw256:
+case clang::X86::BI__builtin_ia32_pshuflw512:
+case clang::X86::BI__builtin_ia32_pshuflw128_mask:
+case clang::X86::BI__builtin_ia32_pshuflw256_mask:
+case clang::X86::BI__builtin_ia32_pshuflw512_mask:
+case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
+case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
+case clang::X86::BI__builtin_ia32_pshuflw512_maskz: {
+  const unsigned BID = E->getBuiltinCallee();
+
+  const bool IsMask =
+      BID == clang::X86::BI__builtin_ia32_pshuflw128_mask  ||
+      BID == clang::X86::BI__builtin_ia32_pshuflw256_mask  ||
+      BID == clang::X86::BI__builtin_ia32_pshuflw512_mask;
+
+  const bool IsMaskZ =
+      BID == clang::X86::BI__builtin_ia32_pshuflw128_maskz ||
+      BID == clang::X86::BI__builtin_ia32_pshuflw256_maskz ||
+      BID == clang::X86::BI__builtin_ia32_pshuflw512_maskz;
+
+  const unsigned AIdx  = 0, ImmIdx = 1;
+  const unsigned SrcIdx = 2;
+  const unsigned KIdx   = IsMaskZ ? 2 : 3;
+
+  APValue AVal, SrcVal;
+  APSInt Imm, K;
+  if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false;
+  if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info))  return false;
+
+  const APSInt *KPtr = nullptr;
+  const APValue *PassThru = nullptr;
+  bool ZeroInactive = false;
+
+  if (IsMask) {
+    if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false;
+    if (!EvaluateInteger(E->getArg(KIdx), K, Info))         return false;
+    KPtr = &K; PassThru = &SrcVal; ZeroInactive = false;
+  } else if (IsMaskZ) {
+    if (!EvaluateInteger(E->getArg(KIdx), K, Info))         return false;
+    KPtr = &K; PassThru = nullptr; ZeroInactive = true;
+  }
+
+  const auto *VT = E->getType()->getAs<VectorType>();
+...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/161094
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to