llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Nagraj Gaonkar (NagrajMG)
<details>
<summary>Changes</summary>
## [Headers][X86] Allow PSHUFD/PSHUFLW/PSHUFW shuffle intrinsics to be used in `constexpr`
### PSHUFW — shuffle 4×i16 in MMX (64-bit)
| Intrinsic | X86 Builtin | CPUID Flags | Header |
|-----------|------------------------|------------|---------------|
| `_mm_shuffle_pi16` | `__builtin_ia32_pshufw` | MMX | `mmintrin.h` |
---
### PSHUFLW — shuffle low 4×i16 per 128-bit lane
| Intrinsics | X86 Builtins | CPUID Flags | Header |
|-----------|--------------|------------|-------|
| `_mm_shufflelo_epi16` | `__builtin_ia32_pshuflw` | SSE2 | `emmintrin.h` |
| `_mm256_shufflelo_epi16` | `__builtin_ia32_pshuflw256` | AVX2 | `avx2intrin.h` |
| `_mm512_shufflelo_epi16` | `__builtin_ia32_pshuflw512` | AVX-512BW | `avx512bwintrin.h` |
| `_mm_mask_shufflelo_epi16` | `__builtin_ia32_pshuflw128_mask` | AVX-512VL+BW | `avx512vlbwintrin.h` |
| `_mm256_mask_shufflelo_epi16` | `__builtin_ia32_pshuflw256_mask` | AVX-512VL+BW | `avx512vlbwintrin.h` |
| `_mm512_mask_shufflelo_epi16` | `__builtin_ia32_pshuflw512_mask` | AVX-512BW | `avx512bwintrin.h` |
| `_mm_maskz_shufflelo_epi16` | `__builtin_ia32_pshuflw128_maskz` | AVX-512VL+BW | `avx512vlbwintrin.h` |
| `_mm256_maskz_shufflelo_epi16` | `__builtin_ia32_pshuflw256_maskz` | AVX-512VL+BW | `avx512vlbwintrin.h` |
| `_mm512_maskz_shufflelo_epi16` | `__builtin_ia32_pshuflw512_maskz` | AVX-512BW | `avx512bwintrin.h` |
---
### PSHUFHW — shuffle high 4×i16 per 128-bit lane
| Intrinsics | X86 Builtins | CPUID Flags | Header |
|-----------|--------------|------------|-------|
| `_mm_shufflehi_epi16` | `__builtin_ia32_pshufhw` | SSE2 | `emmintrin.h` |
| `_mm256_shufflehi_epi16` | `__builtin_ia32_pshufhw256` | AVX2 | `avx2intrin.h` |
| `_mm512_shufflehi_epi16` | `__builtin_ia32_pshufhw512` | AVX-512BW | `avx512bwintrin.h` |
| `_mm_mask_shufflehi_epi16` | `__builtin_ia32_pshufhw128_mask` | AVX-512VL+BW | `avx512vlbwintrin.h` |
| `_mm256_mask_shufflehi_epi16` | `__builtin_ia32_pshufhw256_mask` | AVX-512VL+BW | `avx512vlbwintrin.h` |
| `_mm512_mask_shufflehi_epi16` | `__builtin_ia32_pshufhw512_mask` | AVX-512BW | `avx512bwintrin.h` |
| `_mm_maskz_shufflehi_epi16` | `__builtin_ia32_pshufhw128_maskz` | AVX-512VL+BW | `avx512vlbwintrin.h` |
| `_mm256_maskz_shufflehi_epi16` | `__builtin_ia32_pshufhw256_maskz` | AVX-512VL+BW | `avx512vlbwintrin.h` |
| `_mm512_maskz_shufflehi_epi16` | `__builtin_ia32_pshufhw512_maskz` | AVX-512BW | `avx512bwintrin.h` |
---
### PSHUFD — shuffle 4×i32 per 128-bit lane
| Intrinsics | X86 Builtins | CPUID Flags | Header |
|-----------|--------------|------------|-------|
| `_mm_shuffle_epi32` | `__builtin_ia32_pshufd` | SSE2 | `emmintrin.h` |
| `_mm256_shuffle_epi32` | `__builtin_ia32_pshufd256` | AVX2 | `avx2intrin.h` |
| `_mm512_shuffle_epi32` | `__builtin_ia32_pshufd512` | AVX-512F | `avx512fintrin.h` |
| `_mm_mask_shuffle_epi32` | `__builtin_ia32_pshufd128_mask` | AVX-512VL | `avx512vlintrin.h` |
| `_mm256_mask_shuffle_epi32` | `__builtin_ia32_pshufd256_mask` | AVX-512VL | `avx512vlintrin.h` |
| `_mm512_mask_shuffle_epi32` | `__builtin_ia32_pshufd512_mask` | AVX-512F | `avx512fintrin.h` |
| `_mm_maskz_shuffle_epi32` | `__builtin_ia32_pshufd128_maskz` | AVX-512VL | `avx512vlintrin.h` |
| `_mm256_maskz_shuffle_epi32` | `__builtin_ia32_pshufd256_maskz` | AVX-512VL | `avx512vlintrin.h` |
| `_mm512_maskz_shuffle_epi32` | `__builtin_ia32_pshufd512_maskz` | AVX-512F | `avx512fintrin.h` |
---
Fixes **#<!-- -->156611**
Adds constexpr evaluation to these intrinsics in both the **ExprConstant**
evaluator and the **Bytecode Interpreter**, with tests for all unmasked,
masked, and mask-zero variants across MMX, 128-bit, 256-bit, and 512-bit widths.
---
Patch is 49.10 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/161094.diff
11 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86.td (+57-7)
- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+246)
- (modified) clang/lib/AST/ExprConstant.cpp (+287)
- (modified) clang/lib/Headers/mmintrin.h (+6)
- (modified) clang/test/CodeGen/X86/avx2-builtins.c (+5)
- (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+11-1)
- (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+14)
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+20)
- (modified) clang/test/CodeGen/X86/avx512vlbw-builtins.c (+60-1)
- (modified) clang/test/CodeGen/X86/mmx-builtins.c (+2-1)
- (modified) clang/test/CodeGen/X86/sse2-builtins.c (+10-6)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.td
b/clang/include/clang/Basic/BuiltinsX86.td
index 77e599587edc3..08b82b03b7865 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -145,6 +145,10 @@ let Features = "mmx", Header = "mmintrin.h", Attributes =
[NoThrow, Const] in {
def _m_prefetch : X86LibBuiltin<"void(void *)">;
}
+let Features = "mmx", Attributes = [NoThrow, Const, Constexpr] in {
+ def pshufw : X86Builtin<"_Vector<4, short>(_Vector<4, short>, _Constant
int)">;
+}
+
// PRFCHW
let Features = "prfchw", Header = "intrin.h", Attributes = [NoThrow, Const] in
{
def _m_prefetchw : X86LibBuiltin<"void(void volatile const *)">;
@@ -217,10 +221,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
in {
- def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr,
RequiredVectorWidth<128>] in {
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant
int)">;
+ def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant
int)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>,
_Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -569,6 +576,12 @@ let Features = "avx", Attributes = [NoThrow, Const,
RequiredVectorWidth<256>] in
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int,
_Constant int)">;
}
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr,
RequiredVectorWidth<256>] in {
+ def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Constant int)">;
+ def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Constant int)">;
+ def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant
int)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>]
in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>,
_Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>,
_Vector<32, char>, _Constant int)">;
@@ -584,9 +597,6 @@ let Features = "avx2", Attributes = [NoThrow, Const,
RequiredVectorWidth<256>] i
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Vector<16, short>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>,
_Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32,
char>)">;
- def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant
int)">;
- def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Constant int)">;
- def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Constant int)">;
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32,
char>)">;
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Vector<16, short>)">;
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8,
int>)">;
@@ -1989,9 +1999,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const,
Constexpr, RequiredVect
def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long
int>, _Constant int)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const,
RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr,
RequiredVectorWidth<512>] in {
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>,
_Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>,
_Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr,
RequiredVectorWidth<512>] in {
+ def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant
int)">;
+ def pshufd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>,
_Constant int, _Vector<16, int>, unsigned short)">;
+ def pshufd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>,
_Constant int, unsigned short)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr,
RequiredVectorWidth<256>] in {
+ def pshufd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant
int, _Vector<8, int>, unsigned char)">;
+ def pshufd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant
int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr,
RequiredVectorWidth<128>] in {
+ def pshufd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant
int, _Vector<4, int>, unsigned char)">;
+ def pshufd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant
int, unsigned char)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const,
RequiredVectorWidth<512>] in {
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8,
short>)">;
}
@@ -3266,7 +3295,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const,
RequiredVectorWidth<128>
}
let Features = "avx512f", Attributes = [NoThrow, Const,
RequiredVectorWidth<512>] in {
- def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant
int)">;
def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>,
_Vector<8, double>, unsigned char)">;
def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long
long int>, _Vector<8, long long int>, unsigned char)">;
}
@@ -5114,3 +5142,25 @@ let Features = "avx10.2", Attributes = [NoThrow, Const,
RequiredVectorWidth<256>
let Features = "avx10.2", Attributes = [NoThrow, Const,
RequiredVectorWidth<512>] in {
def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr,
RequiredVectorWidth<512>] in {
+ def pshuflw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>,
_Constant int, _Vector<32, short>, unsigned int)">;
+ def pshuflw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>,
_Constant int, unsigned int)">;
+ def pshufhw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>,
_Constant int, _Vector<32, short>, unsigned int)">;
+ def pshufhw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>,
_Constant int, unsigned int)">;
+}
+
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr,
RequiredVectorWidth<256>] in {
+ def pshuflw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Constant int, _Vector<16, short>, unsigned short)">;
+ def pshuflw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Constant int, unsigned short)">;
+ def pshufhw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Constant int, _Vector<16, short>, unsigned short)">;
+ def pshufhw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>,
_Constant int, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr,
RequiredVectorWidth<128>] in {
+ def pshuflw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>,
_Constant int, _Vector<8, short>, unsigned char)">;
+ def pshuflw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>,
_Constant int, unsigned char)">;
+ def pshufhw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>,
_Constant int, _Vector<8, short>, unsigned char)">;
+ def pshufhw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>,
_Constant int, unsigned char)">;
+}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 891344d4e6ed0..1156626a30c8a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2862,6 +2862,218 @@ static bool interp__builtin_blend(InterpState &S,
CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_pshuflw_common(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ const unsigned NumArgs = Call->getNumArgs();
+ assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+ APSInt K;
+ Pointer SrcPT;
+ const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+ const bool IsMaskZ = (NumArgs == 3);
+ if (NumArgs == 4) {
+ K = popToAPSInt(S, Call->getArg(3));
+ SrcPT = S.Stk.pop<Pointer>();
+ } else if (NumArgs == 3) {
+ K = popToAPSInt(S, Call->getArg(2));
+ }
+
+ APSInt Imm = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ const unsigned NumElems = Dst.getNumElems();
+ const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+ const unsigned ElemBits = 16;
+ const unsigned LaneElems = 128u / ElemBits;
+ const unsigned Half = 4;
+ assert(NumElems % LaneElems == 0 && "pshuflw expects 128-bit lanes");
+ const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ const unsigned laneBase = (i / LaneElems) * LaneElems;
+ const unsigned inLane = i % LaneElems;
+
+ unsigned srcIdx;
+ if (inLane < Half) {
+ const unsigned pos = inLane;
+ const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+ srcIdx = laneBase + sel;
+ } else {
+ srcIdx = i;
+ }
+
+ APSInt Chosen;
+ INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+ if (!HasMask) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ continue;
+ }
+
+ const bool Keep =
+ (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+
+ if (Keep) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ } else if (IsMaskZ) {
+ APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+ Zero.setIsSigned(Chosen.isSigned());
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Zero); });
+ } else {
+ APSInt PT;
+ INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+ }
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_ia32_pshufhw_common(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ (void)OpPC;
+ const unsigned NumArgs = Call->getNumArgs();
+ assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+
+ APSInt K;
+ Pointer SrcPT;
+ const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+ const bool IsMaskZ = (NumArgs == 3);
+
+ if (NumArgs == 4) {
+ K = popToAPSInt(S, Call->getArg(3));
+ SrcPT = S.Stk.pop<Pointer>();
+ } else if (NumArgs == 3) {
+ K = popToAPSInt(S, Call->getArg(2));
+ }
+
+ APSInt Imm = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ const unsigned NumElems = Dst.getNumElems();
+ const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+
+ const unsigned ElemBits = 16;
+ const unsigned LaneElems = 128u / ElemBits;
+ const unsigned HalfBase = 4;
+ assert(NumElems % LaneElems == 0);
+
+ const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ const unsigned laneBase = (i / LaneElems) * LaneElems;
+ const unsigned inLane = i % LaneElems;
+
+ unsigned srcIdx;
+ if (inLane >= HalfBase) {
+ const unsigned pos = inLane - HalfBase;
+ const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+ srcIdx = laneBase + HalfBase + sel;
+ } else {
+ srcIdx = i;
+ }
+
+ APSInt Chosen;
+ INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+ if (!HasMask) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ continue;
+ }
+
+ const bool Keep =
+ (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+ if (Keep) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ } else if (IsMaskZ) {
+ APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+ Zero.setIsSigned(Chosen.isSigned());
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Zero); });
+ } else {
+ APSInt PT;
+ INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+ }
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_ia32_pshufd_common(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ (void)OpPC;
+ const unsigned NumArgs = Call->getNumArgs();
+ assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+
+ APSInt K;
+ Pointer SrcPT;
+ const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+ const bool IsMaskZ = (NumArgs == 3);
+
+ if (NumArgs == 4) {
+ K = popToAPSInt(S, Call->getArg(3));
+ SrcPT = S.Stk.pop<Pointer>();
+ } else if (NumArgs == 3) {
+ K = popToAPSInt(S, Call->getArg(2));
+ }
+
+ APSInt Imm = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ const unsigned NumElems = Dst.getNumElems();
+ const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+
+ const unsigned ElemBits = 32;
+ const unsigned LaneElems = 128u / ElemBits;
+ assert(NumElems % LaneElems == 0);
+
+ const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ const unsigned laneBase = (i / LaneElems) * LaneElems;
+ const unsigned inLane = i % LaneElems;
+ const unsigned sel = (Ctl >> (2 * inLane)) & 0x3;
+ const unsigned srcIdx = laneBase + sel;
+
+ APSInt Chosen;
+ INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+ if (!HasMask) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ continue;
+ }
+
+ const bool Keep =
+ (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+ if (Keep) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ } else if (IsMaskZ) {
+ APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+ Zero.setIsSigned(Chosen.isSigned());
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Zero); });
+ } else {
+ APSInt PT;
+ INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+ }
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -2967,6 +3179,7 @@ static bool
interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3417,6 +3630,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
return interp__builtin_elementwise_int_binop(S, OpPC, Call,
llvm::APIntOps::mulhs);
+ case clang::X86::BI__builtin_ia32_pshuflw:
+ case clang::X86::BI__builtin_ia32_pshuflw256:
+ case clang::X86::BI__builtin_ia32_pshuflw512:
+ case clang::X86::BI__builtin_ia32_pshuflw128_mask:
+ case clang::X86::BI__builtin_ia32_pshuflw256_mask:
+ case clang::X86::BI__builtin_ia32_pshuflw512_mask:
+ case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
+ case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
+ case clang::X86::BI__builtin_ia32_pshuflw512_maskz:
+ return interp__builtin_ia32_pshuflw_common(S, OpPC, Call);
+
+ case clang::X86::BI__builtin_ia32_pshufhw:
+ case clang::X86::BI__builtin_ia32_pshufhw256:
+ case clang::X86::BI__builtin_ia32_pshufhw512:
+ case clang::X86::BI__builtin_ia32_pshufhw128_mask:
+ case clang::X86::BI__builtin_ia32_pshufhw256_mask:
+ case clang::X86::BI__builtin_ia32_pshufhw512_mask:
+ case clang::X86::BI__builtin_ia32_pshufhw128_maskz:
+ case clang::X86::BI__builtin_ia32_pshufhw256_maskz:
+ case clang::X86::BI__builtin_ia32_pshufhw512_maskz:
+ return interp__builtin_ia32_pshufhw_common(S, OpPC, Call);
+
+ case clang::X86::BI__builtin_ia32_pshufd:
+ case clang::X86::BI__builtin_ia32_pshufd256:
+ case clang::X86::BI__builtin_ia32_pshufd512:
+ case clang::X86::BI__builtin_ia32_pshufd128_mask:
+ case clang::X86::BI__builtin_ia32_pshufd256_mask:
+ case clang::X86::BI__builtin_ia32_pshufd512_mask:
+ case clang::X86::BI__builtin_ia32_pshufd128_maskz:
+ case clang::X86::BI__builtin_ia32_pshufd256_maskz:
+ case clang::X86::BI__builtin_ia32_pshufd512_maskz:
+ return interp__builtin_ia32_pshufd_common(S, OpPC, Call);
+
case clang::X86::BI__builtin_ia32_psllv2di:
case clang::X86::BI__builtin_ia32_psllv4di:
case clang::X86::BI__builtin_ia32_psllv4si:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..3fee702120abc 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11868,6 +11868,292 @@ bool VectorExprEvaluator::VisitCallExpr(const
CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+case X86::BI__builtin_ia32_pshufw: {
+ APValue Src;
+ APSInt Imm;
+ if (!EvaluateAsRValue(Info, E->getArg(0), Src)) return false;
+ if (!EvaluateInteger(E->getArg(1), Imm, Info)) return false;
+
+ unsigned N = Src.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(N);
+
+ uint8_t C = static_cast<uint8_t>(Imm.getZExtValue());
+ for (unsigned i = 0; i != N; ++i) {
+ unsigned sel = (C >> (2 * i)) & 0x3;
+ ResultElements.push_back(Src.getVectorElt(sel));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+}
+
+case clang::X86::BI__builtin_ia32_pshuflw:
+case clang::X86::BI__builtin_ia32_pshuflw256:
+case clang::X86::BI__builtin_ia32_pshuflw512:
+case clang::X86::BI__builtin_ia32_pshuflw128_mask:
+case clang::X86::BI__builtin_ia32_pshuflw256_mask:
+case clang::X86::BI__builtin_ia32_pshuflw512_mask:
+case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
+case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
+case clang::X86::BI__builtin_ia32_pshuflw512_maskz: {
+ const unsigned BID = E->getBuiltinCallee();
+
+ const bool IsMask =
+ BID == clang::X86::BI__builtin_ia32_pshuflw128_mask ||
+ BID == clang::X86::BI__builtin_ia32_pshuflw256_mask ||
+ BID == clang::X86::BI__builtin_ia32_pshuflw512_mask;
+
+ const bool IsMaskZ =
+ BID == clang::X86::BI__builtin_ia32_pshuflw128_maskz ||
+ BID == clang::X86::BI__builtin_ia32_pshuflw256_maskz ||
+ BID == clang::X86::BI__builtin_ia32_pshuflw512_maskz;
+
+ const unsigned AIdx = 0, ImmIdx = 1;
+ const unsigned SrcIdx = 2;
+ const unsigned KIdx = IsMaskZ ? 2 : 3;
+
+ APValue AVal, SrcVal;
+ APSInt Imm, K;
+ if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false;
+ if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false;
+
+ const APSInt *KPtr = nullptr;
+ const APValue *PassThru = nullptr;
+ bool ZeroInactive = false;
+
+ if (IsMask) {
+ if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false;
+ if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
+ KPtr = &K; PassThru = &SrcVal; ZeroInactive = false;
+ } else if (IsMaskZ) {
+ if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
+ KPtr = &K; PassThru = nullptr; ZeroInactive = true;
+ }
+
+ const auto *VT = E->getType()->getAs<VectorType>();
+...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/161094
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits