https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/187946
>From a882bf4fe23fae163db67d50c99e1415b932d8c4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <[email protected]> Date: Sun, 22 Mar 2026 17:55:29 +0000 Subject: [PATCH 1/2] [clang][x86] Allow AVX512 expand intrinsics to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 33 +++++++++------- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 32 +++++++++++++++ clang/lib/AST/ExprConstant.cpp | 39 +++++++++++++++++++ clang/lib/Headers/avx512fintrin.h | 16 ++++---- clang/lib/Headers/avx512vbmi2intrin.h | 8 ++-- clang/lib/Headers/avx512vlintrin.h | 32 +++++++-------- clang/lib/Headers/avx512vlvbmi2intrin.h | 16 ++++---- clang/test/CodeGen/X86/avx512f-builtins.c | 10 +++++ clang/test/CodeGen/X86/avx512vbmi2-builtins.c | 8 +++- clang/test/CodeGen/X86/avx512vl-builtins.c | 32 +++++++++++++++ .../test/CodeGen/X86/avx512vlvbmi2-builtins.c | 16 ++++++-- 11 files changed, 185 insertions(+), 57 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 4cadd570cc6c4..f47532a63de04 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1499,35 +1499,35 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def cvttps2udq256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def expanddf128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def expanddf256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def expanddi128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def expanddi256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, unsigned char)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def expandhi128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, unsigned char)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def expandhi256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, unsigned short)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def expandqi128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, unsigned short)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def expandqi256_mask : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, unsigned int)">; } @@ -1579,19 +1579,19 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { def expandloadsi256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int const *>, _Vector<8, int>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def expandsf128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def expandsf256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def expandsi128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def expandsi256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, unsigned char)">; } @@ -3230,12 +3230,12 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def cmpss_mask : X86Builtin<"unsigned char(_Vector<4, float>, _Vector<4, float>, _Constant int, unsigned char, _Constant int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">; def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">; } -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def expandhi512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, unsigned int)">; def expandqi512_mask : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">; } @@ -3255,9 +3255,12 @@ let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in { def expandloadsi512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int const *>, _Vector<16, int>, unsigned short)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def expandsf512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, unsigned short)">; def expandsi512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, unsigned short)">; +} + +let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def cvtps2pd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, float>, _Vector<8, double>, unsigned char, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index fcd11ee9089c0..ddc1c2b03bd7b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -5157,6 +5157,38 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::pair<unsigned, int>{1, static_cast<int>(DstIdx)}; }); } + case X86::BI__builtin_ia32_expanddf128_mask: + case X86::BI__builtin_ia32_expanddf256_mask: + case X86::BI__builtin_ia32_expanddf512_mask: + case X86::BI__builtin_ia32_expanddi128_mask: + case X86::BI__builtin_ia32_expanddi256_mask: + case X86::BI__builtin_ia32_expanddi512_mask: + case X86::BI__builtin_ia32_expandhi128_mask: + case X86::BI__builtin_ia32_expandhi256_mask: + case X86::BI__builtin_ia32_expandhi512_mask: + case X86::BI__builtin_ia32_expandqi128_mask: + case X86::BI__builtin_ia32_expandqi256_mask: + case X86::BI__builtin_ia32_expandqi512_mask: + case X86::BI__builtin_ia32_expandsf128_mask: + case X86::BI__builtin_ia32_expandsf256_mask: + case X86::BI__builtin_ia32_expandsf512_mask: + case X86::BI__builtin_ia32_expandsi128_mask: + case X86::BI__builtin_ia32_expandsi256_mask: + case X86::BI__builtin_ia32_expandsi512_mask: { + unsigned NumElems = + Call->getArg(0)->getType()->castAs<VectorType>()->getNumElements(); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [NumElems](unsigned DstIdx, const APInt &ShuffleMask) { + // Trunc to the sub-mask for the dst index and count the number of + // src elements used prior to that. + APInt ExpandMask = ShuffleMask.trunc(DstIdx + 1); + if (ExpandMask[DstIdx]) { + int SrcIdx = ExpandMask.popcount() - 1; + return std::pair<unsigned, int>{0, SrcIdx}; + } + return std::pair<unsigned, int>{1, static_cast<int>(DstIdx)}; + }); + } case clang::X86::BI__builtin_ia32_blendpd: case clang::X86::BI__builtin_ia32_blendpd256: case clang::X86::BI__builtin_ia32_blendps: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b459d4043e13d..ec988584cce48 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12910,6 +12910,45 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_expanddf128_mask: + case X86::BI__builtin_ia32_expanddf256_mask: + case X86::BI__builtin_ia32_expanddf512_mask: + case X86::BI__builtin_ia32_expanddi128_mask: + case X86::BI__builtin_ia32_expanddi256_mask: + case X86::BI__builtin_ia32_expanddi512_mask: + case X86::BI__builtin_ia32_expandhi128_mask: + case X86::BI__builtin_ia32_expandhi256_mask: + case X86::BI__builtin_ia32_expandhi512_mask: + case X86::BI__builtin_ia32_expandqi128_mask: + case X86::BI__builtin_ia32_expandqi256_mask: + case X86::BI__builtin_ia32_expandqi512_mask: + case X86::BI__builtin_ia32_expandsf128_mask: + case X86::BI__builtin_ia32_expandsf256_mask: + case X86::BI__builtin_ia32_expandsf512_mask: + case X86::BI__builtin_ia32_expandsi128_mask: + case X86::BI__builtin_ia32_expandsi256_mask: + case X86::BI__builtin_ia32_expandsi512_mask: { + APValue Source, Passthru; + if (!EvaluateAsRValue(Info, E->getArg(0), Source) || + !EvaluateAsRValue(Info, E->getArg(1), Passthru)) + return false; + APSInt Mask; + if (!EvaluateInteger(E->getArg(2), Mask, Info)) + return false; + + unsigned NumElts = Source.getVectorLength(); + SmallVector<APValue, 64> ResultElements; + ResultElements.reserve(NumElts); + + unsigned SourceIdx = 0; + for (unsigned I = 0; I != NumElts; ++I) { + if (Mask[I]) + ResultElements.push_back(Source.getVectorElt(SourceIdx++)); + else + ResultElements.push_back(Passthru.getVectorElt(I)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case X86::BI__builtin_ia32_vpconflictsi_128: case X86::BI__builtin_ia32_vpconflictsi_256: case X86::BI__builtin_ia32_vpconflictsi_512: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index a5d2f1726cff4..edd33968848f3 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8383,7 +8383,7 @@ _mm_maskz_load_sd (__mmask8 __U, const double* __A) (__v16si)_mm512_shuffle_epi32((A), (I)), \ (__v16si)_mm512_setzero_si512())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, @@ -8391,7 +8391,7 @@ _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, @@ -8399,7 +8399,7 @@ _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, @@ -8407,7 +8407,7 @@ _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, @@ -8479,7 +8479,7 @@ _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, @@ -8487,7 +8487,7 @@ _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, @@ -8495,7 +8495,7 @@ _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, @@ -8503,7 +8503,7 @@ _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h index 380d1eeb5c38b..f46a42c2172e2 100644 --- a/clang/lib/Headers/avx512vbmi2intrin.h +++ b/clang/lib/Headers/avx512vbmi2intrin.h @@ -67,7 +67,7 @@ _mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, @@ -75,7 +75,7 @@ _mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, @@ -83,7 +83,7 @@ _mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, @@ -91,7 +91,7 @@ _mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 03d132839d9a9..c816b9ef77a6f 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -2250,14 +2250,14 @@ _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, (__v2df) @@ -2265,14 +2265,14 @@ _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, (__v4df) @@ -2280,14 +2280,14 @@ _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, (__v2di) @@ -2295,14 +2295,14 @@ _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, (__v4di) @@ -2445,14 +2445,14 @@ _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, (__v4sf) @@ -2460,14 +2460,14 @@ _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, (__v8sf) @@ -2475,14 +2475,14 @@ _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, (__v4si) @@ -2490,14 +2490,14 @@ _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, (__v8si) diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h index 7fa3a9ad31e82..2cf4b2dfc9a87 100644 --- a/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -74,7 +74,7 @@ _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, @@ -82,7 +82,7 @@ _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, @@ -90,7 +90,7 @@ _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, @@ -98,7 +98,7 @@ _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, @@ -180,7 +180,7 @@ _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, @@ -188,7 +188,7 @@ _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, @@ -196,7 +196,7 @@ _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, @@ -204,7 +204,7 @@ _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 1831074c97dbc..af6d86ced584b 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -9863,24 +9863,29 @@ __m512d test_mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A) { // CHECK: @llvm.x86.avx512.mask.expand return _mm512_mask_expand_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_expand_pd((__m512d)(__v8df){ 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0}, 0xBE, (__m512d)(__v8df){ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0} ), 99.0, 1.0, 2.0, 3.0, 4.0, 5.0, 99.0, 6.0)); __m512d test_mm512_maskz_expand_pd(__mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_maskz_expand_pd // CHECK: @llvm.x86.avx512.mask.expand return _mm512_maskz_expand_pd(__U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_expand_pd(0xBE, (__m512d)(__v8df){ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0} ), 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 0.0, 6.0)); __m512i test_mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_expand_epi64 // CHECK: @llvm.x86.avx512.mask.expand return _mm512_mask_expand_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_expand_epi64((__m512i)(__v8di){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x30, (__m512i)(__v8di){ 1, 2, 3, 4, 5, 6, 7, 8} ), 99, 99, 99, 99, 1, 2, 99, 99)); __m512i test_mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_expand_epi64 // CHECK: @llvm.x86.avx512.mask.expand return _mm512_maskz_expand_epi64(__U, __A); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_expand_epi64(0x30, (__m512i)(__v8di){ 1, 2, 3, 4, 5, 6, 7, 8} ), 0, 0, 0, 0, 1, 2, 0, 0)); + __m512i test_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) { // CHECK-LABEL: test_mm512_mask_expandloadu_epi64 // CHECK: @llvm.masked.expandload.v8i64(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) @@ -9934,24 +9939,29 @@ __m512 test_mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A) { // CHECK: @llvm.x86.avx512.mask.expand return _mm512_mask_expand_ps(__W, __U, __A); } +TEST_CONSTEXPR(match_m512(_mm512_mask_expand_ps((__m512)(__v16sf){ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f}, 0xB474, (__m512)(__v16sf){ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f} ), 99.0f, 99.0f, 1.0f, 99.0f, 2.0f, 3.0f, 4.0f, 99.0f, 99.0f, 99.0f, 5.0f, 99.0f, 6.0f, 7.0f, 99.0f, 8.0f)); __m512 test_mm512_maskz_expand_ps(__mmask16 __U, __m512 __A) { // CHECK-LABEL: test_mm512_maskz_expand_ps // CHECK: @llvm.x86.avx512.mask.expand return _mm512_maskz_expand_ps(__U, __A); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_expand_ps(0xB474, (__m512)(__v16sf){ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f} ), 0.0f, 0.0f, 1.0f, 0.0f, 2.0f, 3.0f, 4.0f, 0.0f, 0.0f, 0.0f, 5.0f, 0.0f, 6.0f, 7.0f, 0.0f, 8.0f)); __m512i test_mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_expand_epi32 // CHECK: @llvm.x86.avx512.mask.expand return _mm512_mask_expand_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_expand_epi32((__m512i)(__v16si){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xE513, (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ), 1, 2, 99, 99, 3, 99, 99, 99, 4, 99, 5, 99, 99, 6, 7, 8)); __m512i test_mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_expand_epi32 // CHECK: @llvm.x86.avx512.mask.expand return _mm512_maskz_expand_epi32(__U, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_expand_epi32(0xE513, (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ), 1, 2, 0, 0, 3, 0, 0, 0, 4, 0, 5, 0, 0, 6, 7, 8)); + __m512d test_mm512_cvt_roundps_pd(__m256 __A) { // CHECK-LABEL: test_mm512_cvt_roundps_pd // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 diff --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c index c76f7bc0e19de..1e71285b29b4e 100644 --- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c @@ -30,14 +30,14 @@ __m512i test_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) { // CHECK: call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i1> %{{.*}}) return _mm512_mask_compress_epi8(__S, __U, __D); } -TEST_CONSTEXPR(match_v64qi(_mm512_mask_compress_epi8((__m512i)(__v64qs){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x8000000000000003ULL, (__m512i)(__v64qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 1, 63, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v64qi(_mm512_mask_compress_epi8((__m512i)(__v64qi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x8000000000000003ULL, (__m512i)(__v64qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 1, 63, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); __m512i test_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) { // CHECK-LABEL: test_mm512_maskz_compress_epi8 // CHECK: call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i1> %{{.*}}) return _mm512_maskz_compress_epi8(__U, __D); } -TEST_CONSTEXPR(match_v64qi(_mm512_maskz_compress_epi8(0x8000000000000003ULL, (__m512i)(__v64qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 1, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_compress_epi8(0x8000000000000003ULL, (__m512i)(__v64qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 1, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); void test_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) { // CHECK-LABEL: test_mm512_mask_compressstoreu_epi16 @@ -56,24 +56,28 @@ __m512i test_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) { // CHECK: call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i1> %{{.*}}) return _mm512_mask_expand_epi16(__S, __U, __D); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_expand_epi16((__m512i)(__v32hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5838F3C4, (__m512i)(__v32hi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} ), 99, 99, 1, 99, 99, 99, 2, 3, 4, 5, 99, 99, 6, 7, 8, 9, 99, 99, 99, 10, 11, 12, 99, 99, 99, 99, 99, 13, 14, 99, 15, 99)); __m512i test_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) { // CHECK-LABEL: test_mm512_maskz_expand_epi16 // CHECK: call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i1> %{{.*}}) return _mm512_maskz_expand_epi16(__U, __D); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_expand_epi16(0x5838F3C4, (__m512i)(__v32hi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} ), 0, 0, 1, 0, 0, 0, 2, 3, 4, 5, 0, 0, 6, 7, 8, 9, 0, 0, 0, 10, 11, 12, 0, 0, 0, 0, 0, 13, 14, 0, 15, 0)); __m512i test_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) { // CHECK-LABEL: test_mm512_mask_expand_epi8 // CHECK: call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i1> %{{.*}}) return _mm512_mask_expand_epi8(__S, __U, __D); } +TEST_CONSTEXPR(match_v64qi(_mm512_mask_expand_epi8((__m512i)(__v64qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x416B5E0F4234A3D5, (__m512i)(__v64qi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64} ), 1, 99, 2, 99, 3, 99, 4, 5, 6, 7, 99, 99, 99, 8, 99, 9, 99, 99, 10, 99, 11, 12, 99, 99, 99, 13, 99, 99, 99, 99, 14, 99, 15, 16, 17, 18, 99, 99, 99, 99, 99, 19, 20, 21, 22, 99, 23, 99, 24, 25, 99, 26, 99, 27, 28, 99, 29, 99, 99, 99, 99, 99, 30, 99)); __m512i test_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) { // CHECK-LABEL: test_mm512_maskz_expand_epi8 // CHECK: call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i1> %{{.*}}) return _mm512_maskz_expand_epi8(__U, __D); } +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_expand_epi8(0x416B5E0F4234A3D5, (__m512i)(__v64qi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64} ), 1, 0, 2, 0, 3, 0, 4, 5, 6, 7, 0, 0, 0, 8, 0, 9, 0, 0, 10, 0, 11, 12, 0, 0, 0, 13, 0, 0, 0, 0, 14, 0, 15, 16, 17, 18, 0, 0, 0, 0, 0, 19, 20, 21, 22, 0, 23, 0, 24, 25, 0, 26, 0, 27, 28, 0, 29, 0, 0, 0, 0, 0, 30, 0)); __m512i test_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const* __P) { // CHECK-LABEL: test_mm512_mask_expandloadu_epi16 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 5565e881371c0..4a685c5474dc2 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -4431,41 +4431,57 @@ __m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) { // CHECK: @llvm.x86.avx512.mask.expand return _mm_mask_expand_pd(__W,__U,__A); } +TEST_CONSTEXPR(match_m128d(_mm_mask_expand_pd((__m128d)(__v2df){ 99.0, 99.0}, 0x2, (__m128d)(__v2df){ 1.0, 2.0} ), 99.0, 1.0)); + __m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_maskz_expand_pd // CHECK: @llvm.x86.avx512.mask.expand return _mm_maskz_expand_pd(__U,__A); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_expand_pd(0x2, (__m128d)(__v2df){ 1.0, 2.0} ), 0.0, 1.0)); + __m256d test_mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_mask_expand_pd // CHECK: @llvm.x86.avx512.mask.expand return _mm256_mask_expand_pd(__W,__U,__A); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_expand_pd((__m256d)(__v4df){ 99.0, 99.0, 99.0, 99.0}, 0x8, (__m256d)(__v4df){ 1.0, 2.0, 3.0, 4.0} ), 99.0, 99.0, 99.0, 1.0)); + __m256d test_mm256_maskz_expand_pd(__mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_maskz_expand_pd // CHECK: @llvm.x86.avx512.mask.expand return _mm256_maskz_expand_pd(__U,__A); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_expand_pd(0x8, (__m256d)(__v4df){ 1.0, 2.0, 3.0, 4.0} ), 0.0, 0.0, 0.0, 1.0)); + __m128i test_mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_expand_epi64 // CHECK: @llvm.x86.avx512.mask.expand return _mm_mask_expand_epi64(__W,__U,__A); } +TEST_CONSTEXPR(match_v2di(_mm_mask_expand_epi64((__m128i)(__v2di){ 99, 99}, 0x2, (__m128i)(__v2di){ 1, 2} ), 99, 1)); + __m128i test_mm_maskz_expand_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_expand_epi64 // CHECK: @llvm.x86.avx512.mask.expand return _mm_maskz_expand_epi64(__U,__A); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_expand_epi64(0x2, (__m128i)(__v2di){ 1, 2} ), 0, 1)); + __m256i test_mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_expand_epi64 // CHECK: @llvm.x86.avx512.mask.expand return _mm256_mask_expand_epi64(__W,__U,__A); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_expand_epi64((__m256i)(__v4di){ 99, 99, 99, 99}, 0xB, (__m256i)(__v4di){ 1, 2, 3, 4} ), 1, 2, 99, 3)); + __m256i test_mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_expand_epi64 // CHECK: @llvm.x86.avx512.mask.expand return _mm256_maskz_expand_epi64(__U,__A); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_expand_epi64(0xB, (__m256i)(__v4di){ 1, 2, 3, 4} ), 1, 2, 0, 3)); + __m128d test_mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P) { // CHECK-LABEL: test_mm_mask_expandloadu_pd // CHECK: @llvm.masked.expandload.v2f64(ptr %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) @@ -4551,41 +4567,57 @@ __m128 test_mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A) { // CHECK: @llvm.x86.avx512.mask.expand return _mm_mask_expand_ps(__W,__U,__A); } +TEST_CONSTEXPR(match_m128(_mm_mask_expand_ps((__m128)(__v4sf){ 99.0f, 99.0f, 99.0f, 99.0f}, 0xC, (__m128)(__v4sf){ 1.0f, 2.0f, 3.0f, 4.0f} ), 99.0f, 99.0f, 1.0f, 2.0f)); + __m128 test_mm_maskz_expand_ps(__mmask8 __U, __m128 __A) { // CHECK-LABEL: test_mm_maskz_expand_ps // CHECK: @llvm.x86.avx512.mask.expand return _mm_maskz_expand_ps(__U,__A); } +TEST_CONSTEXPR(match_m128(_mm_maskz_expand_ps(0xC, (__m128)(__v4sf){ 1.0f, 2.0f, 3.0f, 4.0f} ), 0.0f, 0.0f, 1.0f, 2.0f)); + __m256 test_mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm256_mask_expand_ps // CHECK: @llvm.x86.avx512.mask.expand return _mm256_mask_expand_ps(__W,__U,__A); } +TEST_CONSTEXPR(match_m256(_mm256_mask_expand_ps((__m256)(__v8sf){ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f}, 0x6C, (__m256)(__v8sf){ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f} ), 99.0f, 99.0f, 1.0f, 2.0f, 99.0f, 3.0f, 4.0f, 99.0f)); + __m256 test_mm256_maskz_expand_ps(__mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm256_maskz_expand_ps // CHECK: @llvm.x86.avx512.mask.expand return _mm256_maskz_expand_ps(__U,__A); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_expand_ps(0x6C, (__m256)(__v8sf){ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f} ), 0.0f, 0.0f, 1.0f, 2.0f, 0.0f, 3.0f, 4.0f, 0.0f)); + __m128i test_mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_expand_epi32 // CHECK: @llvm.x86.avx512.mask.expand return _mm_mask_expand_epi32(__W,__U,__A); } +TEST_CONSTEXPR(match_v4si(_mm_mask_expand_epi32((__m128i)(__v4si){ 99, 99, 99, 99}, 0x2, (__m128i)(__v4si){ 1, 2, 3, 4} ), 99, 1, 99, 99)); + __m128i test_mm_maskz_expand_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_expand_epi32 // CHECK: @llvm.x86.avx512.mask.expand return _mm_maskz_expand_epi32(__U,__A); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_expand_epi32(0x2, (__m128i)(__v4si){ 1, 2, 3, 4} ), 0, 1, 0, 0)); + __m256i test_mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_expand_epi32 // CHECK: @llvm.x86.avx512.mask.expand return _mm256_mask_expand_epi32(__W,__U,__A); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_expand_epi32((__m256i)(__v8si){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x87, (__m256i)(__v8si){ 1, 2, 3, 4, 5, 6, 7, 8} ), 1, 2, 3, 99, 99, 99, 99, 4)); + __m256i test_mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_expand_epi32 // CHECK: @llvm.x86.avx512.mask.expand return _mm256_maskz_expand_epi32(__U,__A); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_expand_epi32(0x87, (__m256i)(__v8si){ 1, 2, 3, 4, 5, 6, 7, 8} ), 1, 2, 3, 0, 0, 0, 0, 4)); + __m128d test_mm_getexp_pd(__m128d __A) { // CHECK-LABEL: test_mm_getexp_pd // CHECK: @llvm.x86.avx512.mask.getexp.pd.128 diff --git a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c index 2296e1b062b7a..94b16a9bd3541 100644 --- a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c @@ -30,14 +30,14 @@ __m128i test_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) { // CHECK: call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i1> %{{.*}}) return _mm_mask_compress_epi8(__S, __U, __D); } -TEST_CONSTEXPR(match_v16qi(_mm_mask_compress_epi8((__m128i)(__v16qs){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x8003, (__m128i)(__v16qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 15, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v16qi(_mm_mask_compress_epi8((__m128i)(__v16qi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x8003, (__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 15, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); __m128i test_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) { // CHECK-LABEL: test_mm_maskz_compress_epi8 // CHECK: call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i1> %{{.*}}) return _mm_maskz_compress_epi8(__U, __D); } -TEST_CONSTEXPR(match_v16qi(_mm_maskz_compress_epi8(0x8003, (__m128i)(__v16qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16qi(_mm_maskz_compress_epi8(0x8003, (__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); void test_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) { // CHECK-LABEL: test_mm_mask_compressstoreu_epi16 @@ -56,24 +56,28 @@ __m128i test_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) { // CHECK: call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i1> %{{.*}}) return _mm_mask_expand_epi16(__S, __U, __D); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_expand_epi16((__m128i)(__v8hi){ 99, 99, 99, 99, 99, 99, 99, 99}, 0xB7, (__m128i)(__v8hi){ 1, 2, 3, 4, 5, 6, 7, 8} ), 1, 2, 3, 99, 4, 5, 99, 6)); __m128i test_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) { // CHECK-LABEL: test_mm_maskz_expand_epi16 // CHECK: call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i1> %{{.*}}) return _mm_maskz_expand_epi16(__U, __D); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_expand_epi16(0xB7, (__m128i)(__v8hi){ 1, 2, 3, 4, 5, 6, 7, 8} ), 1, 2, 3, 0, 4, 5, 0, 6)); __m128i test_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) { // CHECK-LABEL: test_mm_mask_expand_epi8 // CHECK: call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i1> %{{.*}}) return _mm_mask_expand_epi8(__S, __U, __D); } +TEST_CONSTEXPR(match_v16qi(_mm_mask_expand_epi8((__m128i)(__v16qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x4A9C, (__m128i)(__v16qi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ), 99, 99, 1, 2, 3, 99, 99, 4, 99, 5, 99, 6, 99, 99, 7, 99)); __m128i test_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) { // CHECK-LABEL: test_mm_maskz_expand_epi8 // CHECK: call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i1> %{{.*}}) return _mm_maskz_expand_epi8(__U, __D); } +TEST_CONSTEXPR(match_v16qi(_mm_maskz_expand_epi8(0x4A9C, (__m128i)(__v16qi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ), 0, 0, 1, 2, 3, 0, 0, 4, 0, 5, 0, 6, 0, 0, 7, 0)); __m128i test_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const* __P) { // CHECK-LABEL: test_mm_mask_expandloadu_epi16 @@ -118,14 +122,14 @@ __m256i test_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) { // CHECK: call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i1> %{{.*}}) return _mm256_mask_compress_epi8(__S, __U, __D); } -TEST_CONSTEXPR(match_v32qi(_mm256_mask_compress_epi8((__m256i)(__v32qs){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x80000003, (__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 31, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v32qi(_mm256_mask_compress_epi8((__m256i)(__v32qi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x80000003, (__m256i)(__v32qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 31, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); __m256i test_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) { // CHECK-LABEL: test_mm256_maskz_compress_epi8 // CHECK: call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i1> %{{.*}}) return _mm256_maskz_compress_epi8(__U, __D); } -TEST_CONSTEXPR(match_v32qi(_mm256_maskz_compress_epi8(0x80000003, (__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_compress_epi8(0x80000003, (__m256i)(__v32qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); void test_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) { // CHECK-LABEL: test_mm256_mask_compressstoreu_epi16 @@ -144,24 +148,28 @@ __m256i test_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) { // CHECK: call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i1> %{{.*}}) return _mm256_mask_expand_epi16(__S, __U, __D); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_expand_epi16((__m256i)(__v16hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xE2EB, (__m256i)(__v16hi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ), 1, 2, 99, 3, 99, 4, 5, 6, 99, 7, 99, 99, 99, 8, 9, 10)); __m256i test_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) { // CHECK-LABEL: test_mm256_maskz_expand_epi16 // CHECK: call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i1> %{{.*}}) return _mm256_maskz_expand_epi16(__U, __D); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_expand_epi16(0xE2EB, (__m256i)(__v16hi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ), 1, 2, 0, 3, 0, 4, 5, 6, 0, 7, 0, 0, 0, 8, 9, 10)); __m256i test_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) { // CHECK-LABEL: test_mm256_mask_expand_epi8 // CHECK: call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i1> %{{.*}}) return _mm256_mask_expand_epi8(__S, __U, __D); } +TEST_CONSTEXPR(match_v32qi(_mm256_mask_expand_epi8((__m256i)(__v32qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x134DA768, (__m256i)(__v32qi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} ), 99, 99, 99, 1, 99, 2, 3, 99, 4, 5, 6, 99, 99, 7, 99, 8, 9, 99, 10, 11, 99, 99, 12, 99, 13, 14, 99, 99, 15, 99, 99, 99)); __m256i test_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) { // CHECK-LABEL: test_mm256_maskz_expand_epi8 // CHECK: call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i1> %{{.*}}) return _mm256_maskz_expand_epi8(__U, __D); } +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_expand_epi8(0x134DA768, (__m256i)(__v32qi){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} ), 0, 0, 0, 1, 0, 2, 3, 0, 4, 5, 6, 0, 0, 7, 0, 8, 9, 0, 10, 11, 0, 0, 12, 0, 13, 14, 0, 0, 15, 0, 0, 0)); __m256i test_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const* __P) { // CHECK-LABEL: test_mm256_mask_expandloadu_epi16 >From 7506b7fb21f536bd7adf6c2563ee7fc16f157d38 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <[email protected]> Date: Mon, 23 Mar 2026 07:54:26 +0000 Subject: [PATCH 2/2] unused NumElems variable --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ddc1c2b03bd7b..fa4eb2077e44e 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -5175,12 +5175,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_expandsi128_mask: case X86::BI__builtin_ia32_expandsi256_mask: case X86::BI__builtin_ia32_expandsi512_mask: { - unsigned NumElems = - Call->getArg(0)->getType()->castAs<VectorType>()->getNumElements(); return interp__builtin_ia32_shuffle_generic( - S, OpPC, Call, [NumElems](unsigned DstIdx, const APInt &ShuffleMask) { + S, OpPC, Call, [](unsigned DstIdx, const APInt &ShuffleMask) { // Trunc to the sub-mask for the dst index and count the number of - // src elements used prior to that. + // src elements used prior to that. APInt ExpandMask = ShuffleMask.trunc(DstIdx + 1); if (ExpandMask[DstIdx]) { int SrcIdx = ExpandMask.popcount() - 1; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
