https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/152705
>From 9e77ca23a4e075db6896bdd66c1a5877ac90133c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-...@redking.me.uk> Date: Fri, 8 Aug 2025 13:47:58 +0100 Subject: [PATCH] [Clang][X86] Add avx512 __builtin_ia32_select* constexpr handling Fixes #152321 --- clang/include/clang/Basic/BuiltinsX86.td | 48 +++++++++---------- clang/lib/AST/ExprConstant.cpp | 45 +++++++++++++++++ clang/lib/Headers/avx512fintrin.h | 4 +- clang/lib/Headers/avx512vpopcntdqintrin.h | 16 +++---- clang/lib/Headers/avx512vpopcntdqvlintrin.h | 28 ++++++----- clang/test/CodeGen/X86/avx512f-builtins.c | 3 ++ .../CodeGen/X86/avx512vpopcntdq-builtins.c | 4 ++ .../CodeGen/X86/avx512vpopcntdqvl-builtins.c | 8 ++++ 8 files changed, 109 insertions(+), 47 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index a4acc72fdc37d..492be89a0fe89 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -4246,99 +4246,99 @@ let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVecto def vfcmulcph512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectb_128 : X86Builtin<"_Vector<16, char>(unsigned short, _Vector<16, char>, _Vector<16, char>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectb_256 : X86Builtin<"_Vector<32, char>(unsigned int, _Vector<32, char>, _Vector<32, char>)">; } -let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectb_512 : X86Builtin<"_Vector<64, char>(unsigned long long int, _Vector<64, char>, _Vector<64, char>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectw_128 : X86Builtin<"_Vector<8, short>(unsigned char, _Vector<8, short>, _Vector<8, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectw_256 : X86Builtin<"_Vector<16, short>(unsigned short, _Vector<16, short>, _Vector<16, short>)">; } -let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectw_512 : X86Builtin<"_Vector<32, short>(unsigned int, _Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectd_128 : X86Builtin<"_Vector<4, int>(unsigned char, _Vector<4, int>, _Vector<4, int>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectd_256 : X86Builtin<"_Vector<8, int>(unsigned char, _Vector<8, int>, _Vector<8, int>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectd_512 : X86Builtin<"_Vector<16, int>(unsigned short, _Vector<16, int>, _Vector<16, int>)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectph_128 : X86Builtin<"_Vector<8, _Float16>(unsigned char, _Vector<8, _Float16>, _Vector<8, _Float16>)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectph_256 : X86Builtin<"_Vector<16, _Float16>(unsigned short, _Vector<16, _Float16>, _Vector<16, _Float16>)">; } -let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectph_512 : X86Builtin<"_Vector<32, _Float16>(unsigned int, _Vector<32, _Float16>, _Vector<32, _Float16>)">; } -let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectpbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">; } -let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectpbf_256 : X86Builtin<"_Vector<16, __bf16>(unsigned short, _Vector<16, __bf16>, _Vector<16, __bf16>)">; } -let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectpbf_512 : X86Builtin<"_Vector<32, __bf16>(unsigned int, _Vector<32, __bf16>, _Vector<32, __bf16>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectq_128 : X86Builtin<"_Vector<2, long long int>(unsigned char, _Vector<2, long long int>, _Vector<2, long long int>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectq_256 : X86Builtin<"_Vector<4, long long int>(unsigned char, _Vector<4, long long int>, _Vector<4, long long int>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectq_512 : X86Builtin<"_Vector<8, long long int>(unsigned char, _Vector<8, long long int>, _Vector<8, long long int>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectps_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectps_256 : X86Builtin<"_Vector<8, float>(unsigned char, _Vector<8, float>, _Vector<8, float>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectps_512 : X86Builtin<"_Vector<16, float>(unsigned short, _Vector<16, float>, _Vector<16, float>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectpd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectpd_256 : X86Builtin<"_Vector<4, double>(unsigned char, _Vector<4, double>, _Vector<4, double>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectpd_512 : X86Builtin<"_Vector<8, double>(unsigned char, _Vector<8, double>, _Vector<8, double>)">; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3679327da7b0c..c6d81bccf42e1 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11658,6 +11658,51 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_selectb_128: + case X86::BI__builtin_ia32_selectb_256: + case X86::BI__builtin_ia32_selectb_512: + case X86::BI__builtin_ia32_selectw_128: + case X86::BI__builtin_ia32_selectw_256: + case X86::BI__builtin_ia32_selectw_512: + case X86::BI__builtin_ia32_selectd_128: + case X86::BI__builtin_ia32_selectd_256: + case X86::BI__builtin_ia32_selectd_512: + case X86::BI__builtin_ia32_selectq_128: + case X86::BI__builtin_ia32_selectq_256: + case X86::BI__builtin_ia32_selectq_512: + case X86::BI__builtin_ia32_selectph_128: + case X86::BI__builtin_ia32_selectph_256: + case X86::BI__builtin_ia32_selectph_512: + case X86::BI__builtin_ia32_selectpbf_128: + case X86::BI__builtin_ia32_selectpbf_256: + case X86::BI__builtin_ia32_selectpbf_512: + case X86::BI__builtin_ia32_selectps_128: + case X86::BI__builtin_ia32_selectps_256: + case X86::BI__builtin_ia32_selectps_512: + case X86::BI__builtin_ia32_selectpd_128: + case X86::BI__builtin_ia32_selectpd_256: + case X86::BI__builtin_ia32_selectpd_512: { + // AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]". + APValue SourceMask, SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceMask) || + !EvaluateAsRValue(Info, E->getArg(1), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(2), SourceRHS)) + return false; + + APSInt Mask = SourceMask.getInt(); + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned SourceLen = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(SourceLen); + + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + const APValue &LHS = SourceLHS.getVectorElt(EltNum); + const APValue &RHS = SourceRHS.getVectorElt(EltNum); + ResultElements.push_back(Mask[EltNum] ? LHS : RHS); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } } } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index e3bc71ed8e918..c1f61e76ed0e2 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8962,7 +8962,7 @@ _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, @@ -8970,7 +8970,7 @@ _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) (__v8df) __W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, diff --git a/clang/lib/Headers/avx512vpopcntdqintrin.h b/clang/lib/Headers/avx512vpopcntdqintrin.h index 79fc6e140c616..7bf0700da77c9 100644 --- a/clang/lib/Headers/avx512vpopcntdqintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqintrin.h @@ -16,18 +16,19 @@ #define __AVX512VPOPCNTDQINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vpopcntdq,evex512"), \ - __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr + __min_vector_width__(512))) constexpr #else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,evex512"), \ + __min_vector_width__(512))) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v8du)__A); } @@ -43,7 +44,7 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v16su)__A); } @@ -60,6 +61,5 @@ _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { } #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vpopcntdqvlintrin.h b/clang/lib/Headers/avx512vpopcntdqvlintrin.h index d14cb1eb31f14..deb849f950357 100644 --- a/clang/lib/Headers/avx512vpopcntdqvlintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqvlintrin.h @@ -16,6 +16,17 @@ #define __AVX512VPOPCNTDQVLINTRIN_H /* Define the default attributes for the functions in this file. */ + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ + __min_vector_width__(256))) constexpr +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ @@ -24,16 +35,9 @@ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#else -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 #endif -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi64(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v2du)__A); } @@ -49,7 +53,7 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi32(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v4su)__A); } @@ -65,7 +69,7 @@ _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi64(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v4du)__A); } @@ -81,7 +85,7 @@ _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi32(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v8su)__A); } @@ -99,7 +103,5 @@ _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 -#undef __DEFAULT_FN_ATTRS128_CONSTEXPR -#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index d59799e2c24e5..f939b5ac01a8c 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8905,17 +8905,20 @@ __m512d test_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_cvtps_pd(__U, __A); } + __m512d test_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_mov_pd // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_mov_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_mov_pd((__m512d){-8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0}, 0xC3, (__m512d){+1.0, +2.0, +3.0, +4.0, +5.0, +6.0, +7.0, +8.0}), +1.0, +2.0, -6.0, -5.0, -4.0, -3.0, +7.0, +8.0)); __m512d test_mm512_maskz_mov_pd(__mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_maskz_mov_pd // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_mov_pd(__U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_mov_pd(0xC3, (__m512d){+1.0, +2.0, +3.0, +4.0, +5.0, +6.0, +7.0, +8.0}), +1.0, +2.0, +0.0, +0.0, +0.0, +0.0, +7.0, +8.0)); __m512 test_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) { // CHECK-LABEL: test_mm512_mask_mov_ps diff --git a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c index 8927ae273d29b..b80ffdf0dcc7c 100644 --- a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c @@ -19,6 +19,7 @@ __m512i test_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_popcnt_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_popcnt_epi64(_mm512_set1_epi64(-1), 0x81, (__m512i)(__v8di){+5, -3, -10, +8, 0, -256, +256, -128}), 2, -1, -1, -1, -1, -1, -1, 57)); __m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_popcnt_epi64 @@ -26,6 +27,7 @@ __m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_popcnt_epi64(__U, __A); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_popcnt_epi64(0x42, (__m512i)(__v8di){+5, -3, -10, +8, 0, -256, +256, -128}), 0, 63, 0, 0, 0, 0, 1, 0)); __m512i test_mm512_popcnt_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_popcnt_epi32 @@ -40,6 +42,7 @@ __m512i test_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_popcnt_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_popcnt_epi32(_mm512_set1_epi32(-1), 0x0F81, (__m512i)(__v16si){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, -1, -1, -1, -1, -1, -1, 25, 2, 2, 4, 2, -1, -1, -1, -1)); __m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_popcnt_epi32 @@ -47,3 +50,4 @@ __m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_popcnt_epi32(__U, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_popcnt_epi32(0xF042, (__m512i)(__v16si){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 0, 31, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 6, 2, 9, 2)); diff --git a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c index d9fbd7628142f..7258034c57adc 100644 --- a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c @@ -19,6 +19,7 @@ __m128i test_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_popcnt_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_mask_popcnt_epi64(_mm_set1_epi64x(-1), 0x2, (__m128i)(__v2di){+5, -3}), -1, 63)); __m128i test_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_popcnt_epi64 @@ -26,6 +27,7 @@ __m128i test_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_popcnt_epi64(__U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_popcnt_epi64(0x1, (__m128i)(__v2di){+5, -3}), 2, 0)); __m128i test_mm_popcnt_epi32(__m128i __A) { // CHECK-LABEL: test_mm_popcnt_epi32 @@ -40,6 +42,7 @@ __m128i test_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_popcnt_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_mask_popcnt_epi32(_mm_set1_epi32(-1), 0x3, (__m128i)(__v4si){+5, -3, -10, +8}), 2, 31, -1, -1)); __m128i test_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_popcnt_epi32 @@ -47,6 +50,7 @@ __m128i test_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_popcnt_epi32(__U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_popcnt_epi32(0x5, (__m128i)(__v4si){+5, -3, -10, +8}), 2, 0, 30, 0)); __m256i test_mm256_popcnt_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_popcnt_epi64 @@ -61,6 +65,7 @@ __m256i test_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_popcnt_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_popcnt_epi64(_mm256_set1_epi64x(-1), 0x3, (__m256i)(__v4di){+5, -3, -10, +8}), 2, 63, -1, -1)); __m256i test_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_popcnt_epi64 @@ -68,6 +73,7 @@ __m256i test_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_popcnt_epi64(__U, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_popcnt_epi64(0x5, (__m256i)(__v4di){+5, -3, -10, +8}), 2, 0, 62, 0)); __m256i test_mm256_popcnt_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_popcnt_epi32 @@ -82,6 +88,7 @@ __m256i test_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_popcnt_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_popcnt_epi32(_mm256_set1_epi32(-1), 0x37, (__m256i)(__v8si){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 31, 30, -1, 0, 24, -1, -1)); __m256i test_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_popcnt_epi32 @@ -89,3 +96,4 @@ __m256i test_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_popcnt_epi32(__U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_popcnt_epi32(0x8C, (__m256i)(__v8si){+5, -3, -10, +8, 0, -256, +256, -128}), 0, 0, 30, 1, 0, 0, 0, 25)); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits