https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/152705
>From ceff5091174b9565818d1bb4d420e1f9fd7c843f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-...@redking.me.uk> Date: Fri, 8 Aug 2025 13:47:58 +0100 Subject: [PATCH 1/2] [Clang][X86] Add avx512 __builtin_ia32_select* constexpr handling Fixes #152321 --- clang/include/clang/Basic/BuiltinsX86.td | 48 +++++++++---------- clang/lib/AST/ExprConstant.cpp | 45 +++++++++++++++++ clang/lib/Headers/avx512bitalgintrin.h | 8 ++-- clang/lib/Headers/avx512fintrin.h | 8 ++-- clang/lib/Headers/avx512vlbitalgintrin.h | 16 +++---- clang/lib/Headers/avx512vlintrin.h | 27 +++++++---- clang/lib/Headers/avx512vpopcntdqintrin.h | 16 +++---- clang/lib/Headers/avx512vpopcntdqvlintrin.h | 28 ++++++----- .../test/CodeGen/X86/avx512bitalg-builtins.c | 6 +++ clang/test/CodeGen/X86/avx512f-builtins.c | 5 ++ clang/test/CodeGen/X86/avx512vl-builtins.c | 9 ++++ .../CodeGen/X86/avx512vlbitalg-builtins.c | 12 +++++ .../CodeGen/X86/avx512vpopcntdq-builtins.c | 4 ++ .../CodeGen/X86/avx512vpopcntdqvl-builtins.c | 8 ++++ 14 files changed, 170 insertions(+), 70 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index fc1ee3be7889f..3355217a05156 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -4233,99 +4233,99 @@ let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVecto def vfcmulcph512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectb_128 : X86Builtin<"_Vector<16, char>(unsigned short, _Vector<16, char>, _Vector<16, char>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectb_256 : X86Builtin<"_Vector<32, char>(unsigned int, _Vector<32, char>, _Vector<32, char>)">; } -let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectb_512 : X86Builtin<"_Vector<64, char>(unsigned long long int, _Vector<64, char>, _Vector<64, char>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectw_128 : X86Builtin<"_Vector<8, short>(unsigned char, _Vector<8, short>, _Vector<8, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectw_256 : X86Builtin<"_Vector<16, short>(unsigned short, _Vector<16, short>, _Vector<16, short>)">; } -let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectw_512 : X86Builtin<"_Vector<32, short>(unsigned int, _Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectd_128 : X86Builtin<"_Vector<4, int>(unsigned char, _Vector<4, int>, _Vector<4, int>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectd_256 : X86Builtin<"_Vector<8, int>(unsigned char, _Vector<8, int>, _Vector<8, int>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectd_512 : X86Builtin<"_Vector<16, int>(unsigned short, _Vector<16, int>, _Vector<16, int>)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectph_128 : X86Builtin<"_Vector<8, _Float16>(unsigned char, _Vector<8, _Float16>, _Vector<8, _Float16>)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectph_256 : X86Builtin<"_Vector<16, _Float16>(unsigned short, _Vector<16, _Float16>, _Vector<16, _Float16>)">; } -let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectph_512 : X86Builtin<"_Vector<32, _Float16>(unsigned int, _Vector<32, _Float16>, _Vector<32, _Float16>)">; } -let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectpbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">; } -let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectpbf_256 : X86Builtin<"_Vector<16, __bf16>(unsigned short, _Vector<16, __bf16>, _Vector<16, __bf16>)">; } -let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectpbf_512 : X86Builtin<"_Vector<32, __bf16>(unsigned int, _Vector<32, __bf16>, _Vector<32, __bf16>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectq_128 : X86Builtin<"_Vector<2, long long int>(unsigned char, _Vector<2, long long int>, _Vector<2, long long int>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectq_256 : X86Builtin<"_Vector<4, long long int>(unsigned char, _Vector<4, long long int>, _Vector<4, long long int>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectq_512 : X86Builtin<"_Vector<8, long long int>(unsigned char, _Vector<8, long long int>, _Vector<8, long long int>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectps_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectps_256 : X86Builtin<"_Vector<8, float>(unsigned char, _Vector<8, float>, _Vector<8, float>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectps_512 : X86Builtin<"_Vector<16, float>(unsigned short, _Vector<16, float>, _Vector<16, float>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectpd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def selectpd_256 : X86Builtin<"_Vector<4, double>(unsigned char, _Vector<4, double>, _Vector<4, double>)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def selectpd_512 : X86Builtin<"_Vector<8, double>(unsigned char, _Vector<8, double>, _Vector<8, double>)">; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 36dd0f5d7a065..3a03b9544d79f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11723,6 +11723,51 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_selectb_128: + case X86::BI__builtin_ia32_selectb_256: + case X86::BI__builtin_ia32_selectb_512: + case X86::BI__builtin_ia32_selectw_128: + case X86::BI__builtin_ia32_selectw_256: + case X86::BI__builtin_ia32_selectw_512: + case X86::BI__builtin_ia32_selectd_128: + case X86::BI__builtin_ia32_selectd_256: + case X86::BI__builtin_ia32_selectd_512: + case X86::BI__builtin_ia32_selectq_128: + case X86::BI__builtin_ia32_selectq_256: + case X86::BI__builtin_ia32_selectq_512: + case X86::BI__builtin_ia32_selectph_128: + case X86::BI__builtin_ia32_selectph_256: + case X86::BI__builtin_ia32_selectph_512: + case X86::BI__builtin_ia32_selectpbf_128: + case X86::BI__builtin_ia32_selectpbf_256: + case X86::BI__builtin_ia32_selectpbf_512: + case X86::BI__builtin_ia32_selectps_128: + case X86::BI__builtin_ia32_selectps_256: + case X86::BI__builtin_ia32_selectps_512: + case X86::BI__builtin_ia32_selectpd_128: + case X86::BI__builtin_ia32_selectpd_256: + case X86::BI__builtin_ia32_selectpd_512: { + // AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]". + APValue SourceMask, SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceMask) || + !EvaluateAsRValue(Info, E->getArg(1), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(2), SourceRHS)) + return false; + + APSInt Mask = SourceMask.getInt(); + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned SourceLen = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(SourceLen); + + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + const APValue &LHS = SourceLHS.getVectorElt(EltNum); + const APValue &RHS = SourceRHS.getVectorElt(EltNum); + ResultElements.push_back(Mask[EltNum] ? LHS : RHS); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } } } diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h index 9a1ff8f39734f..76c1a158b223f 100644 --- a/clang/lib/Headers/avx512bitalgintrin.h +++ b/clang/lib/Headers/avx512bitalgintrin.h @@ -32,7 +32,7 @@ _mm512_popcnt_epi16(__m512i __A) return (__m512i)__builtin_elementwise_popcount((__v32hu)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) { return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U, @@ -40,7 +40,7 @@ _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) (__v32hi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(), @@ -54,7 +54,7 @@ _mm512_popcnt_epi8(__m512i __A) return (__m512i)__builtin_elementwise_popcount((__v64qu)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) { return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U, @@ -62,7 +62,7 @@ _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) (__v64qi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(), diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9fc1df3acd3d0..73a915efd516f 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8947,7 +8947,7 @@ _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, @@ -8955,7 +8955,7 @@ _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) (__v8df) __W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, @@ -8963,7 +8963,7 @@ _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) (__v8df) _mm512_setzero_pd ()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, @@ -8971,7 +8971,7 @@ _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) (__v16sf) __W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h index 739e78aab753d..e29a149df5f90 100644 --- a/clang/lib/Headers/avx512vlbitalgintrin.h +++ b/clang/lib/Headers/avx512vlbitalgintrin.h @@ -38,7 +38,7 @@ _mm256_popcnt_epi16(__m256i __A) return (__m256i)__builtin_elementwise_popcount((__v16hu)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, @@ -46,7 +46,7 @@ _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) (__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), @@ -60,7 +60,7 @@ _mm_popcnt_epi16(__m128i __A) return (__m128i)__builtin_elementwise_popcount((__v8hu)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, @@ -68,7 +68,7 @@ _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) (__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), @@ -82,7 +82,7 @@ _mm256_popcnt_epi8(__m256i __A) return (__m256i)__builtin_elementwise_popcount((__v32qu)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, @@ -90,7 +90,7 @@ _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) (__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), @@ -104,7 +104,7 @@ _mm_popcnt_epi8(__m128i __A) return (__m128i)__builtin_elementwise_popcount((__v16qu)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, @@ -112,7 +112,7 @@ _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) (__v16qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index cbad39acad84f..fd1bd291ecbcb 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -23,6 +23,14 @@ __target__("avx512vl,no-evex512"), \ __min_vector_width__(256))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + typedef short __v2hi __attribute__((__vector_size__(4))); typedef char __v4qi __attribute__((__vector_size__(4))); typedef char __v2qi __attribute__((__vector_size__(2))); @@ -8258,7 +8266,7 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) (__v4si)_mm_shuffle_epi32((A), (I)), \ (__v4si)_mm_setzero_si128())) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, @@ -8266,7 +8274,7 @@ _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) (__v2df) __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, @@ -8274,7 +8282,7 @@ _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) (__v2df) _mm_setzero_pd ()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, @@ -8282,7 +8290,7 @@ _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) (__v4df) __W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, @@ -8290,7 +8298,7 @@ _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) (__v4df) _mm256_setzero_pd ()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, @@ -8298,7 +8306,7 @@ _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) (__v4sf) __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, @@ -8306,7 +8314,7 @@ _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) (__v4sf) _mm_setzero_ps ()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, @@ -8314,7 +8322,7 @@ _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) (__v8sf) __W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, @@ -8382,8 +8390,9 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) #define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __AVX512VLINTRIN_H */ diff --git a/clang/lib/Headers/avx512vpopcntdqintrin.h b/clang/lib/Headers/avx512vpopcntdqintrin.h index 79fc6e140c616..7bf0700da77c9 100644 --- a/clang/lib/Headers/avx512vpopcntdqintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqintrin.h @@ -16,18 +16,19 @@ #define __AVX512VPOPCNTDQINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vpopcntdq,evex512"), \ - __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr + __min_vector_width__(512))) constexpr #else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,evex512"), \ + __min_vector_width__(512))) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v8du)__A); } @@ -43,7 +44,7 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v16su)__A); } @@ -60,6 +61,5 @@ _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { } #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vpopcntdqvlintrin.h b/clang/lib/Headers/avx512vpopcntdqvlintrin.h index d14cb1eb31f14..deb849f950357 100644 --- a/clang/lib/Headers/avx512vpopcntdqvlintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqvlintrin.h @@ -16,6 +16,17 @@ #define __AVX512VPOPCNTDQVLINTRIN_H /* Define the default attributes for the functions in this file. */ + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ + __min_vector_width__(256))) constexpr +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ @@ -24,16 +35,9 @@ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#else -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 #endif -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi64(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v2du)__A); } @@ -49,7 +53,7 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi32(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v4su)__A); } @@ -65,7 +69,7 @@ _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi64(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v4du)__A); } @@ -81,7 +85,7 @@ _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi32(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v8su)__A); } @@ -99,7 +103,5 @@ _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 -#undef __DEFAULT_FN_ATTRS128_CONSTEXPR -#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif diff --git a/clang/test/CodeGen/X86/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c index 30d364a283641..8092f2d30214a 100644 --- a/clang/test/CodeGen/X86/avx512bitalg-builtins.c +++ b/clang/test/CodeGen/X86/avx512bitalg-builtins.c @@ -19,12 +19,15 @@ __m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) { // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_popcnt_epi16(__A, __U, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_popcnt_epi16(_mm512_set1_epi16(-1), 0xF0F0F0F0, (__m512i)(__v32hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025, +5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), -1, -1, -1, -1, 0, 8, 1, 9, -1, -1, -1, -1, 6, 2, 9, 2, -1, -1, -1, -1, 0, 8, 1, 9, -1, -1, -1, -1, 6, 2, 9, 2)); + __m512i test_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_popcnt_epi16 // CHECK: @llvm.ctpop.v32i16 // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_popcnt_epi16(__U, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_popcnt_epi16(0x0F0F0F0F, (__m512i)(__v32hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025, +5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 15, 14, 1, 0, 0, 0, 0, 2, 2, 4, 2, 0, 0, 0, 0, 2, 15, 14, 1, 0, 0, 0, 0, 2, 2, 4, 2, 0, 0, 0, 0)); __m512i test_mm512_popcnt_epi8(__m512i __A) { // CHECK-LABEL: test_mm512_popcnt_epi8 @@ -39,12 +42,15 @@ __m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) { // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_popcnt_epi8(__A, __U, __B); } +TEST_CONSTEXPR(match_v64qi(_mm512_mask_popcnt_epi8(_mm512_set1_epi8(-1), 0xF0F0F0F00F0F0F0FULL, (__m512i)(__v64qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, -1, -1, -1, -1, 2, 2, 4, 2, -1, -1, -1, -1, 2, 7, 6, 1, -1, -1, -1, -1, 2, 2, 4, 2, -1, -1, -1, -1, -1, -1, -1, -1, 0, 4, 1, 4, -1, -1, -1, -1, 6, 2, 4, 3, -1, -1, -1, -1, 0, 4, 1, 4, -1, -1, -1, -1, 6, 2, 4, 3)); + __m512i test_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_popcnt_epi8 // CHECK: @llvm.ctpop.v64i8 // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_popcnt_epi8(__U, __B); } +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_popcnt_epi8(0x0F0F0F0FF0F0F0F0ULL, (__m512i)(__v64qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 0, 0, 0, 0, 0, 4, 1, 4, 0, 0, 0, 0, 6, 2, 4, 3, 0, 0, 0, 0, 0, 4, 1, 4, 0, 0, 0, 0, 6, 2, 4, 3, 2, 7, 6, 1, 0, 0, 0, 0, 2, 2, 4, 2, 0, 0, 0, 0, 2, 7, 6, 1, 0, 0, 0, 0, 2, 2, 4, 2, 0, 0, 0, 0)); __mmask64 test_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_bitshuffle_epi64_mask diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 048bc3057b5f4..2767f779e9411 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8941,29 +8941,34 @@ __m512d test_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_cvtps_pd(__U, __A); } + __m512d test_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_mov_pd // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_mov_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_mov_pd((__m512d){-8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0}, 0xC3, (__m512d){+1.0, +2.0, +3.0, +4.0, +5.0, +6.0, +7.0, +8.0}), +1.0, +2.0, -6.0, -5.0, -4.0, -3.0, +7.0, +8.0)); __m512d test_mm512_maskz_mov_pd(__mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_maskz_mov_pd // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_mov_pd(__U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_mov_pd(0xC3, (__m512d){+1.0, +2.0, +3.0, +4.0, +5.0, +6.0, +7.0, +8.0}), +1.0, +2.0, +0.0, +0.0, +0.0, +0.0, +7.0, +8.0)); __m512 test_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) { // CHECK-LABEL: test_mm512_mask_mov_ps // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_mov_ps(__W, __U, __A); } +TEST_CONSTEXPR(match_m512(_mm512_mask_mov_ps((__m512){-16.0f, -15.0f, -14.0f, -13.0f, -12.0f, -11.0f, -10.0f, -9.0f, -8.0f, -7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f}, 0x0FF0, (__m512){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f, +9.0f, +10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f}), -16.0f, -15.0f, -14.0f, -13.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, -4.0f, -3.0f, -2.0f, -1.0f)); __m512 test_mm512_maskz_mov_ps(__mmask16 __U, __m512 __A) { // CHECK-LABEL: test_mm512_maskz_mov_ps // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_mov_ps(__U, __A); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_mov_ps(0xF3F3, (__m512){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f, +9.0f, +10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f}), +1.0f, +2.0f, 0.0f, 0.0f, +5.0f, +6.0f, +7.0f, +8.0f, +9.0f, +10.0f, 0.0f, 0.0f, +13.0f, +14.0f, +15.0f, +16.0f)); void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_compressstoreu_pd diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index ac7aa3eedbb99..b17cedbbede56 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" __mmask8 test_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { // CHECK-LABEL: test_mm_cmpeq_epu32_mask @@ -9514,48 +9515,56 @@ __m128d test_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_mov_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_mask_mov_pd((__m128d){-2.0, -1.0}, 0x2, (__m128d){+1.0, +2.0}), -2.0, +2.0)); __m128d test_mm_maskz_mov_pd(__mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_maskz_mov_pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_mov_pd(__U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_mov_pd(0x1, (__m128d){+1.0, +2.0}), +1.0, +0.0)); __m256d test_mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_mask_mov_pd // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_mov_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_mov_pd((__m256d){-4.0, -3.0, -2.0, -1.0}, 0x3, (__m256d){+1.0, +2.0, +3.0, +4.0}), +1.0, +2.0, -2.0, -1.0)); __m256d test_mm256_maskz_mov_pd(__mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_maskz_mov_pd // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_mov_pd(__U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_mov_pd(0xC, (__m256d){+1.0, +2.0, +3.0, +4.0}), 0.0, 0.0, +3.0, +4.0)); __m128 test_mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: test_mm_mask_mov_ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_mov_ps(__W, __U, __A); } +TEST_CONSTEXPR(match_m128(_mm_mask_mov_ps((__m128){-4.0f, -3.0f, -2.0f, -1.0f}, 0x3, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +1.0f, +2.0f, -2.0f, -1.0f)); __m128 test_mm_maskz_mov_ps(__mmask8 __U, __m128 __A) { // CHECK-LABEL: test_mm_maskz_mov_ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_mov_ps(__U, __A); } +TEST_CONSTEXPR(match_m128(_mm_maskz_mov_ps(0xC, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), 0.0f, 0.0f, +3.0f, +4.0f)); __m256 test_mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm256_mask_mov_ps // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_mov_ps(__W, __U, __A); } +TEST_CONSTEXPR(match_m256(_mm256_mask_mov_ps((__m256){-8.0f, -7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f}, 0xC3, (__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), +1.0f, +2.0f, -6.0f, -5.0f, -4.0f, -3.0f, +7.0f, +8.0f)); __m256 test_mm256_maskz_mov_ps(__mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm256_maskz_mov_ps // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_mov_ps(__U, __A); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_mov_ps(0xC3, (__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), +1.0f, +2.0f, 0.0f, 0.0f, 0.0f, 0.0f, +7.0f, +8.0f)); __m128 test_mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtph_ps diff --git a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c index b53410ae43297..e2cfb3a348a93 100644 --- a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c @@ -19,12 +19,15 @@ __m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_popcnt_epi16(__A, __U, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_popcnt_epi16(_mm256_set1_epi16(-1), 0xF0F0, (__m256i)(__v16hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), -1, -1, -1, -1, 0, 8, 1, 9, -1, -1, -1, -1, 6, 2, 9, 2)); + __m256i test_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_popcnt_epi16 // CHECK: @llvm.ctpop.v16i16 // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_popcnt_epi16(__U, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_popcnt_epi16(0x0F0F, (__m256i)(__v16hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 15, 14, 1, 0, 0, 0, 0, 2, 2, 4, 2, 0, 0, 0, 0)); __m128i test_mm_popcnt_epi16(__m128i __A) { // CHECK-LABEL: test_mm_popcnt_epi16 @@ -39,12 +42,15 @@ __m128i test_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_popcnt_epi16(__A, __U, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_popcnt_epi16(_mm_set1_epi16(-1), 0xF0, (__m128i)(__v8hi){+5, -3, -10, +8, 0, -256, +256, -128}), -1, -1, -1, -1, 0, 8, 1, 9)); + __m128i test_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { // CHECK-LABEL: test_mm_maskz_popcnt_epi16 // CHECK: @llvm.ctpop.v8i16 // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_popcnt_epi16(__U, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_popcnt_epi16(0x0F, (__m128i)(__v8hi){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 15, 14, 1, 0, 0, 0, 0)); __m256i test_mm256_popcnt_epi8(__m256i __A) { // CHECK-LABEL: test_mm256_popcnt_epi8 @@ -59,12 +65,15 @@ __m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_popcnt_epi8(__A, __U, __B); } +TEST_CONSTEXPR(match_v32qi(_mm256_mask_popcnt_epi8(_mm256_set1_epi8(-1), 0xF00F, (__m256i)(__v32qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 6, 2, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)); + __m256i test_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_popcnt_epi8 // CHECK: @llvm.ctpop.v32i8 // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_popcnt_epi8(__U, __B); } +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_popcnt_epi8(0x0FF0, (__m256i)(__v32qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 0, 0, 0, 0, 0, 4, 1, 4, 2, 2, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m128i test_mm_popcnt_epi8(__m128i __A) { // CHECK-LABEL: test_mm_popcnt_epi8 @@ -79,12 +88,15 @@ __m128i test_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_popcnt_epi8(__A, __U, __B); } +TEST_CONSTEXPR(match_v16qi(_mm_mask_popcnt_epi8(_mm_set1_epi8(-1), 0xF00F, (__m128i)(__v16qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 6, 2, 4, 3)); + __m128i test_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { // CHECK-LABEL: test_mm_maskz_popcnt_epi8 // CHECK: @llvm.ctpop.v16i8 // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_maskz_popcnt_epi8(__U, __B); } +TEST_CONSTEXPR(match_v16qi(_mm_maskz_popcnt_epi8(0x0FF0, (__m128i)(__v16qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 0, 0, 0, 0, 0, 4, 1, 4, 2, 2, 4, 2, 0, 0, 0, 0)); __mmask32 test_mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_bitshuffle_epi64_mask diff --git a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c index 8927ae273d29b..b80ffdf0dcc7c 100644 --- a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c @@ -19,6 +19,7 @@ __m512i test_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_popcnt_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_popcnt_epi64(_mm512_set1_epi64(-1), 0x81, (__m512i)(__v8di){+5, -3, -10, +8, 0, -256, +256, -128}), 2, -1, -1, -1, -1, -1, -1, 57)); __m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_popcnt_epi64 @@ -26,6 +27,7 @@ __m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_popcnt_epi64(__U, __A); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_popcnt_epi64(0x42, (__m512i)(__v8di){+5, -3, -10, +8, 0, -256, +256, -128}), 0, 63, 0, 0, 0, 0, 1, 0)); __m512i test_mm512_popcnt_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_popcnt_epi32 @@ -40,6 +42,7 @@ __m512i test_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_popcnt_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_popcnt_epi32(_mm512_set1_epi32(-1), 0x0F81, (__m512i)(__v16si){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, -1, -1, -1, -1, -1, -1, 25, 2, 2, 4, 2, -1, -1, -1, -1)); __m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_popcnt_epi32 @@ -47,3 +50,4 @@ __m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_popcnt_epi32(__U, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_popcnt_epi32(0xF042, (__m512i)(__v16si){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 0, 31, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 6, 2, 9, 2)); diff --git a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c index d9fbd7628142f..7258034c57adc 100644 --- a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c @@ -19,6 +19,7 @@ __m128i test_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_popcnt_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_mask_popcnt_epi64(_mm_set1_epi64x(-1), 0x2, (__m128i)(__v2di){+5, -3}), -1, 63)); __m128i test_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_popcnt_epi64 @@ -26,6 +27,7 @@ __m128i test_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_popcnt_epi64(__U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_popcnt_epi64(0x1, (__m128i)(__v2di){+5, -3}), 2, 0)); __m128i test_mm_popcnt_epi32(__m128i __A) { // CHECK-LABEL: test_mm_popcnt_epi32 @@ -40,6 +42,7 @@ __m128i test_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_popcnt_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_mask_popcnt_epi32(_mm_set1_epi32(-1), 0x3, (__m128i)(__v4si){+5, -3, -10, +8}), 2, 31, -1, -1)); __m128i test_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_popcnt_epi32 @@ -47,6 +50,7 @@ __m128i test_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_popcnt_epi32(__U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_popcnt_epi32(0x5, (__m128i)(__v4si){+5, -3, -10, +8}), 2, 0, 30, 0)); __m256i test_mm256_popcnt_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_popcnt_epi64 @@ -61,6 +65,7 @@ __m256i test_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_popcnt_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_popcnt_epi64(_mm256_set1_epi64x(-1), 0x3, (__m256i)(__v4di){+5, -3, -10, +8}), 2, 63, -1, -1)); __m256i test_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_popcnt_epi64 @@ -68,6 +73,7 @@ __m256i test_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_popcnt_epi64(__U, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_popcnt_epi64(0x5, (__m256i)(__v4di){+5, -3, -10, +8}), 2, 0, 62, 0)); __m256i test_mm256_popcnt_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_popcnt_epi32 @@ -82,6 +88,7 @@ __m256i test_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_popcnt_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_popcnt_epi32(_mm256_set1_epi32(-1), 0x37, (__m256i)(__v8si){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 31, 30, -1, 0, 24, -1, -1)); __m256i test_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_popcnt_epi32 @@ -89,3 +96,4 @@ __m256i test_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_popcnt_epi32(__U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_popcnt_epi32(0x8C, (__m256i)(__v8si){+5, -3, -10, +8, 0, -256, +256, -128}), 0, 0, 30, 1, 0, 0, 0, 25)); >From 2d99c137d3df39a01e40624fd3e5ad70b40777a4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-...@redking.me.uk> Date: Wed, 13 Aug 2025 09:27:26 +0100 Subject: [PATCH 2/2] clang-format --- clang/lib/Headers/avx512bitalgintrin.h | 36 ++++------- clang/lib/Headers/avx512fintrin.h | 32 ++++----- clang/lib/Headers/avx512vlbitalgintrin.h | 72 +++++++-------------- clang/lib/Headers/avx512vlintrin.h | 64 +++++++----------- clang/lib/Headers/avx512vpopcntdqintrin.h | 6 +- clang/lib/Headers/avx512vpopcntdqvlintrin.h | 6 +- 6 files changed, 76 insertions(+), 140 deletions(-) diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h index 76c1a158b223f..5cc32077c2c04 100644 --- a/clang/lib/Headers/avx512bitalgintrin.h +++ b/clang/lib/Headers/avx512bitalgintrin.h @@ -27,47 +27,35 @@ #endif static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_popcnt_epi16(__m512i __A) -{ +_mm512_popcnt_epi16(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v32hu)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) -{ - return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U, - (__v32hi) _mm512_popcnt_epi16(__B), - (__v32hi) __A); +_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_popcnt_epi16(__B), (__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) -{ - return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(), - __U, - __B); +_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { + return _mm512_mask_popcnt_epi16((__m512i)_mm512_setzero_si512(), __U, __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_popcnt_epi8(__m512i __A) -{ +_mm512_popcnt_epi8(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v64qu)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) -{ - return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U, - (__v64qi) _mm512_popcnt_epi8(__B), - (__v64qi) __A); +_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_popcnt_epi8(__B), (__v64qi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) -{ - return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(), - __U, - __B); +_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { + return _mm512_mask_popcnt_epi8((__m512i)_mm512_setzero_si512(), __U, __B); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 73a915efd516f..1d56d2fdafe85 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8948,35 +8948,27 @@ _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) } static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) -{ - return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, - (__v8df) __A, - (__v8df) __W); +_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A, + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) -{ - return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, - (__v8df) __A, - (__v8df) _mm512_setzero_pd ()); +_mm512_maskz_mov_pd(__mmask8 __U, __m512d __A) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A, + (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ - return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, - (__v16sf) __A, - (__v16sf) __W); +_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A, + (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) -{ - return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, - (__v16sf) __A, - (__v16sf) _mm512_setzero_ps ()); +_mm512_maskz_mov_ps(__mmask16 __U, __m512 __A) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A, + (__v16sf)_mm512_setzero_ps()); } static __inline__ void __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h index e29a149df5f90..21bf858a20c59 100644 --- a/clang/lib/Headers/avx512vlbitalgintrin.h +++ b/clang/lib/Headers/avx512vlbitalgintrin.h @@ -33,91 +33,67 @@ #endif static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_popcnt_epi16(__m256i __A) -{ +_mm256_popcnt_epi16(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v16hu)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) -{ - return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, - (__v16hi) _mm256_popcnt_epi16(__B), - (__v16hi) __A); +_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_popcnt_epi16(__B), (__v16hi)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) -{ - return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), - __U, - __B); +_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { + return _mm256_mask_popcnt_epi16((__m256i)_mm256_setzero_si256(), __U, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_popcnt_epi16(__m128i __A) -{ +_mm_popcnt_epi16(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v8hu)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) -{ - return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, - (__v8hi) _mm_popcnt_epi16(__B), - (__v8hi) __A); +_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { + return (__m128i)__builtin_ia32_selectw_128( + (__mmask8)__U, (__v8hi)_mm_popcnt_epi16(__B), (__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) -{ - return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), - __U, - __B); +_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { + return _mm_mask_popcnt_epi16((__m128i)_mm_setzero_si128(), __U, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_popcnt_epi8(__m256i __A) -{ +_mm256_popcnt_epi8(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v32qu)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) -{ - return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, - (__v32qi) _mm256_popcnt_epi8(__B), - (__v32qi) __A); +_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qi)_mm256_popcnt_epi8(__B), (__v32qi)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) -{ - return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), - __U, - __B); +_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { + return _mm256_mask_popcnt_epi8((__m256i)_mm256_setzero_si256(), __U, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_popcnt_epi8(__m128i __A) -{ +_mm_popcnt_epi8(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v16qu)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) -{ - return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, - (__v16qi) _mm_popcnt_epi8(__B), - (__v16qi) __A); +_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_popcnt_epi8(__B), (__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) -{ - return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), - __U, - __B); +_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { + return _mm_mask_popcnt_epi8((__m128i)_mm_setzero_si128(), __U, __B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index fd1bd291ecbcb..e8d5083c6850e 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -8267,67 +8267,51 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) (__v4si)_mm_setzero_si128())) static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) -{ - return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, - (__v2df) __A, - (__v2df) __W); +_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A, + (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_mov_pd (__mmask8 __U, __m128d __A) -{ - return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, - (__v2df) __A, - (__v2df) _mm_setzero_pd ()); +_mm_maskz_mov_pd(__mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A, + (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) -{ - return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, - (__v4df) __A, - (__v4df) __W); +_mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A, + (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) -{ - return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, - (__v4df) __A, - (__v4df) _mm256_setzero_pd ()); +_mm256_maskz_mov_pd(__mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A, + (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ - return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, - (__v4sf) __A, - (__v4sf) __W); +_mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A, + (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_mov_ps (__mmask8 __U, __m128 __A) -{ - return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, - (__v4sf) __A, - (__v4sf) _mm_setzero_ps ()); +_mm_maskz_mov_ps(__mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A, + (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ - return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, - (__v8sf) __A, - (__v8sf) __W); +_mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A, + (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) -{ - return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, - (__v8sf) __A, - (__v8sf) _mm256_setzero_ps ()); +_mm256_maskz_mov_ps(__mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A, + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 diff --git a/clang/lib/Headers/avx512vpopcntdqintrin.h b/clang/lib/Headers/avx512vpopcntdqintrin.h index 7bf0700da77c9..ac71808f69803 100644 --- a/clang/lib/Headers/avx512vpopcntdqintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqintrin.h @@ -28,8 +28,7 @@ __min_vector_width__(512))) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_popcnt_epi64(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v8du)__A); } @@ -44,8 +43,7 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_popcnt_epi32(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v16su)__A); } diff --git a/clang/lib/Headers/avx512vpopcntdqvlintrin.h b/clang/lib/Headers/avx512vpopcntdqvlintrin.h index deb849f950357..bed951b764cf7 100644 --- a/clang/lib/Headers/avx512vpopcntdqvlintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqvlintrin.h @@ -37,8 +37,7 @@ __min_vector_width__(256))) #endif -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_popcnt_epi64(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi64(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v2du)__A); } @@ -53,8 +52,7 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_popcnt_epi32(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi32(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v4su)__A); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits