Author: stomfaig Date: 2025-11-22T14:37:50Z New Revision: 2e424deeb6180d112323f4df955c8034eb56780c
URL: https://github.com/llvm/llvm-project/commit/2e424deeb6180d112323f4df955c8034eb56780c DIFF: https://github.com/llvm/llvm-project/commit/2e424deeb6180d112323f4df955c8034eb56780c.diff LOG: [Clang][X86] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow VPERMILPD/S variable mask intrinsics to be used in constexpr (#168861) Allowing VPERMILPD/S intrinsics to be used in constexpr Closes #167878 Added: Modified: clang/include/clang/Basic/BuiltinsX86.td clang/lib/AST/ByteCode/InterpBuiltin.cpp clang/lib/AST/ExprConstant.cpp clang/lib/Headers/avx512fintrin.h clang/lib/Headers/avx512vlintrin.h clang/lib/Headers/avxintrin.h clang/test/CodeGen/X86/avx-builtins.c clang/test/CodeGen/X86/avx512f-builtins.c clang/test/CodeGen/X86/avx512vl-builtins.c Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 132c1e8ea7190..3c9fbd912ceaf 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -461,11 +461,14 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; } -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">; def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">; def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">; def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; +} + +let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, 
float>, _Vector<8, float>, _Constant char)">; def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">; def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; @@ -2338,15 +2341,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">; def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { - def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">; - def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">; -} - -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">; - def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">; -} let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def rndscalesd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int, _Constant int)">; @@ -2439,6 +2433,14 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def shufpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; def shufps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">; + def vpermilpd512 + : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">; + def vpermilps512 + : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">; + def vpermilvarpd512 : X86Builtin<"_Vector<8, 
double>(_Vector<8, double>, " + "_Vector<8, long long int>)">; + def vpermilvarps512 + : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">; } let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a403f293a5c34..27eb6c5c698f2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4653,6 +4653,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::make_pair(0, static_cast<int>(LaneBase + Sel)); }); + case X86::BI__builtin_ia32_vpermilvarpd: + case X86::BI__builtin_ia32_vpermilvarpd256: + case X86::BI__builtin_ia32_vpermilvarpd512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned NumElemPerLane = 2; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = ShuffleMask & 0b10 ? 
1 : 0; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + }); + + case X86::BI__builtin_ia32_vpermilvarps: + case X86::BI__builtin_ia32_vpermilvarps256: + case X86::BI__builtin_ia32_vpermilvarps512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned NumElemPerLane = 4; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = ShuffleMask & 0b11; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + }); + case X86::BI__builtin_ia32_vpermilpd: case X86::BI__builtin_ia32_vpermilpd256: case X86::BI__builtin_ia32_vpermilpd512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index bc885f4c89028..ad1f49ce9b04e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13043,6 +13043,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_vpermilvarpd: + case X86::BI__builtin_ia32_vpermilvarpd256: + case X86::BI__builtin_ia32_vpermilvarpd512: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + unsigned NumElemPerLane = 2; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = Mask & 0b10 ? 
1 : 0; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_vpermilpd: case X86::BI__builtin_ia32_vpermilpd256: case X86::BI__builtin_ia32_vpermilpd512: { @@ -13062,6 +13079,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_vpermilvarps: + case X86::BI__builtin_ia32_vpermilvarps256: + case X86::BI__builtin_ia32_vpermilvarps512: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + unsigned NumElemPerLane = 4; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = Mask & 0b11; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index e4184795e47e9..e1de56069870b 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -5879,45 +5879,39 @@ _mm_cvttss_u64 (__m128 __A) (__v16sf)_mm512_permute_ps((X), (C)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_permutevar_pd(__m512d __A, __m512i __C) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutevar_pd(__m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutevar_pd(__A, __C), (__v8df)__W); } -static __inline__ 
__m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutevar_pd(__A, __C), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_permutevar_ps(__m512 __A, __m512i __C) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutevar_ps(__m512 __A, __m512i __C) { return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutevar_ps(__A, __C), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutevar_ps(__A, __C), (__v16sf)_mm512_setzero_ps()); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 5a1b540e07e3a..99c057030a4cc 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -5847,65 +5847,57 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) (__v8sf)_mm256_permute_ps((X), (C)), \ (__v8sf)_mm256_setzero_ps())) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, 
__m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), 
(__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)_mm256_setzero_ps()); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 3e1618ed192c8..44ef88db5cbce 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -787,9 +787,8 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a, /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. -static __inline __m128d __DEFAULT_FN_ATTRS128 -_mm_permutevar_pd(__m128d __a, __m128i __c) -{ +static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); } @@ -826,9 +825,8 @@ _mm_permutevar_pd(__m128d __a, __m128i __c) /// 1: Bits [255:192] of the source are copied to bits [255:192] of the /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. 
-static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_permutevar_pd(__m256d __a, __m256i __c) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_permutevar_pd(__m256d __a, __m256i __c) { return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); } @@ -881,9 +879,8 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c) /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. -static __inline __m128 __DEFAULT_FN_ATTRS128 -_mm_permutevar_ps(__m128 __a, __m128i __c) -{ +static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); } @@ -972,9 +969,8 @@ _mm_permutevar_ps(__m128 __a, __m128i __c) /// 11: Bits [255:224] of the source are copied to bits [255:224] of the /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_permutevar_ps(__m256 __a, __m256i __c) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_permutevar_ps(__m256 __a, __m256i __c) { return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index f8931e7e55410..00bcf9cc1da58 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1454,24 +1454,52 @@ __m128d test_mm_permutevar_pd(__m128d A, __m128i B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}}) return _mm_permutevar_pd(A, B); } +TEST_CONSTEXPR(match_m128d( + _mm_permutevar_pd( + ((__m128d){0.0, 1.0}), + ((__m128i){0b10, 0b00}) + ), + 1.0, 0.0 +)); __m256d test_mm256_permutevar_pd(__m256d A, __m256i B) { // CHECK-LABEL: test_mm256_permutevar_pd // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_permutevar_pd(A, B); } +TEST_CONSTEXPR(match_m256d( + _mm256_permutevar_pd( + ((__m256d){0.0, 1.0, 2.0, 3.0}), + ((__m256i){0b10, 0b00, 0b00, 0b10}) + ), + 1.0, 0.0, 2.0, 3.0 +)); __m128 test_mm_permutevar_ps(__m128 A, __m128i B) { // CHECK-LABEL: test_mm_permutevar_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %{{.*}}, <4 x i32> %{{.*}}) return _mm_permutevar_ps(A, B); } +TEST_CONSTEXPR(match_m128( + _mm_permutevar_ps( + ((__m128){0.0, 1.0, 2.0, 3.0}), + ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00}) + ), + 3.0, 2.0, 1.0, 0.0 +)); __m256 test_mm256_permutevar_ps(__m256 A, __m256i B) { // CHECK-LABEL: test_mm256_permutevar_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_permutevar_ps(A, B); } +TEST_CONSTEXPR(match_m256( + _mm256_permutevar_ps( + ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m256i)(__v8si){0b11, 0b10, 0b01, 
0b00, 0b01, 0b00, 0b11, 0b10}) + ), + 3.0, 2.0, 1.0, 0.0, 5.0, 4.0, 7.0, 6.0 +)); __m256 test_mm256_rcp_ps(__m256 A) { // CHECK-LABEL: test_mm256_rcp_ps diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 1f4bb51a2e34f..0d53dd6ef4c64 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -5622,6 +5622,13 @@ __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) { // CHECK: @llvm.x86.avx512.vpermilvar.pd.512 return _mm512_permutevar_pd(__A, __C); } +TEST_CONSTEXPR(match_m512d( + _mm512_permutevar_pd( + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10}) + ), + 0.0, 0.0, 3.0, 2.0, 4.0, 5.0, 7.0, 7.0 +)); __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { // CHECK-LABEL: test_mm512_mask_permutevar_pd @@ -5629,6 +5636,15 @@ __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permutevar_pd(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m512d( + _mm512_mask_permutevar_pd( + ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + (__mmask8)0b01010101, + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10}) + ), + 0.0, 9.0, 3.0, 11.0, 4.0, 13.0, 7.0, 15.0 +)); __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_permutevar_pd @@ -5636,12 +5652,27 @@ __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permutevar_pd(__U, __A, __C); } +TEST_CONSTEXPR(match_m512d( + _mm512_maskz_permutevar_pd( + (__mmask8)0b01010101, + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m512i){0b00, 0b00, 
0b10, 0b00, 0b00, 0b10, 0b10, 0b10}) + ), + 0.0, 0.0, 3.0, 0.0, 4.0, 0.0, 7.0, 0.0 +)); __m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) { // CHECK-LABEL: test_mm512_permutevar_ps // CHECK: @llvm.x86.avx512.vpermilvar.ps.512 return _mm512_permutevar_ps(__A, __C); } +TEST_CONSTEXPR(match_m512( + _mm512_permutevar_ps( + ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10}) + ), + 3.0, 2.0, 1.0, 0.0, 5.0, 4.0, 7.0, 6.0, 10.0, 11.0, 8.0, 9.0, 12.0, 15.0, 13.0, 14.0 +)); __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { // CHECK-LABEL: test_mm512_mask_permutevar_ps @@ -5649,6 +5680,15 @@ __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_permutevar_ps(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m512( + _mm512_mask_permutevar_ps( + ((__m512){16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0}), + (__mmask16)0b0101010101010101, + ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10}) + ), + 3.0, 17.0, 1.0, 19.0, 5.0, 21.0, 7.0, 23.0, 10.0, 25.0, 8.0, 27.0, 12.0, 29.0, 13.0, 31.0 +)); __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_permutevar_ps @@ -5656,6 +5696,14 @@ __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_permutevar_ps(__U, __A, __C); } +TEST_CONSTEXPR(match_m512( + _mm512_maskz_permutevar_ps( + (__mmask16)0b0101010101010101, + 
((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10}) + ), + 3.0, 0.0, 1.0, 0.0, 5.0, 0.0, 7.0, 0.0, 10.0, 0.0, 8.0, 0.0, 12.0, 0.0, 13.0, 0.0 +)); __m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) { // CHECK-LABEL: test_mm512_permutex2var_epi32 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 4b2ba3341af29..e6eb91e6e8ce4 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -8131,6 +8131,15 @@ __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m12 // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_permutevar_pd(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m128d( + _mm_mask_permutevar_pd( + ((__m128d){3.0, 4.0}), + (__mmask8)0b01, + ((__m128d){0.0, 1.0}), + ((__m128i){0b10, 0b00}) + ), + 1.0, 4.0 +)); __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { // CHECK-LABEL: test_mm_maskz_permutevar_pd @@ -8138,6 +8147,14 @@ __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_permutevar_pd(__U, __A, __C); } +TEST_CONSTEXPR(match_m128d( + _mm_maskz_permutevar_pd( + (__mmask8)0b01, + ((__m128d){0.0, 1.0}), + ((__m128i){0b10, 0b00}) + ), + 1.0, 0.0 +)); __m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { // CHECK-LABEL: test_mm256_mask_permutevar_pd @@ -8145,6 +8162,15 @@ __m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_permutevar_pd(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m256d( + 
_mm256_mask_permutevar_pd( + ((__m256d){4.0, 5.0, 6.0, 7.0}), + (__mmask8)0b0101, + ((__m256d){0.0, 1.0, 2.0, 3.0}), + ((__m256i){0b10, 0b00, 0b00, 0b10}) + ), + 1.0, 5.0, 2.0, 7.0 +)); __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_permutevar_pd @@ -8152,6 +8178,14 @@ __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_permutevar_pd(__U, __A, __C); } +TEST_CONSTEXPR(match_m256d( + _mm256_maskz_permutevar_pd( + (__mmask8)0b0101, + ((__m256d){0.0, 1.0, 2.0, 3.0}), + ((__m256i){0b10, 0b00, 0b00, 0b10}) + ), + 1.0, 0.0, 2.0, 0.0 +)); __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { // CHECK-LABEL: test_mm_mask_permutevar_ps @@ -8159,6 +8193,15 @@ __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_permutevar_ps(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m128( + _mm_mask_permutevar_ps( + ((__m128){4.0, 5.0, 6.0, 7.0}), + (__mmask8)0b0101, + ((__m128){0.0, 1.0, 2.0, 3.0}), + ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00}) + ), + 3.0, 5.0, 1.0, 7.0 +)); __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { // CHECK-LABEL: test_mm_maskz_permutevar_ps @@ -8166,6 +8209,14 @@ __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_permutevar_ps(__U, __A, __C); } +TEST_CONSTEXPR(match_m128( + _mm_maskz_permutevar_ps( + (__mmask8)0b0101, + ((__m128){0.0, 1.0, 2.0, 3.0}), + ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00}) + ), + 3.0, 0.0, 1.0, 0.0 +)); __m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { // CHECK-LABEL: test_mm256_mask_permutevar_ps @@ -8173,6 +8224,15 
@@ __m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m25 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_permutevar_ps(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m256( + _mm256_mask_permutevar_ps( + ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + (__mmask8)0b01010101, + ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b00, 0b11, 0b01, 0b10}) + ), + 3.0, 9.0, 1.0, 11.0, 4.0, 13.0, 5.0, 15.0 +)); __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_permutevar_ps @@ -8180,6 +8240,14 @@ __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_permutevar_ps(__U, __A, __C); } +TEST_CONSTEXPR(match_m256( + _mm256_maskz_permutevar_ps( + (__mmask8)0b01010101, + ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b00, 0b11, 0b01, 0b10}) + ), + 3.0, 0.0, 1.0, 0.0, 4.0, 0.0, 5.0, 0.0 +)); __mmask8 test_mm_test_epi32_mask(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_test_epi32_mask _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
