Author: moorabbit Date: 2025-09-04T12:50:06Z New Revision: 8bdd9090d0c75226b47232380d96051ff173b067
URL: https://github.com/llvm/llvm-project/commit/8bdd9090d0c75226b47232380d96051ff173b067 DIFF: https://github.com/llvm/llvm-project/commit/8bdd9090d0c75226b47232380d96051ff173b067.diff LOG: [Headers][X86] Add constexpr support for some AVX[512] intrinsics. (#156567) The following AVX[512] intrinsics are now constexpr: - `_mm_mask_cvtepi32_pd` - `_mm_maskz_cvtepi32_pd` - `_mm_mask_cvtepi32_ps` - `_mm_maskz_cvtepi32_ps` - `_mm_cvtepu32_pd` - `_mm_mask_cvtepu32_pd` - `_mm_maskz_cvtepu32_pd` - `_mm_cvtepu32_ps` - `_mm_mask_cvtepu32_ps` - `_mm_maskz_cvtepu32_ps` - `_mm256_mask_cvtepi32_pd` - `_mm256_maskz_cvtepi32_pd` - `_mm256_mask_cvtepi32_ps` - `_mm256_maskz_cvtepi32_ps` - `_mm256_cvtepu32_pd` - `_mm256_mask_cvtepu32_pd` - `_mm256_maskz_cvtepu32_pd` - `_mm256_cvtepu32_ps` - `_mm256_mask_cvtepu32_ps` - `_mm256_maskz_cvtepu32_ps` - `_mm512_cvtepi64_pd` - `_mm512_mask_cvtepi64_pd` - `_mm512_maskz_cvtepi64_pd` - `_mm512_cvtepu64_pd` - `_mm512_mask_cvtepu64_pd` - `_mm512_maskz_cvtepu64_pd` This PR is part 2 [[part 1](https://github.com/llvm/llvm-project/pull/156187)] of a series of PRs fixing #155798 Added: Modified: clang/lib/Headers/avx512dqintrin.h clang/lib/Headers/avx512vlintrin.h clang/test/CodeGen/X86/avx512dq-builtins.c clang/test/CodeGen/X86/avx512vl-builtins.c Removed: ################################################################################ diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index b0fe903d82431..ee211d3eae9d7 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -483,21 +483,20 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) - -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtepi64_pd (__m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi64_pd(__m512i __A) { return (__m512d)__builtin_convertvector((__v8di)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepi64_pd(__A), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepi64_pd(__A), (__v8df)_mm512_setzero_pd()); @@ -714,20 +713,20 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtepu64_pd (__m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu64_pd(__m512i __A) { return (__m512d)__builtin_convertvector((__v8du)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepu64_pd(__A), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepu64_pd(__A), (__v8df)_mm512_setzero_pd()); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index aa186c136bb53..a85c6fdd22e5d 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -1732,57 +1732,57 @@ _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), (__v8sf)_mm256_setzero_ps()); @@ -2143,78 +2143,78 @@ _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_cvtepu32_pd (__m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepu32_pd(__m128i __A) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_cvtepu32_pd (__m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu32_pd(__m128i __A) { return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_cvtepu32_ps (__m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepu32_ps(__m128i __A) { return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_cvtepu32_ps (__m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu32_ps(__m256i __A) { return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)_mm256_setzero_ps()); diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 88f160a6deda5..df096e3607f30 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -602,6 +602,8 @@ __m512d test_mm512_cvtepi64_pd(__m512i __A) { return _mm512_cvtepi64_pd(__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtepi64_pd((__m512i)(__v8di){-1, -1, 2, 2, -4, -4, 8, 8}), -1.0, -1.0, 2.0, 2.0, -4.0, -4.0, 8.0, 8.0)); + __m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi64_pd // CHECK: sitofp <8 x i64> %{{.*}} to <8 x double> @@ -609,6 +611,8 @@ __m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) { return _mm512_mask_cvtepi64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepi64_pd(_mm512_set1_pd(-777.0), /*1010 0101=*/0xa5, (__m512i)(__v8di){-1, -1, 2, 2, -4, -4, 8, 8}), -1.0, -777.0, 2.0, -777.0, -777.0, -4.0, -777.0, 8.0)); + __m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_cvtepi64_pd // CHECK: sitofp <8 x i64> %{{.*}} to <8 x double> @@ -616,6 +620,8 @@ __m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) { return _mm512_maskz_cvtepi64_pd(__U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_cvtepi64_pd(/*1010 0101=*/0xa5, (__m512i)(__v8di){-1, -1, 2, 2, -4, -4, 8, 8}), -1.0, 0.0, 2.0, 0.0, 0.0, -4.0, 0.0, 8.0)); + __m512d test_mm512_cvt_roundepi64_pd(__m512i __A) { // CHECK-LABEL: test_mm512_cvt_roundepi64_pd // CHECK: @llvm.x86.avx512.sitofp.round.v8f64.v8i64 @@ -826,6 +832,8 @@ __m512d test_mm512_cvtepu64_pd(__m512i __A) { return _mm512_cvtepu64_pd(__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtepu64_pd((__m512i)(__v8du){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 8.0)); + __m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepu64_pd // CHECK: uitofp <8 x i64> %{{.*}} to <8 x double> @@ -833,6 +841,8 @@ __m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { return _mm512_mask_cvtepu64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepu64_pd(_mm512_set1_pd(-777.0), /*1010 0101=*/0xa5, (__m512i)(__v8du){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, -777.0, 2.0, -777.0, -777.0, 4.0, -777.0, 8.0)); + __m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_cvtepu64_pd // CHECK: uitofp <8 x i64> %{{.*}} to <8 x double> @@ -840,6 +850,8 @@ __m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) { return _mm512_maskz_cvtepu64_pd(__U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_cvtepu64_pd(/*1010 0101=*/0xa5, (__m512i)(__v8du){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 8.0)); + __m512d test_mm512_cvt_roundepu64_pd(__m512i __A) { // CHECK-LABEL: test_mm512_cvt_roundepu64_pd // CHECK: @llvm.x86.avx512.uitofp.round.v8f64.v8i64 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 533896938a205..ab5d80c973242 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -3784,6 +3784,9 @@ __m128d test_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} return _mm_mask_cvtepi32_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_mask_cvtepi32_pd((__m128d){-777.0, -777.0}, /*10=*/0x2, (__m128i)(__v4si){-1, 1, -2, 2}), -777.0, 1.0)); + __m128d test_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepi32_pd // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1> @@ -3791,42 +3794,63 @@ __m128d test_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} return _mm_maskz_cvtepi32_pd(__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_maskz_cvtepi32_pd(/*10=*/0x2, (__m128i)(__v4si){-1, 1, -2, 2}), 0.0, 1.0)); + __m256d test_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm256_mask_cvtepi32_pd // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} return _mm256_mask_cvtepi32_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_mask_cvtepi32_pd(_mm256_set1_pd(-777.0), /*0101=*/0x5, (__m128i)(__v4si){-1, 1, -2, 2}), -1.0, -777.0, -2.0, -777.0)); + __m256d test_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepi32_pd // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} return _mm256_maskz_cvtepi32_pd(__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_maskz_cvtepi32_pd(/*0101=*/0x5, (__m128i)(__v4si){-1, 1, -2, 2}), -1.0, 0.0, -2.0, 0.0)); + __m128 test_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepi32_ps // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float> // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} return _mm_mask_cvtepi32_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_mask_cvtepi32_ps((__m128){-777.0f, -777.0f, -777.0f, -777.0f}, /*0101=*/0x5, (__m128i)(__v4si){-1, 1, -2, 2}), -1.0f, -777.0f, -2.0f, -777.0f)); + __m128 test_mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepi32_ps // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float> // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} return _mm_maskz_cvtepi32_ps(__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtepi32_ps(/*0101=*/0x5, (__m128i)(__v4si){-1, 1, -2, 2}), -1.0f, 0.0f, -2.0f, 0.0f)); + __m256 test_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepi32_ps // CHECK: sitofp <8 x i32> %{{.*}} to <8 x float> // CHECK: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}} return _mm256_mask_cvtepi32_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_mask_cvtepi32_ps(_mm256_set1_ps(-777.0f), /*1001 0101=*/0x95, (__m256i)(__v8si){-1, 1, -2, 2, -4, 4, -8, 8}), -1.0f, -777.0f, -2.0f, -777.0f, -4.0f, -777.0f, -777.0f, 8.0f)); + __m256 test_mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepi32_ps // CHECK: sitofp <8 x i32> %{{.*}} to <8 x float> // CHECK: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}} return _mm256_maskz_cvtepi32_ps(__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_maskz_cvtepi32_ps(/*1001 0101=*/0x95, (__m256i)(__v8si){-1, 1, -2, 2, -4, 4, -8, 8}), -1.0f, 0.0f, -2.0f, 0.0f, -4.0f, 0.0f, 0.0f, 8.0f)); + __m128i test_mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_mask_cvtpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.128 @@ -4091,6 +4115,9 @@ __m128d test_mm_cvtepu32_pd(__m128i __A) { // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double> return _mm_cvtepu32_pd(__A); } + +TEST_CONSTEXPR(match_m128d(_mm_cvtepu32_pd((__m128i)(__v4su){1, 2, 4, 8}), 1.0, 2.0)); + __m128d test_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepu32_pd // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1> @@ -4098,6 +4125,9 @@ __m128d test_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} return _mm_mask_cvtepu32_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_mask_cvtepu32_pd((__m128d){-777.0, -777.0}, /*10=*/0x2, (__m128i)(__v4su){1, 2, 4, 8}), -777.0, 2.0)); + __m128d test_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepu32_pd // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1> @@ -4105,57 +4135,87 @@ __m128d test_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} return _mm_maskz_cvtepu32_pd(__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_maskz_cvtepu32_pd(/*10=*/0x2, (__m128i)(__v4su){1, 2, 4, 8}), 0.0, 2.0)); + __m256d test_mm256_cvtepu32_pd(__m128i __A) { // CHECK-LABEL: test_mm256_cvtepu32_pd // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> return _mm256_cvtepu32_pd(__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_cvtepu32_pd((__m128i)(__v4su){1, 1, 2, 2}), 1.0, 1.0, 2.0, 2.0)); + __m256d test_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm256_mask_cvtepu32_pd // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} return _mm256_mask_cvtepu32_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_mask_cvtepu32_pd(_mm256_set1_pd(-777.0), /*1010=*/0xa, (__m128i)(__v4su){1, 1, 2, 2}), -777.0, 1.0, -777.0, 2.0)); + __m256d test_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepu32_pd // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} return _mm256_maskz_cvtepu32_pd(__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_maskz_cvtepu32_pd(/*1010=*/0xa, (__m128i)(__v4su){1, 1, 2, 2}), 0.0, 1.0, 0.0, 2.0)); + __m128 test_mm_cvtepu32_ps(__m128i __A) { // CHECK-LABEL: test_mm_cvtepu32_ps // CHECK: uitofp <4 x i32> %{{.*}} to <4 x float> return _mm_cvtepu32_ps(__A); } + +TEST_CONSTEXPR(match_m128(_mm_cvtepu32_ps((__m128i)(__v4su){1, 2, 4, 8}), 1.0f, 2.0f, 4.0f, 8.0f)); + __m128 test_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepu32_ps // CHECK: uitofp <4 x i32> %{{.*}} to <4 x float> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_cvtepu32_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_mask_cvtepu32_ps((__m128){-777.0f, -777.0f, -777.0f, -777.0f}, /*0101=*/0x5, (__m128i)(__v4su){1, 2, 4, 8}), 1.0f, -777.0f, 4.0f, -777.0f)); + __m128 test_mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepu32_ps // CHECK: uitofp <4 x i32> %{{.*}} to <4 x float> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_cvtepu32_ps(__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtepu32_ps(/*0101=*/0x5, (__m128i)(__v4su){1, 2, 4, 8}), 1.0f, 0.0f, 4.0f, 0.0f)); + __m256 test_mm256_cvtepu32_ps(__m256i __A) { // CHECK-LABEL: test_mm256_cvtepu32_ps // CHECK: uitofp <8 x i32> %{{.*}} to <8 x float> return _mm256_cvtepu32_ps(__A); } + +TEST_CONSTEXPR(match_m256(_mm256_cvtepu32_ps((__m256i)(__v8su){1, 1, 2, 2, 4, 4, 8, 8}), 1.0f, 1.0f, 2.0f, 2.0f, 4.0f, 4.0f, 8.0f, 8.0f)); + __m256 test_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepu32_ps // CHECK: uitofp <8 x i32> %{{.*}} to <8 x float> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_cvtepu32_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_mask_cvtepu32_ps(_mm256_set1_ps(-777.0f), /*1010 0101=*/0xa5,(__m256i)(__v8su){1, 1, 2, 2, 4, 4, 8, 8}), 1.0f, -777.0f, 2.0f, -777.0f, -777.0f, 4.0f, -777.0f, 8.0f)); + __m256 test_mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepu32_ps // CHECK: uitofp <8 x i32> %{{.*}} to <8 x float> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_cvtepu32_ps(__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_maskz_cvtepu32_ps(/*1010 0101=*/0xa5,(__m256i)(__v8su){1, 1, 2, 2, 4, 4, 8, 8}), 1.0f, 0.0f, 2.0f, 0.0f, 0.0f, 4.0f, 0.0f, 8.0f)); + __m128d test_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_mask_div_pd // CHECK: fdiv <2 x double> %{{.*}}, %{{.*}} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
