Author: moorabbit
Date: 2025-09-07T10:29:58Z
New Revision: 49f28f6efb90cfcc45adf4e9ececc4b45254623a
URL: https://github.com/llvm/llvm-project/commit/49f28f6efb90cfcc45adf4e9ececc4b45254623a DIFF: https://github.com/llvm/llvm-project/commit/49f28f6efb90cfcc45adf4e9ececc4b45254623a.diff LOG: [Headers][X86] Add constexpr support for some AVX[512] intrinsics. (#157260) The following AVX[512] intrinsics are now constexpr: - `_mm_cvtepi64_pd` - `_mm_mask_cvtepi64_pd` - `_mm_maskz_cvtepi64_pd` - `_mm_cvtepu64_pd` - `_mm_mask_cvtepu64_pd` - `_mm_maskz_cvtepu64_pd` - `_mm256_cvtepi64_pd` - `_mm256_mask_cvtepi64_pd` - `_mm256_maskz_cvtepi64_pd` - `_mm256_cvtepu64_pd` - `_mm256_mask_cvtepu64_pd` - `_mm256_maskz_cvtepu64_pd` - `_mm256_cvtepi64_ps` - `_mm256_mask_cvtepi64_ps` - `_mm256_maskz_cvtepi64_ps` - `_mm256_cvtepu64_ps` - `_mm256_mask_cvtepu64_ps` - `_mm256_maskz_cvtepu64_ps` - `_mm_cvtepi16_ph` - `_mm_mask_cvtepi16_ph` - `_mm_maskz_cvtepi16_ph` - `_mm_set1_ph` - `_mm_cvtepu16_ph` - `_mm_mask_cvtepu16_ph` - `_mm_maskz_cvtepu16_ph` - `_mm256_cvtepi16_ph` - `_mm256_mask_cvtepi16_ph` - `_mm256_set1_ph` - `_mm256_maskz_cvtepi16_ph` This PR is part 3 [[part 1](https://github.com/llvm/llvm-project/pull/156187) - [part 2](https://github.com/llvm/llvm-project/pull/156567)] of a series of PRs fixing #155798 Added: Modified: clang/lib/Headers/avx512vldqintrin.h clang/lib/Headers/avx512vlfp16intrin.h clang/test/CodeGen/X86/avx512vldq-builtins.c clang/test/CodeGen/X86/avx512vlfp16-builtins.c Removed: ################################################################################ diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index e23a92151a055..68bd52e43981a 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -460,39 +460,39 @@ _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_pd (__m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_pd(__m128i __A) { return 
(__m128d)__builtin_convertvector((__v2di)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_pd (__m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_pd(__m256i __A) { return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)_mm256_setzero_pd()); @@ -519,20 +519,20 @@ _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_ps (__m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_ps(__m256i __A) { return (__m128)__builtin_convertvector((__v4di)__A, __v4sf); } 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)_mm_setzero_ps()); @@ -706,39 +706,39 @@ _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_cvtepu64_pd (__m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepu64_pd(__m128i __A) { return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_cvtepu64_pd (__m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu64_pd(__m256i __A) { return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 
-_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)_mm256_setzero_pd()); @@ -765,20 +765,20 @@ _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_cvtepu64_ps (__m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu64_ps(__m256i __A) { return (__m128)__builtin_convertvector((__v4du)__A, __v4sf); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)_mm_setzero_ps()); diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index 573533bd895f0..8eb31eae6173b 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -46,11 +46,13 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) { return 
__extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0}; } -static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) { +static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_set1_ph(_Float16 __h) { return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h}; } -static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) { +static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_set1_ph(_Float16 __h) { return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h}; } @@ -807,34 +809,35 @@ _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) { (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi16_ph(__m128i __A) { return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph()); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_ph(__m256i __A) { return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W); } -static 
__inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), @@ -911,17 +914,18 @@ _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) { (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepu16_ph(__m128i __A) { return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph()); diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c index e1e8916bf60b3..802784472163d 100644 --- a/clang/test/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c @@ -440,6 +440,8 @@ __m128d test_mm_cvtepi64_pd(__m128i __A) { return _mm_cvtepi64_pd(__A); } +TEST_CONSTEXPR(match_m128d(_mm_cvtepi64_pd((__m128i)(__v2di){-1, -1}), -1.0, -1.0)); + __m128d test_mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepi64_pd // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double> @@ -447,6 +449,8 @@ __m128d test_mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A) { return _mm_mask_cvtepi64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_mask_cvtepi64_pd((__m128d){-777.0, -777.0}, 
/*01=*/0x1, (__m128i)(__v2di){-1, -1}), -1.0, -777.0)); + __m128d test_mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepi64_pd // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double> @@ -454,12 +458,16 @@ __m128d test_mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A) { return _mm_maskz_cvtepi64_pd(__U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_cvtepi64_pd(/*01=*/0x1, (__m128i)(__v2di){-1, -1}), -1.0, 0.0)); + __m256d test_mm256_cvtepi64_pd(__m256i __A) { // CHECK-LABEL: test_mm256_cvtepi64_pd // CHECK: sitofp <4 x i64> %{{.*}} to <4 x double> return _mm256_cvtepi64_pd(__A); } +TEST_CONSTEXPR(match_m256d(_mm256_cvtepi64_pd((__m256i)(__v4di){-1, -1, 2, 2}), -1.0, -1.0, 2.0, 2.0)); + __m256d test_mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepi64_pd // CHECK: sitofp <4 x i64> %{{.*}} to <4 x double> @@ -467,6 +475,8 @@ __m256d test_mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) { return _mm256_mask_cvtepi64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_cvtepi64_pd((__m256d){-777.0, -777.0, -777.0, -777.0}, /*1100*/0xc, (__m256i)(__v4di){-1, -1, 2, 2}), -777.0, -777.0, 2.0, 2.0)); + __m256d test_mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepi64_pd // CHECK: sitofp <4 x i64> %{{.*}} to <4 x double> @@ -474,6 +484,8 @@ __m256d test_mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A) { return _mm256_maskz_cvtepi64_pd(__U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_cvtepi64_pd(/*1100*/0xc, (__m256i)(__v4di){-1, -1, 2, 2}), 0.0, 0.0, 2.0, 2.0)); + __m128 test_mm_cvtepi64_ps(__m128i __A) { // CHECK-LABEL: test_mm_cvtepi64_ps // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.128 @@ -498,6 +510,8 @@ __m128 test_mm256_cvtepi64_ps(__m256i __A) { return _mm256_cvtepi64_ps(__A); } +TEST_CONSTEXPR(match_m128(_mm256_cvtepi64_ps((__m256i)(__v4di){-1, -1, 2, 2}), -1.0f, -1.0f, 2.0f, 2.0f)); + __m128 
test_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepi64_ps // CHECK: sitofp <4 x i64> %{{.*}} to <4 x float> @@ -505,6 +519,8 @@ __m128 test_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) { return _mm256_mask_cvtepi64_ps(__W, __U, __A); } +TEST_CONSTEXPR(match_m128(_mm256_mask_cvtepi64_ps((__m128){-777.0f, -777.0f, -777.0f, -777.0f}, /*1010=*/0xa, (__m256i)(__v4di){-1, -1, 2, 2}), -777.0f, -1.0f, -777.0f, 2.0f)); + __m128 test_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepi64_ps // CHECK: sitofp <4 x i64> %{{.*}} to <4 x float> @@ -512,6 +528,8 @@ __m128 test_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) { return _mm256_maskz_cvtepi64_ps(__U, __A); } +TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtepi64_ps(/*1010=*/0xa, (__m256i)(__v4di){-1, -1, 2, 2}), 0.0f, -1.0f, 0.0f, 2.0f)); + __m128i test_mm_cvttpd_epi64(__m128d __A) { // CHECK-LABEL: test_mm_cvttpd_epi64 // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.128 @@ -662,6 +680,8 @@ __m128d test_mm_cvtepu64_pd(__m128i __A) { return _mm_cvtepu64_pd(__A); } +TEST_CONSTEXPR(match_m128d(_mm_cvtepu64_pd((__m128i)(__v2du){1, 1}), 1.0, 1.0)); + __m128d test_mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepu64_pd // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double> @@ -669,6 +689,8 @@ __m128d test_mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A) { return _mm_mask_cvtepu64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_mask_cvtepu64_pd((__m128d){-777.0, -777.0}, /*01=*/0x1, (__m128i)(__v2du){1, 1}), 1.0, -777.0)); + __m128d test_mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepu64_pd // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double> @@ -676,12 +698,16 @@ __m128d test_mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A) { return _mm_maskz_cvtepu64_pd(__U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_cvtepu64_pd(/*01=*/0x1, 
(__m128i)(__v2du){1, 1}), 1.0, 0.0)); + __m256d test_mm256_cvtepu64_pd(__m256i __A) { // CHECK-LABEL: test_mm256_cvtepu64_pd // CHECK: uitofp <4 x i64> %{{.*}} to <4 x double> return _mm256_cvtepu64_pd(__A); } +TEST_CONSTEXPR(match_m256d(_mm256_cvtepu64_pd((__m256i)(__v4du){1, 1, 2, 2}), 1.0, 1.0, 2.0, 2.0)); + __m256d test_mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepu64_pd // CHECK: uitofp <4 x i64> %{{.*}} to <4 x double> @@ -689,6 +715,8 @@ __m256d test_mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) { return _mm256_mask_cvtepu64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_cvtepu64_pd((__m256d){-777.0, -777.0, -777.0, -777.0}, /*1100*/0xc, (__m256i)(__v4du){1, 1, 2, 2}), -777.0, -777.0, 2.0, 2.0)); + __m256d test_mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepu64_pd // CHECK: uitofp <4 x i64> %{{.*}} to <4 x double> @@ -696,6 +724,8 @@ __m256d test_mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A) { return _mm256_maskz_cvtepu64_pd(__U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_cvtepu64_pd(/*1100*/0xc, (__m256i)(__v4du){1, 1, 2, 2}), 0.0, 0.0, 2.0, 2.0)); + __m128 test_mm_cvtepu64_ps(__m128i __A) { // CHECK-LABEL: test_mm_cvtepu64_ps // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.128 @@ -720,6 +750,8 @@ __m128 test_mm256_cvtepu64_ps(__m256i __A) { return _mm256_cvtepu64_ps(__A); } +TEST_CONSTEXPR(match_m128(_mm256_cvtepu64_ps((__m256i)(__v4du){1, 1, 2, 2}), 1.0f, 1.0f, 2.0f, 2.0f)); + __m128 test_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepu64_ps // CHECK: uitofp <4 x i64> %{{.*}} to <4 x float> @@ -727,6 +759,8 @@ __m128 test_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) { return _mm256_mask_cvtepu64_ps(__W, __U, __A); } +TEST_CONSTEXPR(match_m128(_mm256_mask_cvtepu64_ps((__m128){-777.0f, -777.0f, -777.0f, -777.0f}, /*1010=*/0xa, (__m256i)(__v4du){1, 1, 2, 
2}), -777.0f, 1.0f, -777.0f, 2.0f)); + __m128 test_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepu64_ps // CHECK: uitofp <4 x i64> %{{.*}} to <4 x float> @@ -734,6 +768,8 @@ __m128 test_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) { return _mm256_maskz_cvtepu64_ps(__U, __A); } +TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtepu64_ps(/*1010=*/0xa, (__m256i)(__v4du){1, 1, 2, 2}), 0.0f, 1.0f, 0.0f, 2.0f)); + __m128d test_mm_range_pd(__m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_range_pd // CHECK: @llvm.x86.avx512.mask.range.pd.128 diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c index 828876e971dd2..fd6ea8fe6056d 100644 --- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c @@ -50,6 +50,8 @@ __m128h test_mm_set1_ph(_Float16 h) { return _mm_set1_ph(h); } +TEST_CONSTEXPR(match_m128h(_mm_set1_ph(-777.0), -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0)); + __m256h test_mm256_set1_ph(_Float16 h) { // CHECK-LABEL: test_mm256_set1_ph // CHECK: insertelement <16 x half> {{.*}}, i32 0 @@ -71,6 +73,8 @@ __m256h test_mm256_set1_ph(_Float16 h) { return _mm256_set1_ph(h); } +TEST_CONSTEXPR(match_m256h(_mm256_set1_ph(-777.0), -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0)); + __m128h test_mm_set1_pch(_Float16 _Complex h) { // CHECK-LABEL: test_mm_set1_pch // CHECK: insertelement <4 x float> {{.*}}, i32 0 @@ -1799,36 +1803,48 @@ __m128h test_mm_cvtepi16_ph(__m128i A) { return _mm_cvtepi16_ph(A); } +TEST_CONSTEXPR(match_m128h(_mm_cvtepi16_ph((__m128i)(__v8hi){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 8.0)); + __m128h test_mm_mask_cvtepi16_ph(__m128h A, __mmask8 B, __m128i C) { // CHECK-LABEL: test_mm_mask_cvtepi16_ph // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half> return _mm_mask_cvtepi16_ph(A, B, 
C); } +TEST_CONSTEXPR(match_m128h(_mm_mask_cvtepi16_ph(_mm_set1_ph(-777.0), /*1001 0011=*/0x93, (__m128i)(__v8hi){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, -777.0, -777.0, 4.0, -777.0, -777.0, 8.0)); + __m128h test_mm_maskz_cvtepi16_ph(__mmask8 A, __m128i B) { // CHECK-LABEL: test_mm_maskz_cvtepi16_ph // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half> return _mm_maskz_cvtepi16_ph(A, B); } +TEST_CONSTEXPR(match_m128h(_mm_maskz_cvtepi16_ph(/*1001 0011=*/0x93, (__m128i)(__v8hi){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 8.0)); + __m256h test_mm256_cvtepi16_ph(__m256i A) { // CHECK-LABEL: test_mm256_cvtepi16_ph // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half> return _mm256_cvtepi16_ph(A); } +TEST_CONSTEXPR(match_m256h(_mm256_cvtepi16_ph((__m256i)(__v16hi){-1, -1, 2, 2, -4, -4, 8, 8, -16, -16, 32, 32, -64, -64, 128, 128}), -1.0, -1.0, 2.0, 2.0, -4.0, -4.0, 8.0, 8.0, -16.0, -16.0, 32.0, 32.0, -64.0, -64.0, 128.0, 128.0)); + __m256h test_mm256_mask_cvtepi16_ph(__m256h A, __mmask16 B, __m256i C) { // CHECK-LABEL: test_mm256_mask_cvtepi16_ph // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half> return _mm256_mask_cvtepi16_ph(A, B, C); } +TEST_CONSTEXPR(match_m256h(_mm256_mask_cvtepi16_ph(_mm256_set1_ph(-777.0), /*1101 0101 1101 1100=*/0xd5dc, (__m256i)(__v16hi){-1, -1, 2, 2, -4, -4, 8, 8, -16, -16, 32, 32, -64, -64, 128, 128}), -777.0, -777.0, 2.0, 2.0, -4.0, -777.0, 8.0, 8.0, -16.0, -777.0, 32.0, -777.0, -64.0, -777.0, 128.0, 128.0)); + __m256h test_mm256_maskz_cvtepi16_ph(__mmask16 A, __m256i B) { // CHECK-LABEL: test_mm256_maskz_cvtepi16_ph // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half> return _mm256_maskz_cvtepi16_ph(A, B); } +TEST_CONSTEXPR(match_m256h(_mm256_maskz_cvtepi16_ph(/*1101 0101 1101 1100=*/0xd5dc, (__m256i)(__v16hi){-1, -1, 2, 2, -4, -4, 8, 8, -16, -16, 32, 32, -64, -64, 128, 128}), 0.0, 0.0, 2.0, 2.0, -4.0, 0.0, 8.0, 8.0, -16.0, 0.0, 32.0, 0.0, -64.0, 0.0, 128.0, 128.0)); + __m128i 
test_mm_cvtph_epu16(__m128h A) { // CHECK-LABEL: test_mm_cvtph_epu16 // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128 @@ -1907,18 +1923,24 @@ __m128h test_mm_cvtepu16_ph(__m128i A) { return _mm_cvtepu16_ph(A); } +TEST_CONSTEXPR(match_m128h(_mm_cvtepu16_ph((__m128i)(__v8hu){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 8.0)); + __m128h test_mm_mask_cvtepu16_ph(__m128h A, __mmask8 B, __m128i C) { // CHECK-LABEL: test_mm_mask_cvtepu16_ph // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half> return _mm_mask_cvtepu16_ph(A, B, C); } +TEST_CONSTEXPR(match_m128h(_mm_mask_cvtepu16_ph(_mm_set1_ph(-777.0), /*1001 0011=*/0x93, (__m128i)(__v8hu){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, -777.0, -777.0, 4.0, -777.0, -777.0, 8.0)); + __m128h test_mm_maskz_cvtepu16_ph(__mmask8 A, __m128i B) { // CHECK-LABEL: test_mm_maskz_cvtepu16_ph // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half> return _mm_maskz_cvtepu16_ph(A, B); } +TEST_CONSTEXPR(match_m128h(_mm_maskz_cvtepu16_ph(/*1001 0011=*/0x93, (__m128i)(__v8hu){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 8.0)); + __m256h test_mm256_cvtepu16_ph(__m256i A) { // CHECK-LABEL: test_mm256_cvtepu16_ph // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits