Author: woruyu
Date: 2025-08-11T18:38:17+08:00
New Revision: 50bd897fdee98bef494641288c359fd3777dd88d
URL: https://github.com/llvm/llvm-project/commit/50bd897fdee98bef494641288c359fd3777dd88d DIFF: https://github.com/llvm/llvm-project/commit/50bd897fdee98bef494641288c359fd3777dd88d.diff LOG: [Headers][X86] Allow SSE41/AVX2/AVX512F/AVX512BW integer extension intrinsics to be used in constexpr (#152971) ### Summary This PR resolves https://github.com/llvm/llvm-project/issues/152315 Added: Modified: clang/lib/Headers/avx2intrin.h clang/lib/Headers/avx512bwintrin.h clang/lib/Headers/avx512fintrin.h clang/lib/Headers/smmintrin.h clang/test/CodeGen/X86/avx2-builtins.c clang/test/CodeGen/X86/avx512bw-builtins.c clang/test/CodeGen/X86/avx512f-builtins.c clang/test/CodeGen/X86/sse41-builtins.c Removed: ################################################################################ diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 55e7102a7d192..f00a8a516ecfe 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1371,9 +1371,8 @@ _mm256_movemask_epi8(__m256i __a) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [16 x i16] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi8_epi16(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi); @@ -1399,9 +1398,8 @@ _mm256_cvtepi8_epi16(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the sign-extended /// values. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi8_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); @@ -1426,9 +1424,8 @@ _mm256_cvtepi8_epi32(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi8_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di); @@ -1454,9 +1451,8 @@ _mm256_cvtepi8_epi64(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [8 x i32] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi16_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si); } @@ -1479,9 +1475,8 @@ _mm256_cvtepi16_epi32(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi16_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di); } @@ -1504,9 +1499,8 @@ _mm256_cvtepi16_epi64(__m128i __V) /// A 128-bit vector of [4 x i32] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi32_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4si)__V, __v4di); } @@ -1530,9 +1524,8 @@ _mm256_cvtepi32_epi64(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [16 x i16] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu8_epi16(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu8_epi16(__m128i __V) { return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi); } @@ -1556,9 +1549,8 @@ _mm256_cvtepu8_epi16(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu8_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu8_epi32(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } @@ -1581,9 +1573,8 @@ _mm256_cvtepu8_epi32(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu8_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu8_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di); } @@ -1607,9 +1598,8 @@ _mm256_cvtepu8_epi64(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu16_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si); } @@ -1632,9 +1622,8 @@ _mm256_cvtepu16_epi32(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu16_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di); } @@ -1657,9 +1646,8 @@ _mm256_cvtepu16_epi64(__m128i __V) /// A 128-bit vector of [4 x i32] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu32_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4su)__V, __v4di); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 233d4a6574f30..d7377e5d5196c 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1379,9 +1379,8 @@ _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi8_epi16(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi8_epi16(__m256i __A) { /* This function always performs a signed extension, but __v32qi is a char which may be signed or unsigned, so use __v32qs. */ return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); @@ -1403,9 +1402,8 @@ _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu8_epi16(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu8_epi16(__m256i __A) { return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 95b80cc59bb02..940064675ba45 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -4714,9 +4714,8 @@ _mm512_knot(__mmask16 __M) #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi8_epi32(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi8_epi32(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. 
*/ return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); @@ -4738,9 +4737,8 @@ _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi8_epi64(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi8_epi64(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); @@ -4762,9 +4760,8 @@ _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) (__v8di)_mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi32_epi64(__m256i __X) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); } @@ -4784,9 +4781,8 @@ _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi16_epi32(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi16_epi32(__m256i __A) { return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); } @@ -4806,9 +4802,8 @@ _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) (__v16si)_mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi16_epi64(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); } @@ -4828,9 +4823,8 @@ _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu8_epi32(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu8_epi32(__m128i __A) { 
return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); } @@ -4850,9 +4844,8 @@ _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu8_epi64(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu8_epi64(__m128i __A) { return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); } @@ -4872,9 +4865,8 @@ _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu32_epi64(__m256i __X) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); } @@ -4894,9 +4886,8 @@ _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu16_epi32(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu16_epi32(__m256i __A) { return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); } @@ -4916,9 +4907,8 @@ _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu16_epi64(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); } diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index bc6fe4c801d7e..c5075c419b70b 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -27,6 +27,12 @@ __min_vector_width__(128))) #endif +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR 
__DEFAULT_FN_ATTRS +#endif + /* SSE4 Rounding macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_NEG_INF 0x01 @@ -1224,7 +1230,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1, /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are /// sign-extended to 16-bit values. /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i) __builtin_convertvector( @@ -1246,7 +1253,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are /// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i) __builtin_convertvector( @@ -1266,7 +1274,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. 
*/ return (__m128i) __builtin_convertvector( @@ -1286,7 +1295,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) { /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are /// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi16_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si); } @@ -1304,7 +1314,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) { /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi16_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di); } @@ -1322,7 +1333,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) { /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi32_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di); } @@ -1341,7 +1353,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are /// zero-extended to 16-bit values. 
/// \returns A 128-bit vector of [8 x i16] containing the zero-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu8_epi16(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), @@ -1361,7 +1374,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are /// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu8_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); } @@ -1379,7 +1393,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu8_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); } @@ -1397,7 +1412,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) { /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are /// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu16_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); } @@ -1415,7 +1431,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) { /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu16_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); } @@ -1433,7 +1450,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) { /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu32_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di); } @@ -2326,6 +2344,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi64(__m128i __V1, } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #include <popcntintrin.h> diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index adbb854660a8b..8790485f00a8c 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -311,6 +311,8 @@ __m256i test_mm256_cvtepi8_epi16(__m128i a) { return _mm256_cvtepi8_epi16(a); } +TEST_CONSTEXPR(match_v16hi(_mm256_cvtepi8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)); + __m256i test_mm256_cvtepi8_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi8_epi32 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -318,6 +320,8 @@ __m256i test_mm256_cvtepi8_epi32(__m128i a) { return _mm256_cvtepi8_epi32(a); } +TEST_CONSTEXPR(match_v8si(_mm256_cvtepi8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4)); + __m256i test_mm256_cvtepi8_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi8_epi64 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -325,12 +329,16 @@ __m256i test_mm256_cvtepi8_epi64(__m128i a) { return _mm256_cvtepi8_epi64(a); } +TEST_CONSTEXPR(match_v4di(_mm256_cvtepi8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0)); + __m256i test_mm256_cvtepi16_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi16_epi32 // CHECK: sext <8 x i16> 
%{{.*}} to <8 x i32> return _mm256_cvtepi16_epi32(a); } +TEST_CONSTEXPR(match_v8si(_mm256_cvtepi16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0, 1, -2, 3, -4)); + __m256i test_mm256_cvtepi16_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi16_epi64 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -338,18 +346,24 @@ __m256i test_mm256_cvtepi16_epi64(__m128i a) { return _mm256_cvtepi16_epi64(a); } +TEST_CONSTEXPR(match_v4di(_mm256_cvtepi16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0)); + __m256i test_mm256_cvtepi32_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi32_epi64 // CHECK: sext <4 x i32> %{{.*}} to <4 x i64> return _mm256_cvtepi32_epi64(a); } +TEST_CONSTEXPR(match_v4di(_mm256_cvtepi32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), -70000, 2, -1, 0)); + __m256i test_mm256_cvtepu8_epi16(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu8_epi16 // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> return _mm256_cvtepu8_epi16(a); } +TEST_CONSTEXPR(match_v16hi(_mm256_cvtepu8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252, 5, 250, 7, 248, 9, 246, 11, 244)); + __m256i test_mm256_cvtepu8_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu8_epi32 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -357,6 +371,8 @@ __m256i test_mm256_cvtepu8_epi32(__m128i a) { return _mm256_cvtepu8_epi32(a); } +TEST_CONSTEXPR(match_v8si(_mm256_cvtepu8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252)); + __m256i test_mm256_cvtepu8_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu8_epi64 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -364,12 +380,16 @@ __m256i test_mm256_cvtepu8_epi64(__m128i a) { return _mm256_cvtepu8_epi64(a); } 
+TEST_CONSTEXPR(match_v4di(_mm256_cvtepu8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0)); + __m256i test_mm256_cvtepu16_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu16_epi32 // CHECK: zext <8 x i16> {{.*}} to <8 x i32> return _mm256_cvtepu16_epi32(a); } +TEST_CONSTEXPR(match_v8si(_mm256_cvtepu16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532)); + __m256i test_mm256_cvtepu16_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu16_epi64 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -377,12 +397,16 @@ __m256i test_mm256_cvtepu16_epi64(__m128i a) { return _mm256_cvtepu16_epi64(a); } +TEST_CONSTEXPR(match_v4di(_mm256_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0)); + __m256i test_mm256_cvtepu32_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepu32_epi64 // CHECK: zext <4 x i32> %{{.*}} to <4 x i64> return _mm256_cvtepu32_epi64(a); } +TEST_CONSTEXPR(match_v4di(_mm256_cvtepu32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), 4294897296, 2, 4294967295, 0)); + __m128i test0_mm256_extracti128_si256_0(__m256i a) { // CHECK-LABEL: test0_mm256_extracti128_si256 // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 0, i32 1> diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 37765eb9616b3..2e3e13d2769c4 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1547,6 +1547,8 @@ __m512i test_mm512_cvtepi8_epi16(__m256i __A) { return _mm512_cvtepi8_epi16(__A); } +TEST_CONSTEXPR(match_v32hi(_mm512_cvtepi8_epi16(_mm256_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16, 17, -18, 19, -20, 21, -22, 23, -24, 25, -26, 27, -28)), -3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16, 17, -18, 19, -20, 21, -22, 23, -24, 
25, -26, 27, -28)); + __m512i test_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm512_mask_cvtepi8_epi16 // CHECK: sext <32 x i8> %{{.*}} to <32 x i16> @@ -1567,6 +1569,8 @@ __m512i test_mm512_cvtepu8_epi16(__m256i __A) { return _mm512_cvtepu8_epi16(__A); } +TEST_CONSTEXPR(match_v32hi(_mm512_cvtepu8_epi16(_mm256_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16, 17, -18, 19, -20, 21, -22, 23, -24, 25, -26, 27, -28)), 253, 2, 255, 0, 1, 254, 3, 252, 5, 250, 7, 248, 9, 246, 11, 244, 13, 242, 15, 240, 17, 238, 19, 236, 21, 234, 23, 232, 25, 230, 27, 228)); + __m512i test_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm512_mask_cvtepu8_epi16 // CHECK: zext <32 x i8> %{{.*}} to <32 x i16> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 8c446f5715c00..26bf5fa9766d7 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -3840,6 +3840,8 @@ __m512i test_mm512_cvtepi8_epi32(__m128i __A) { return _mm512_cvtepi8_epi32(__A); } +TEST_CONSTEXPR(match_v16si(_mm512_cvtepi8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)); + __m512i test_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi8_epi32 // CHECK: sext <16 x i8> %{{.*}} to <16 x i32> @@ -3860,6 +3862,8 @@ __m512i test_mm512_cvtepi8_epi64(__m128i __A) { return _mm512_cvtepi8_epi64(__A); } +TEST_CONSTEXPR(match_v8di(_mm512_cvtepi8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 0, 0, 0, 0, 0, 0, 0, 0)), -3, 2, -1, 0, 1, -2, 3, -4)); + __m512i test_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi8_epi64 // CHECK: sext <8 x i8> %{{.*}} to <8 x i64> @@ -3880,6 +3884,8 @@ __m512i 
test_mm512_cvtepi32_epi64(__m256i __X) { return _mm512_cvtepi32_epi64(__X); } +TEST_CONSTEXPR(match_v8di(_mm512_cvtepi32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), -70000, 2, -1, 0, 1, -2, 3, -4)); + __m512i test_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { // CHECK-LABEL: test_mm512_mask_cvtepi32_epi64 // CHECK: sext <8 x i32> %{{.*}} to <8 x i64> @@ -3900,6 +3906,8 @@ __m512i test_mm512_cvtepi16_epi32(__m256i __A) { return _mm512_cvtepi16_epi32(__A); } +TEST_CONSTEXPR(match_v16si(_mm512_cvtepi16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)); + __m512i test_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi16_epi32 // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> @@ -3920,6 +3928,8 @@ __m512i test_mm512_cvtepi16_epi64(__m128i __A) { return _mm512_cvtepi16_epi64(__A); } +TEST_CONSTEXPR(match_v8di(_mm512_cvtepi16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0, 1, -2, 3, -4)); + __m512i test_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi16_epi64 // CHECK: sext <8 x i16> %{{.*}} to <8 x i64> @@ -3940,6 +3950,8 @@ __m512i test_mm512_cvtepu8_epi32(__m128i __A) { return _mm512_cvtepu8_epi32(__A); } +TEST_CONSTEXPR(match_v16si(_mm512_cvtepu8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252, 5, 250, 7, 248, 9, 246, 11, 244)); + __m512i test_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { // CHECK-LABEL: test_mm512_mask_cvtepu8_epi32 // CHECK: zext <16 x i8> %{{.*}} to <16 x i32> @@ -3960,6 +3972,8 @@ __m512i test_mm512_cvtepu8_epi64(__m128i __A) { return _mm512_cvtepu8_epi64(__A); } +TEST_CONSTEXPR(match_v8di(_mm512_cvtepu8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 0, 0, 0, 0, 0, 0, 0, 0)), 253, 2, 255, 0, 1, 
254, 3, 252)); + __m512i test_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm512_mask_cvtepu8_epi64 // CHECK: zext <8 x i8> %{{.*}} to <8 x i64> @@ -3980,6 +3994,8 @@ __m512i test_mm512_cvtepu32_epi64(__m256i __X) { return _mm512_cvtepu32_epi64(__X); } +TEST_CONSTEXPR(match_v8di(_mm512_cvtepu32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), 4294897296, 2, 4294967295, 0, 1, 4294967294, 3, 4294967292)); + __m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { // CHECK-LABEL: test_mm512_mask_cvtepu32_epi64 // CHECK: zext <8 x i32> %{{.*}} to <8 x i64> @@ -4000,6 +4016,8 @@ __m512i test_mm512_cvtepu16_epi32(__m256i __A) { return _mm512_cvtepu16_epi32(__A); } +TEST_CONSTEXPR(match_v16si(_mm512_cvtepu16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 65236, 2, 65535, 0, 1, 65534, 3, 65532, 5, 65530, 7, 65528, 9, 65526, 11, 65524)); + __m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { // CHECK-LABEL: test_mm512_mask_cvtepu16_epi32 // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> @@ -4020,6 +4038,8 @@ __m512i test_mm512_cvtepu16_epi64(__m128i __A) { return _mm512_cvtepu16_epi64(__A); } +TEST_CONSTEXPR(match_v8di(_mm512_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532)); + __m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm512_mask_cvtepu16_epi64 // CHECK: zext <8 x i16> %{{.*}} to <8 x i64> diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index d71a4b7e789ef..10deb386d82aa 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -9,6 +9,7 @@ #include <immintrin.h> +#include "builtin_test_helpers.h" // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll @@ -86,6 +87,8 @@ __m128i 
test_mm_cvtepi8_epi16(__m128i a) { return _mm_cvtepi8_epi16(a); } +TEST_CONSTEXPR(match_v8hi(_mm_cvtepi8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 0, 0, 0, 0, 0, 0, 0, 0)), -3, 2, -1, 0, 1, -2, 3, -4)); + __m128i test_mm_cvtepi8_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepi8_epi32 // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -93,6 +96,8 @@ __m128i test_mm_cvtepi8_epi32(__m128i a) { return _mm_cvtepi8_epi32(a); } +TEST_CONSTEXPR(match_v4si(_mm_cvtepi8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 0, 0, 0, 0, 0, 0, 0, 0)), -3, 2, -1, 0)); + __m128i test_mm_cvtepi8_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepi8_epi64 // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> <i32 0, i32 1> @@ -100,6 +105,8 @@ __m128i test_mm_cvtepi8_epi64(__m128i a) { return _mm_cvtepi8_epi64(a); } +TEST_CONSTEXPR(match_v2di(_mm_cvtepi8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 0, 0, 0, 0, 0, 0, 0, 0)), -3, 2)); + __m128i test_mm_cvtepi16_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepi16_epi32 // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -107,6 +114,8 @@ __m128i test_mm_cvtepi16_epi32(__m128i a) { return _mm_cvtepi16_epi32(a); } +TEST_CONSTEXPR(match_v4si(_mm_cvtepi16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2, -1, 0)); + __m128i test_mm_cvtepi16_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepi16_epi64 // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> <i32 0, i32 1> @@ -114,6 +123,8 @@ __m128i test_mm_cvtepi16_epi64(__m128i a) { return _mm_cvtepi16_epi64(a); } +TEST_CONSTEXPR(match_v2di(_mm_cvtepi16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), -300, 2)); + __m128i test_mm_cvtepi32_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepi32_epi64 // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> <i32 0, i32 1> @@ -121,6 +132,8 @@ __m128i test_mm_cvtepi32_epi64(__m128i a) { return 
_mm_cvtepi32_epi64(a); } +TEST_CONSTEXPR(match_v2di(_mm_cvtepi32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), -70000, 2)); + __m128i test_mm_cvtepu8_epi16(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi16 // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -128,6 +141,8 @@ __m128i test_mm_cvtepu8_epi16(__m128i a) { return _mm_cvtepu8_epi16(a); } +TEST_CONSTEXPR(match_v8hi(_mm_cvtepu8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 0, 0, 0, 0, 0, 0, 0, 0)), 253, 2, 255, 0, 1, 254, 3, 252)); + __m128i test_mm_cvtepu8_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi32 // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -135,6 +150,8 @@ __m128i test_mm_cvtepu8_epi32(__m128i a) { return _mm_cvtepu8_epi32(a); } +TEST_CONSTEXPR(match_v4si(_mm_cvtepu8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 0, 0, 0, 0, 0, 0, 0, 0)), 253, 2, 255, 0)); + __m128i test_mm_cvtepu8_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi64 // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> <i32 0, i32 1> @@ -142,6 +159,8 @@ __m128i test_mm_cvtepu8_epi64(__m128i a) { return _mm_cvtepu8_epi64(a); } +TEST_CONSTEXPR(match_v2di(_mm_cvtepu8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, -2, 3, -4, 0, 0, 0, 0, 0, 0, 0, 0)), 253, 2)); + __m128i test_mm_cvtepu16_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepu16_epi32 // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -149,6 +168,8 @@ __m128i test_mm_cvtepu16_epi32(__m128i a) { return _mm_cvtepu16_epi32(a); } +TEST_CONSTEXPR(match_v4si(_mm_cvtepu16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0)); + __m128i test_mm_cvtepu16_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu16_epi64 // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> <i32 0, i32 1> @@ -156,6 +177,8 @@ __m128i test_mm_cvtepu16_epi64(__m128i a) { return 
_mm_cvtepu16_epi64(a); } +TEST_CONSTEXPR(match_v2di(_mm_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2)); + __m128i test_mm_cvtepu32_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu32_epi64 // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> <i32 0, i32 1> @@ -163,6 +186,8 @@ __m128i test_mm_cvtepu32_epi64(__m128i a) { return _mm_cvtepu32_epi64(a); } +TEST_CONSTEXPR(match_v2di(_mm_cvtepu32_epi64(_mm_setr_epi32(-70000, 2, -1, 0)), 4294897296, 2)); + __m128d test_mm_dp_pd(__m128d x, __m128d y) { // CHECK-LABEL: test_mm_dp_pd // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.dppd(<2 x double> {{.*}}, <2 x double> {{.*}}, i8 7) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits