This revision was automatically updated to reflect the committed changes. Closed by commit rC336643: [X86] Lowering integer truncation intrinsics to native IR (authored by mike.dvoretsky, committed by ).
Changed prior to commit: https://reviews.llvm.org/D48712?vs=153471&id=154765#toc Repository: rC Clang https://reviews.llvm.org/D48712 Files: lib/Headers/avx512vlbwintrin.h lib/Headers/avx512vlintrin.h test/CodeGen/avx512vl-builtins.c test/CodeGen/avx512vlbw-builtins.c
Index: test/CodeGen/avx512vl-builtins.c =================================================================== --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ -8503,7 +8503,8 @@ __m128i test_mm_cvtepi32_epi8(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.128 + // CHECK: trunc <4 x i32> %{{.*}} to <4 x i8> + // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> return _mm_cvtepi32_epi8(__A); } @@ -8527,7 +8528,8 @@ __m128i test_mm256_cvtepi32_epi8(__m256i __A) { // CHECK-LABEL: @test_mm256_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.256 + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i8> + // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> return _mm256_cvtepi32_epi8(__A); } @@ -8551,7 +8553,8 @@ __m128i test_mm_cvtepi32_epi16(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.128 + // CHECK: trunc <4 x i32> %{{.*}} to <4 x i16> + // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> return _mm_cvtepi32_epi16(__A); } @@ -8599,7 +8602,8 @@ __m128i test_mm_cvtepi64_epi8(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.128 + // CHECK: trunc <2 x i64> %{{.*}} to <2 x i8> + // CHECK: shufflevector <2 x i8> %{{.*}}, <2 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> return _mm_cvtepi64_epi8(__A); } @@ -8623,7 +8627,8 @@ __m128i test_mm256_cvtepi64_epi8(__m256i __A) { // CHECK-LABEL: @test_mm256_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.256 + // CHECK: trunc <4 x i64> %{{.*}} to <4 x i8> + // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> return _mm256_cvtepi64_epi8(__A); } @@ -8647,7 +8652,8 @@ __m128i test_mm_cvtepi64_epi32(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmov.qd.128 + // CHECK: trunc <2 x i64> %{{.*}} to <2 x i32> + // CHECK: shufflevector <2 x i32> %{{.*}}, <2 x i32> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> return _mm_cvtepi64_epi32(__A); } @@ -8697,7 +8703,8 @@ __m128i test_mm_cvtepi64_epi16(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.128 + // CHECK: trunc <2 x i64> %{{.*}} to <2 x i16> + // CHECK: shufflevector <2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> return _mm_cvtepi64_epi16(__A); } @@ -8721,7 +8728,8 @@ __m128i test_mm256_cvtepi64_epi16(__m256i __A) { // CHECK-LABEL: @test_mm256_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.256 + // CHECK: trunc <4 x i64> %{{.*}} to <4 x i16> + // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> return _mm256_cvtepi64_epi16(__A); } Index: test/CodeGen/avx512vlbw-builtins.c =================================================================== --- test/CodeGen/avx512vlbw-builtins.c +++ test/CodeGen/avx512vlbw-builtins.c @@ -1792,7 +1792,8 @@ __m128i test_mm_cvtepi16_epi8(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.wb.128 + // CHECK: trunc <8 x i16> %{{.*}} to <8 x i8> + // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> return _mm_cvtepi16_epi8(__A); } Index: lib/Headers/avx512vlintrin.h =================================================================== --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -31,6 +31,10 @@ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256))) +typedef short __v2hi __attribute__((__vector_size__(4))); +typedef char __v4qi __attribute__((__vector_size__(4))); +typedef char __v2qi __attribute__((__vector_size__(2))); + /* Integer compare */ #define _mm_cmpeq_epi32_mask(A, B) \ @@ -7341,9 +7345,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, - (__v16qi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7371,9 +7375,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, - (__v16qi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v8si)__A, __v8qi), + (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 @@ -7400,9 +7405,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, - (__v8hi) _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7456,9 +7461,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, - (__v16qi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7485,9 +7490,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, - (__v16qi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 @@ -7514,9 +7519,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, - (__v4si)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7571,9 +7575,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, - (__v8hi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, + 3, 3, 3, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7601,9 +7605,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, - (__v8hi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 Index: lib/Headers/avx512vlbwintrin.h =================================================================== --- lib/Headers/avx512vlbwintrin.h +++ lib/Headers/avx512vlbwintrin.h @@ -1496,10 +1496,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi16_epi8 (__m128i __A) { - - return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, - (__v16qi) _mm_setzero_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v8hi)__A, __v8qi), + (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); } static __inline__ __m128i __DEFAULT_FN_ATTRS128
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits