Author: rksimon Date: Wed Jun 1 16:46:51 2016 New Revision: 271436 URL: http://llvm.org/viewvc/llvm-project?rev=271436&view=rev Log: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round toward zero) f32/f64 to i32 conversions with generic IR (clang)
The 'cvtt' truncation (round to zero) conversions can be safely represented as generic __builtin_convertvector (fptosi) calls instead of x86 intrinsics. We already do this (implicitly) for the scalar equivalents. Note: I looked at updating _mm_cvttpd_epi32 as well but this still requires a lot more backend work to correctly lower (both for debug and optimized builds). Differential Revision: http://reviews.llvm.org/D20859 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/test/CodeGen/avx-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=271436&r1=271435&r2=271436&view=diff ============================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jun 1 16:46:51 2016 @@ -339,7 +339,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2") @@ -462,9 +461,7 @@ TARGET_BUILTIN(__builtin_ia32_cmpps256, TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx") 
TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx") Modified: cfe/trunk/lib/Headers/avxintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=271436&r1=271435&r2=271436&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avxintrin.h (original) +++ cfe/trunk/lib/Headers/avxintrin.h Wed Jun 1 16:46:51 2016 @@ -2108,7 +2108,7 @@ _mm256_cvtps_pd(__m128 __a) static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { - return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); + return (__m128i)__builtin_convertvector((__v4df) __a, __v4si); } static __inline __m128i __DEFAULT_FN_ATTRS @@ -2120,7 +2120,7 @@ _mm256_cvtpd_epi32(__m256d __a) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { - return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); + return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si); } static __inline double __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=271436&r1=271435&r2=271436&view=diff ============================================================================== --- cfe/trunk/lib/Headers/emmintrin.h (original) +++ cfe/trunk/lib/Headers/emmintrin.h Wed Jun 1 16:46:51 2016 @@ -1744,7 +1744,7 @@ _mm_cvtps_epi32(__m128 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { - return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); + return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si); } /// \brief Returns a vector of [4 x i32] where the lowest element is the input Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=271436&r1=271435&r2=271436&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Wed Jun 1 16:46:51 2016 @@ -286,13 +286,13 @@ __m256d test_mm256_cvtps_pd(__m128 A) { __m128i test_mm256_cvttpd_epi32(__m256d A) { // CHECK-LABEL: test_mm256_cvttpd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}}) + // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32> return _mm256_cvttpd_epi32(A); } __m256i test_mm256_cvttps_epi32(__m256 A) { // CHECK-LABEL: test_mm256_cvttps_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}}) + // CHECK: fptosi <8 x float> %{{.*}} to <8 x i32> return _mm256_cvttps_epi32(A); } Modified: cfe/trunk/test/CodeGen/builtins-x86.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-x86.c?rev=271436&r1=271435&r2=271436&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/builtins-x86.c (original) +++ cfe/trunk/test/CodeGen/builtins-x86.c Wed Jun 1 16:46:51 2016 @@ -335,7 +335,6 @@ void f0() { tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); #endif tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); - tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); (void) __builtin_ia32_clflush(tmp_vCp); (void) __builtin_ia32_lfence(); (void) __builtin_ia32_mfence(); @@ -415,9 +414,7 @@ void f0() { tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); - tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); - tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7); tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7); tmp_V8i = 
__builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7); Modified: cfe/trunk/test/CodeGen/sse2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=271436&r1=271435&r2=271436&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/sse2-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse2-builtins.c Wed Jun 1 16:46:51 2016 @@ -533,7 +533,7 @@ __m128i test_mm_cvttpd_epi32(__m128d A) __m128i test_mm_cvttps_epi32(__m128 A) { // CHECK-LABEL: test_mm_cvttps_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}}) + // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32> return _mm_cvttps_epi32(A); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits