llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: None (moorabbit) <details> <summary>Changes</summary> The following intrinsics were replaced by a combination of `__builtin_shufflevector` and `__builtin_convertvector`: - `__builtin_ia32_vcvtph2ps` - `__builtin_ia32_vcvtph2ps256` Fixes #<!-- -->152749 --- Full diff: https://github.com/llvm/llvm-project/pull/152911.diff 6 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.td (-8) - (modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (-2) - (modified) clang/lib/Headers/emmintrin.h (+5) - (modified) clang/lib/Headers/f16cintrin.h (+5-3) - (modified) clang/test/CodeGen/X86/f16c-builtins-constrained.c (+2-2) - (modified) clang/test/CodeGen/X86/f16c-builtins.c (+2-2) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 3efc0be8fa698..fc1ee3be7889f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -757,14 +757,6 @@ let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">; } -let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vcvtph2ps : X86Builtin<"_Vector<4, float>(_Vector<8, short>)">; -} - -let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vcvtph2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, short>)">; -} - let Features = "rdrnd", Attributes = [NoThrow] in { def rdrand16_step : X86Builtin<"unsigned int(unsigned short *)">; def rdrand32_step : X86Builtin<"unsigned int(unsigned int *)">; diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index b508709e4bbae..f8d451bd20fa3 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -2841,8 +2841,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); // f16c half2float intrinsics - case X86::BI__builtin_ia32_vcvtph2ps: - case X86::BI__builtin_ia32_vcvtph2ps256: case X86::BI__builtin_ia32_vcvtph2ps_mask: case X86::BI__builtin_ia32_vcvtph2ps256_mask: case X86::BI__builtin_ia32_vcvtph2ps512_mask: { diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 60d2000dfb809..9b5567396f60b 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -40,12 +40,17 @@ typedef signed char __v16qs __attribute__((__vector_size__(16))); #ifdef __SSE2__ /* Both _Float16 and __bf16 require SSE2 being enabled. */ +typedef _Float16 __v4hf __attribute__((__vector_size__(8))); typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16))); typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16))); typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1))); typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16))); typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); +#else +/* Use __fp16 when _Float16 is not supported. */ +typedef __fp16 __v4hf __attribute__((__vector_size__(8))); +typedef __fp16 __v8hf __attribute__((__vector_size__(16), __aligned__(16))); #endif /* Define the default attributes for the functions in this file. */ diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h index 94a662c1d93a8..98b25f5a56953 100644 --- a/clang/lib/Headers/f16cintrin.h +++ b/clang/lib/Headers/f16cintrin.h @@ -39,7 +39,8 @@ static __inline float __DEFAULT_FN_ATTRS128 _cvtsh_ss(unsigned short __a) { __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; - __v4sf __r = __builtin_ia32_vcvtph2ps(__v); + __v4hi __w = __builtin_shufflevector(__v, __v, 0, 1, 2, 3); + __v4sf __r = __builtin_convertvector((__v4hf)__w, __v4sf); return __r[0]; } @@ -109,7 +110,8 @@ _cvtsh_ss(unsigned short __a) static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_cvtph_ps(__m128i __a) { - return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); + __v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3); + return __builtin_convertvector((__v4hf)__v, __v4sf); } /// Converts a 256-bit vector of [8 x float] into a 128-bit vector @@ -153,7 +155,7 @@ _mm_cvtph_ps(__m128i __a) static __inline __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtph_ps(__m128i __a) { - return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); + return __builtin_convertvector((__v8hf)__a, __v8sf); } #undef __DEFAULT_FN_ATTRS128 diff --git a/clang/test/CodeGen/X86/f16c-builtins-constrained.c b/clang/test/CodeGen/X86/f16c-builtins-constrained.c index bbd4d8f83b53a..5716f09a677f5 100644 --- a/clang/test/CodeGen/X86/f16c-builtins-constrained.c +++ b/clang/test/CodeGen/X86/f16c-builtins-constrained.c @@ -16,7 +16,7 @@ float test_cvtsh_ss(unsigned short a) { // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict") // CHECK: extractelement <4 x float> %{{.*}}, i32 0 return _cvtsh_ss(a); @@ -38,7 +38,7 @@ unsigned short test_cvtss_sh(float a) { __m128 test_mm_cvtph_ps(__m128i a) { // CHECK-LABEL: test_mm_cvtph_ps - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: call {{.*}}<4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict") return _mm_cvtph_ps(a); } diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c index 3c6d64c225b32..61ffa24e6f5c7 100644 --- a/clang/test/CodeGen/X86/f16c-builtins.c +++ b/clang/test/CodeGen/X86/f16c-builtins.c @@ -16,7 +16,7 @@ float test_cvtsh_ss(unsigned short a) { // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: fpext <4 x half> %{{.*}} to <4 x float> // CHECK: extractelement <4 x float> %{{.*}}, i32 0 return _cvtsh_ss(a); @@ -35,7 +35,7 @@ unsigned short test_cvtss_sh(float a) { __m128 test_mm_cvtph_ps(__m128i a) { // CHECK-LABEL: test_mm_cvtph_ps - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: fpext <4 x half> %{{.*}} to <4 x float> return _mm_cvtph_ps(a); } `````````` </details> https://github.com/llvm/llvm-project/pull/152911 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits