Author: Nikolas Klauser Date: 2025-11-27T11:36:43Z New Revision: 0dbedd195c94e89b43660e67aa56dd139a81fa40
URL: https://github.com/llvm/llvm-project/commit/0dbedd195c94e89b43660e67aa56dd139a81fa40 DIFF: https://github.com/llvm/llvm-project/commit/0dbedd195c94e89b43660e67aa56dd139a81fa40.diff LOG: [Clang] Replace some x86 sqrt builtins with the generic __builtin_elementwise_sqrt versions (#165682) Added: Modified: clang/include/clang/Basic/BuiltinsX86.td clang/lib/CodeGen/TargetBuiltins/X86.cpp clang/lib/Headers/avx10_2_512bf16intrin.h clang/lib/Headers/avx10_2bf16intrin.h clang/lib/Headers/avx512vlfp16intrin.h clang/lib/Headers/avxintrin.h clang/lib/Headers/emmintrin.h clang/lib/Headers/xmmintrin.h clang/test/CodeGen/X86/sse-builtins-constrained.c clang/test/CodeGen/X86/sse-builtins.c clang/test/CodeGen/X86/sse2-builtins-constrained.c clang/test/CodeGen/X86/sse2-builtins.c clang/test/CodeGen/builtins-x86.c Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 86dc0d3228df2..32773107a7038 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -156,8 +156,6 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; - def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; - def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; } let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { @@ -170,8 +168,6 @@ let Features = "sse2", Attributes = [NoThrow] in { let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; - def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; - def sqrtsd : 
X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">; def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">; def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">; @@ -513,8 +509,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid } let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">; - def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def rcpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def roundpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; @@ -3539,14 +3533,6 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<1 def reducesh_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int, _Constant int)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def sqrtph : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>)">; -} - -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def sqrtph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>)">; -} - let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def sqrtph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Constant int)">; } @@ -5065,15 +5051,3 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256> let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def vgetmantbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">; } - -let Features = "avx10.2", Attributes = 
[NoThrow, Const, RequiredVectorWidth<128>] in { - def vsqrtbf16 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>)">; -} - -let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vsqrtbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>)">; -} - -let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">; -} diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index 00c8a1cf16e31..94bea7464869e 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -2171,21 +2171,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateBitCast(Res, Ops[0]->getType()); } - case X86::BI__builtin_ia32_sqrtss: - case X86::BI__builtin_ia32_sqrtsd: { - Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); - Function *F; - if (Builder.getIsFPConstrained()) { - CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); - F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, - A->getType()); - A = Builder.CreateConstrainedFPCall(F, {A}); - } else { - F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); - A = Builder.CreateCall(F, {A}); - } - return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); - } case X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: { @@ -2225,40 +2210,29 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, A = EmitX86ScalarSelect(*this, Ops[3], A, Src); return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); } - case X86::BI__builtin_ia32_sqrtpd256: - case X86::BI__builtin_ia32_sqrtpd: - case X86::BI__builtin_ia32_sqrtps256: - case X86::BI__builtin_ia32_sqrtps: - case X86::BI__builtin_ia32_sqrtph256: - case X86::BI__builtin_ia32_sqrtph: case X86::BI__builtin_ia32_sqrtph512: - case 
X86::BI__builtin_ia32_vsqrtbf16256: - case X86::BI__builtin_ia32_vsqrtbf16: - case X86::BI__builtin_ia32_vsqrtbf16512: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: { - if (Ops.size() == 2) { - unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); - // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), - // otherwise keep the intrinsic. - if (CC != 4) { - Intrinsic::ID IID; - - switch (BuiltinID) { - default: - llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_sqrtph512: - IID = Intrinsic::x86_avx512fp16_sqrt_ph_512; - break; - case X86::BI__builtin_ia32_sqrtps512: - IID = Intrinsic::x86_avx512_sqrt_ps_512; - break; - case X86::BI__builtin_ia32_sqrtpd512: - IID = Intrinsic::x86_avx512_sqrt_pd_512; - break; - } - return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), + // otherwise keep the intrinsic. 
+ if (CC != 4) { + Intrinsic::ID IID; + + switch (BuiltinID) { + default: + llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_sqrtph512: + IID = Intrinsic::x86_avx512fp16_sqrt_ph_512; + break; + case X86::BI__builtin_ia32_sqrtps512: + IID = Intrinsic::x86_avx512_sqrt_ps_512; + break; + case X86::BI__builtin_ia32_sqrtpd512: + IID = Intrinsic::x86_avx512_sqrt_pd_512; + break; } + return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } if (Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h index 3201307af4731..3e9f27443ecce 100644 --- a/clang/lib/Headers/avx10_2_512bf16intrin.h +++ b/clang/lib/Headers/avx10_2_512bf16intrin.h @@ -429,7 +429,7 @@ _mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) { (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U))) static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) { - return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A); + return __builtin_elementwise_sqrt(__A); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h index 3df6930f94be3..179ec534025c2 100644 --- a/clang/lib/Headers/avx10_2bf16intrin.h +++ b/clang/lib/Headers/avx10_2bf16intrin.h @@ -826,7 +826,7 @@ _mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) { (__v8bf)_mm_setzero_pbh(), (__mmask8)(__U))) static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) { - return (__m256bh)__builtin_ia32_vsqrtbf16256((__v16bf)__A); + return __builtin_elementwise_sqrt(__A); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -843,7 +843,7 @@ _mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) { } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sqrt_pbh(__m128bh __A) { - return (__m128bh)__builtin_ia32_vsqrtbf16((__v8bf)__A); + return __builtin_elementwise_sqrt(__A); } static __inline__ 
__m128bh __DEFAULT_FN_ATTRS128 diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index 885231b030b23..7a762e105e9af 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -623,7 +623,7 @@ _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) { (__mmask16)(U))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) { - return __builtin_ia32_sqrtph((__v8hf)__a); + return __builtin_elementwise_sqrt(__a); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W, @@ -640,7 +640,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U, } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) { - return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a); + return __builtin_elementwise_sqrt(__a); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 44ef88db5cbce..54a6e0cd73ab9 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -333,10 +333,8 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the square roots of the /// values in the operand. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_sqrt_pd(__m256d __a) -{ - return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a) { + return __builtin_elementwise_sqrt(__a); } /// Calculates the square roots of the values in a 256-bit vector of @@ -350,10 +348,8 @@ _mm256_sqrt_pd(__m256d __a) /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the square roots of the /// values in the operand. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_sqrt_ps(__m256 __a) -{ - return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a) { + return __builtin_elementwise_sqrt(__a); } /// Calculates the reciprocal square roots of the values in a 256-bit diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index dbe5ca0379cf5..7eb15e06db698 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -241,8 +241,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_pd(__m128d __a, /// bits are copied from the upper 64 bits of operand \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); - return __extension__(__m128d){__c[0], __a[1]}; + return __extension__(__m128d){__builtin_elementwise_sqrt(__b[0]), __a[1]}; } /// Calculates the square root of the each of two values stored in a @@ -257,7 +256,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, /// \returns A 128-bit vector of [2 x double] containing the square roots of the /// values in the operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { - return __builtin_ia32_sqrtpd((__v2df)__a); + return __builtin_elementwise_sqrt(__a); } /// Compares lower 64-bit double-precision values of both operands, and diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index fe6afdcfc3fdb..72a643948bed6 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -231,10 +231,9 @@ _mm_div_ps(__m128 __a, __m128 __b) { /// used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the square root of the /// value in the low-order bits of the operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_sqrt_ss(__m128 __a) -{ - return (__m128)__builtin_ia32_sqrtss((__v4sf)__a); +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a) { + __a[0] = __builtin_elementwise_sqrt(__a[0]); + return __a; } /// Calculates the square roots of the values stored in a 128-bit vector @@ -248,10 +247,8 @@ _mm_sqrt_ss(__m128 __a) /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the square roots of the /// values in the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_sqrt_ps(__m128 __a) -{ - return __builtin_ia32_sqrtps((__v4sf)__a); +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a) { + return __builtin_elementwise_sqrt(__a); } /// Calculates the approximate reciprocal of the value stored in the diff --git a/clang/test/CodeGen/X86/sse-builtins-constrained.c b/clang/test/CodeGen/X86/sse-builtins-constrained.c index 92240bbc5bb31..f3b8d20944bd4 100644 --- a/clang/test/CodeGen/X86/sse-builtins-constrained.c +++ b/clang/test/CodeGen/X86/sse-builtins-constrained.c @@ -28,11 +28,10 @@ __m128 test_mm_sqrt_ps(__m128 x) { __m128 test_sqrt_ss(__m128 x) { // COMMON-LABEL: test_sqrt_ss - // COMMONIR: extractelement <4 x float> {{.*}}, i64 0 + // COMMONIR: extractelement <4 x float> {{.*}}, i32 0 // UNCONSTRAINED: call float @llvm.sqrt.f32(float {{.*}}) // CONSTRAINED: call float @llvm.experimental.constrained.sqrt.f32(float {{.*}}, metadata !{{.*}}) // CHECK-ASM: sqrtss - // COMMONIR: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0 + // COMMONIR: insertelement <4 x float> {{.*}}, float {{.*}}, i32 0 return _mm_sqrt_ss(x); } - diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 6c5297e45dc82..fd4775739fad8 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -751,9 +751,9 @@ __m128 test_mm_sqrt_ps(__m128 x) { __m128 test_mm_sqrt_ss(__m128 x) { // CHECK-LABEL: 
test_mm_sqrt_ss - // CHECK: extractelement <4 x float> {{.*}}, i64 0 + // CHECK: extractelement <4 x float> {{.*}}, i32 0 // CHECK: call float @llvm.sqrt.f32(float {{.*}}) - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0 + // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 0 return _mm_sqrt_ss(x); } diff --git a/clang/test/CodeGen/X86/sse2-builtins-constrained.c b/clang/test/CodeGen/X86/sse2-builtins-constrained.c index 587fd3aa7c92f..a4a0829720501 100644 --- a/clang/test/CodeGen/X86/sse2-builtins-constrained.c +++ b/clang/test/CodeGen/X86/sse2-builtins-constrained.c @@ -28,11 +28,10 @@ __m128d test_mm_sqrt_pd(__m128d x) { __m128d test_sqrt_sd(__m128d x, __m128d y) { // COMMON-LABEL: test_sqrt_sd - // COMMONIR: extractelement <2 x double> {{.*}}, i64 0 + // COMMONIR: extractelement <2 x double> {{.*}}, i32 0 // UNCONSTRAINED: call double @llvm.sqrt.f64(double {{.*}}) // CONSTRAINED: call double @llvm.experimental.constrained.sqrt.f64(double {{.*}}, metadata !{{.*}}) // CHECK-ASM: sqrtsd - // COMMONIR: insertelement <2 x double> {{.*}}, double {{.*}}, i64 0 + // COMMONIR: insertelement <2 x double> {{.*}}, double {{.*}}, i32 0 return _mm_sqrt_sd(x, y); } - diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 379ae48995d26..efe8930fae336 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1440,9 +1440,10 @@ __m128d test_mm_sqrt_pd(__m128d A) { __m128d test_mm_sqrt_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_sqrt_sd - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: call double @llvm.sqrt.f64(double {{.*}}) - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // CHECK: %[[sqrt_vec:.*]] = insertelement <2 x double> poison, double %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> %[[sqrt_vec]], double %{{.*}}, i32 1 return _mm_sqrt_sd(A, B); } diff 
--git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index 31f309791c9f7..a1e63d59e88e1 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -282,8 +282,6 @@ void f0(void) { tmp_V4f = __builtin_ia32_rcpss(tmp_V4f); tmp_V4f = __builtin_ia32_rsqrtps(tmp_V4f); tmp_V4f = __builtin_ia32_rsqrtss(tmp_V4f); - tmp_V4f = __builtin_ia32_sqrtps(tmp_V4f); - tmp_V4f = __builtin_ia32_sqrtss(tmp_V4f); (void) __builtin_ia32_maskmovdqu(tmp_V16c, tmp_V16c, tmp_cp); tmp_i = __builtin_ia32_movmskpd(tmp_V2d); tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c); @@ -292,8 +290,6 @@ void f0(void) { (void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi); #endif tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c); - tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d); - tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d); tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d); tmp_V4f = __builtin_ia32_cvtpd2ps(tmp_V2d); tmp_V4i = __builtin_ia32_cvttpd2dq(tmp_V2d); @@ -400,8 +396,6 @@ void f0(void) { tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7); tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7); tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7); - tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d); - tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f); tmp_V8f = __builtin_ia32_rsqrtps256(tmp_V8f); tmp_V8f = __builtin_ia32_rcpps256(tmp_V8f); tmp_V4d = __builtin_ia32_roundpd256(tmp_V4d, 0x1); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
