llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: None (moorabbit) <details> <summary>Changes</summary> The following intrinsics were replaced by `__builtin_elementwise_fma`: - `__builtin_ia32_vfmaddps(256)` - `__builtin_ia32_vfmaddpd(256)` - `__builtin_ia32_vfmaddph(256)` - `__builtin_ia32_vfmaddbf16(128 | 256 | 512)` All `__builtin_ia32_vfmadd*` intrinsics are lowered to `__builtin_elementwise_fma`, so keeping them is an unnecessary indirection. Fixes [#<!-- -->152461](https://github.com/llvm/llvm-project/issues/152461) --- Patch is 59.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152545.diff 9 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.td (-24) - (modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (-9) - (modified) clang/lib/Headers/avx10_2_512bf16intrin.h (+4-4) - (modified) clang/lib/Headers/avx10_2bf16intrin.h (+8-8) - (modified) clang/lib/Headers/avx512vlfp16intrin.h (+30-30) - (modified) clang/lib/Headers/avx512vlintrin.h (+48-48) - (modified) clang/lib/Headers/fma4intrin.h (+16-16) - (modified) clang/lib/Headers/fmaintrin.h (+16-16) - (modified) clang/test/CodeGen/target-builtin-noerror.c (+3-3) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index a4acc72fdc37d..609b720357e38 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -878,11 +878,6 @@ let Features = "sha", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def sha256msg2 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; } -let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vfmaddps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; - def vfmaddpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; -} - let Features = "fma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vfmaddss3 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; def vfmaddsd3 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; @@ -899,8 +894,6 @@ let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128 } let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vfmaddps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">; - def vfmaddpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">; def vfmaddsubps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">; def vfmaddsubpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">; } @@ -4140,14 +4133,6 @@ let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVecto def vcvtps2phx512_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, float>, _Vector<16, _Float16>, unsigned short, _Constant int)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vfmaddph : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>)">; -} - -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vfmaddph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>)">; -} - let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def vfmaddph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">; def vfmaddph512_mask3 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">; @@ -5373,13 +5358,4 @@ let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth< let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">; - def vfmaddbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>)">; -} - -let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vfmaddbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>)">; -} - -let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vfmaddbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>)">; } diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index e23d19d2f6b6b..b508709e4bbae 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -1051,18 +1051,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vfmsubsd3_mask3: return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2, /*NegAcc*/ true); - case X86::BI__builtin_ia32_vfmaddph: - case X86::BI__builtin_ia32_vfmaddps: - case X86::BI__builtin_ia32_vfmaddpd: - case X86::BI__builtin_ia32_vfmaddph256: - case X86::BI__builtin_ia32_vfmaddps256: - case X86::BI__builtin_ia32_vfmaddpd256: case X86::BI__builtin_ia32_vfmaddph512_mask: case X86::BI__builtin_ia32_vfmaddph512_maskz: case X86::BI__builtin_ia32_vfmaddph512_mask3: - case X86::BI__builtin_ia32_vfmaddbf16128: - case X86::BI__builtin_ia32_vfmaddbf16256: - case X86::BI__builtin_ia32_vfmaddbf16512: case X86::BI__builtin_ia32_vfmaddps512_mask: case X86::BI__builtin_ia32_vfmaddps512_maskz: case X86::BI__builtin_ia32_vfmaddps512_mask3: diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h index 75290d22ef259..a1613420eee2b 100644 --- a/clang/lib/Headers/avx10_2_512bf16intrin.h +++ b/clang/lib/Headers/avx10_2_512bf16intrin.h @@ -441,7 +441,7 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B, (__v32bf)__C); } @@ -469,7 +469,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B, -(__v32bf)__C); } @@ -497,7 +497,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B, (__v32bf)__C); } @@ -527,7 +527,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B, -(__v32bf)__C); } diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h index 66797ae00fe4f..8cf1f78f189aa 100644 --- a/clang/lib/Headers/avx10_2bf16intrin.h +++ b/clang/lib/Headers/avx10_2bf16intrin.h @@ -852,7 +852,7 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B, (__v16bf)__C); } @@ -880,7 +880,7 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B, -(__v16bf)__C); } @@ -908,7 +908,7 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B, (__v16bf)__C); } @@ -938,7 +938,7 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B, -(__v16bf)__C); } @@ -969,7 +969,7 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh( static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B, (__v8bf)__C); } @@ -997,7 +997,7 @@ _mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B, -(__v8bf)__C); } @@ -1025,7 +1025,7 @@ _mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B, (__v8bf)__C); } @@ -1053,7 +1053,7 @@ _mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B, -(__v8bf)__C); } diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index a12acb7d9a24a..ba27f0459ff25 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -1419,7 +1419,7 @@ _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) { static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C); } @@ -1429,7 +1429,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } @@ -1437,7 +1437,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } @@ -1445,14 +1445,14 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C); } @@ -1476,7 +1476,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } @@ -1484,7 +1484,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } @@ -1492,14 +1492,14 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C); } @@ -1507,7 +1507,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } @@ -1515,7 +1515,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } @@ -1523,14 +1523,14 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C); } @@ -1538,7 +1538,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } @@ -1546,7 +1546,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1554,7 +1554,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } @@ -1562,7 +1562,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1570,7 +1570,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1684,7 +1684,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } @@ -1692,7 +1692,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } @@ -1715,7 +1715,7 @@ _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C); } @@ -1723,14 +1723,14 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A, ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/152545 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits