https://gcc.gnu.org/g:43736a27f86849bb0fd8887f71f022245a9c099c
commit r16-6647-g43736a27f86849bb0fd8887f71f022245a9c099c Author: Alfie Richards <[email protected]> Date: Thu Oct 23 11:42:17 2025 +0000 aarch64: Add support for FEAT_F8F32MM, FEAT_F8F16MM, and FEAT_SVE_F16F32MM. Adds support for the AArch64 2024 fmmla extensions. Note that this includes a workaround in the testsuite for spurious warnings from binutils with movprfx and fmmla instructions (PR gas/33562). gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (aarch64_expand_pragma_builtin): Add case for FMMLA. * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Add new __ARM_FEATURE_X macros. * config/aarch64/aarch64-simd-pragma-builtins.def (vmmlaq_f16_mf8): New intrinsic. (vmmlaq_f32_mf8): Likewise. * config/aarch64/aarch64-simd.md (@aarch64_<insn><VDQ_HSF_FMMLA:mode>): New instruction. * config/aarch64/aarch64-sve-builtins-base.cc: Update mmla_impl for new instructions. * config/aarch64/aarch64-sve-builtins-shapes.cc (struct mmla_def): Add support for the new widening forms. * config/aarch64/aarch64-sve-builtins-sve2.def (svmmla): Add new intrinsics. * config/aarch64/aarch64-sve-builtins.cc (TYPES_cvt_narrow_s): Fix comment. * config/aarch64/aarch64-sve2.md (@aarch64_sve2_<sve_fp_op><SVE_FULL_HSF_FMMLA:mode><VNx16QI_ONLY:mode>): New instruction. (@aarch64_sve2_<sve_fp_op><VNx4SF_ONLY:mode><VNx8HF_ONLY:mode>): Likewise. * config/aarch64/aarch64.h (TARGET_F8F32MM): New macro. (TARGET_F8F16MM): Likewise. (TARGET_SVE_F16F32MM): Likewise. * config/aarch64/iterators.md (insn): Add fmmla entry. (VDQ_HSF_FMMLA): New iterator. (SVE_FULL_HSF_FMMLA): Likewise. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Add f8f16mm, f8f32mm and sve-f16f32mm to exts. * gcc.target/aarch64/acle/vmmlaq_f16_mf8.c: New test. * gcc.target/aarch64/acle/vmmlaq_f32_mf8.c: New test. * gcc.target/aarch64/sve2/acle/asm/fmmla_f8f16mm_sve2.c: New test. * gcc.target/aarch64/sve2/acle/asm/fmmla_f8f32mm_sve2.c: New test. * gcc.target/aarch64/sve2/acle/asm/fmmla_sve_f16f32mm.c: New test. 
* gcc.target/aarch64/sve/acle/general-c/mmla_1.c: Update error messages. Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 1 + gcc/config/aarch64/aarch64-c.cc | 5 +++ .../aarch64/aarch64-simd-pragma-builtins.def | 10 +++++ gcc/config/aarch64/aarch64-simd.md | 15 +++++++ gcc/config/aarch64/aarch64-sve-builtins-base.cc | 9 ++++- gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 44 +++++++++++++++----- gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 14 +++++++ gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- gcc/config/aarch64/aarch64-sve2.md | 47 ++++++++++++++++++++++ gcc/config/aarch64/aarch64.h | 7 ++++ gcc/config/aarch64/iterators.md | 11 ++++- .../gcc.target/aarch64/acle/vmmlaq_f16_mf8.c | 31 ++++++++++++++ .../gcc.target/aarch64/acle/vmmlaq_f32_mf8.c | 31 ++++++++++++++ .../gcc.target/aarch64/sve/acle/general-c/mmla_1.c | 16 ++++---- .../aarch64/sve2/acle/asm/fmmla_f8f16mm_sve2.c | 33 +++++++++++++++ .../aarch64/sve2/acle/asm/fmmla_f8f32mm_sve2.c | 33 +++++++++++++++ .../aarch64/sve2/acle/asm/fmmla_sve_f16f32mm.c | 31 ++++++++++++++ gcc/testsuite/lib/target-supports.exp | 2 +- 18 files changed, 319 insertions(+), 23 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index a14b44b16e0b..c2a9e3d34122 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -4178,6 +4178,7 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, case UNSPEC_FAMAX: case UNSPEC_FAMIN: + case UNSPEC_FMMLA: case UNSPEC_F1CVTL_FP8: case UNSPEC_F2CVTL_FP8: case UNSPEC_FDOT_FP8: diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index 41df1e838883..b52ea7649f9b 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -322,6 +322,11 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) cpp_undef (pfile, "__FLT_EVAL_METHOD_C99__"); builtin_define_with_int_value ("__FLT_EVAL_METHOD_C99__", c_flt_eval_method (false)); + + 
aarch64_def_or_undef (TARGET_F8F16MM, "__ARM_FEATURE_F8F16MM", pfile); + aarch64_def_or_undef (TARGET_F8F32MM, "__ARM_FEATURE_F8F32MM", pfile); + aarch64_def_or_undef (TARGET_SVE_F16F32MM, "__ARM_FEATURE_SVE_F16F32MM", + pfile); } /* Implement TARGET_CPU_CPP_BUILTINS. */ diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def index 0f5cfeb2700b..bd6492e2ce98 100644 --- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def @@ -296,6 +296,16 @@ ENTRY_LOAD_LANE (vld4q_lane_mf8, mf8qx4, mf8_scalar_const_ptr, mf8qx4, UNSPEC_LD4_LANE) #undef REQUIRED_EXTENSIONS +// mmla f16 mf8 +#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_F8F16MM) +ENTRY_TERNARY (vmmlaq_f16_mf8, f16q, f16q, mf8q, mf8q, UNSPEC_FMMLA, FP8) +#undef REQUIRED_EXTENSIONS + +// mmla f32 mf8 +#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_F8F32MM) +ENTRY_TERNARY (vmmlaq_f32_mf8, f32q, f32q, mf8q, mf8q, UNSPEC_FMMLA, FP8) +#undef REQUIRED_EXTENSIONS + // mov #define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) ENTRY_UNARY (vmov_n_mf8, mf8, mf8_scalar, UNSPEC_DUP, QUIET) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 20b3184a2d79..a007cfe9abbf 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -10650,3 +10650,18 @@ return "<insn>\t%0.<V4SF_ONLY:Vtype>, %2.16b, %3.b[%4]"; } ) + +(define_insn "@aarch64_<insn><mode>" + [(set (match_operand:VDQ_HSF_FMMLA 0 "register_operand") + (unspec:VDQ_HSF_FMMLA + [(match_operand:V16QI 2 "register_operand") + (match_operand:V16QI 3 "register_operand") + (match_operand:VDQ_HSF_FMMLA 1 "register_operand") + (reg:DI FPM_REGNUM)] + FMMLA))] + "" + {@ [ cons: =0 , 1 , 2 , 3 ] + [ w , 0 , w , w ] <insn>\t%0.<Vtype>, %2.16b, %3.16b + } +) + diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
index f07727416b5f..e3d0f9b909a0 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -2289,7 +2289,14 @@ public: icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0)); } else - icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0)); + { + if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES) + icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0)); + else + icode = code_for_aarch64_sve2 (UNSPEC_FMMLA, + e.vector_mode (0), + e.vector_mode (1)); + } return e.use_exact_insn (icode); } }; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index a40373c78dea..7d5376124e5b 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -1035,6 +1035,8 @@ template <unsigned int BITS> struct luti_zt_base : public nonoverloaded_base /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t) (for integer t0) sv<t0>_t svmmla[_t0](sv<t0>_t, sv<t0>_t, sv<t0>_t) (for floating-point t0) + sv<t0>_t svmmla[_t0](sv<t0>_t, sv<t1>_t, sv<t1>_t) + (for floating-point t0, t1) The functions act like the equivalent of "ternary_qq" for integer elements and normal vector-only ternary functions for floating-point elements. 
*/ @@ -1045,7 +1047,12 @@ struct mmla_def : public overloaded_base<0> { b.add_overloaded_functions (group, MODE_none); if (type_suffixes[group.types[0][0]].float_p) - build_all (b, "v0,v0,v0,v0", group, MODE_none); + { + if (group.types[0][1] == NUM_TYPE_SUFFIXES) + build_all (b, "v0,v0,v0,v0", group, MODE_none); + else + build_all (b, "v0,v0,v1,v1", group, MODE_none); + } else build_all (b, "v0,v0,vq0,vq0", group, MODE_none); } @@ -1054,24 +1061,39 @@ struct mmla_def : public overloaded_base<0> resolve (function_resolver &r) const override { unsigned int i, nargs; - type_suffix_index type; + type_suffix_index type1, type2; if (!r.check_gp_argument (3, i, nargs) - || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + || (type1 = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || (type2 = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES) return error_mark_node; + bool float_p = type_suffixes[type1].float_p; /* Make sure that the function exists now, since not all forms follow a set pattern after this point. */ - tree res = r.resolve_to (r.mode_suffix_id, type); + tree res = (float_p && type1 != type2) + ? r.resolve_to (r.mode_suffix_id, type1, type2) + : r.resolve_to (r.mode_suffix_id, type1); if (res == error_mark_node) return res; - bool float_p = type_suffixes[type].float_p; - unsigned int modifier = float_p ? r.SAME_SIZE : r.QUARTER_SIZE; - if (!r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, - modifier) - || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, - modifier)) - return error_mark_node; + if (float_p) + { + /* In the float case, require arg i+1 to have same type as i+2. */ + if (!r.require_derived_vector_type (i + 2, i + 1, type2, + r.SAME_TYPE_CLASS, r.SAME_SIZE)) + return error_mark_node; + } + else + { + /* In the int case, require arg i+1 and i+2 to have a quarter the size + of arg i. 
*/ + if (!r.require_derived_vector_type (i + 1, i, type1, r.SAME_TYPE_CLASS, + r.QUARTER_SIZE) + || !r.require_derived_vector_type (i + 2, i, type1, + r.SAME_TYPE_CLASS, + r.QUARTER_SIZE)) + return error_mark_node; + } return res; } diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index e7142080c05a..9329c8853864 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -430,3 +430,17 @@ DEF_SVE_FUNCTION_GS_FPM (svcvtl1, unary_convert, cvt_mf8, x2, none, set) DEF_SVE_FUNCTION_GS_FPM (svcvtl2, unary_convert, cvt_mf8, x2, none, set) DEF_SVE_FUNCTION_GS (svscale, binary_int_opt_single_n, all_float, x24, none) #undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS \ + nonstreaming_sve (AARCH64_FL_SVE2 | AARCH64_FL_F8F16MM) +DEF_SVE_FUNCTION_GS_FPM (svmmla, mmla, h_float_mf8, none, none, set) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS \ + nonstreaming_sve (AARCH64_FL_SVE2 | AARCH64_FL_F8F32MM) +DEF_SVE_FUNCTION_GS_FPM (svmmla, mmla, s_float_mf8, none, none, set) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_SVE_F16F32MM) +DEF_SVE_FUNCTION (svmmla, mmla, cvt_f32_f16, none) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 7d90501073ab..b3ebb754d767 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -478,7 +478,7 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_cvt_long(S, D, T) \ D (f32, f16), D (f64, f32) -/* _f16_f32. */ +/* _f32_f64. 
*/ #define TYPES_cvt_narrow_s(S, D, T) \ D (f32, f64) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 1aa885abedd5..2df93a4ba4cd 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -135,6 +135,9 @@ ;; ---- Optional AES extensions ;; ---- Optional SHA-3 extensions ;; ---- Optional SM4 extensions +;; +;; == FMMLA extensions +;; ---- [FP] Matrix multiply-accumulate widening ;; ========================================================================= ;; == Moves @@ -4656,3 +4659,47 @@ "sm4ekey\t%0.s, %1.s, %2.s" [(set_attr "type" "crypto_sm4")] ) + +;; ========================================================================= +;; == FMMLA extensions +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [FP] Matrix multiply-accumulate widening +;; ------------------------------------------------------------------------- +;; Includes: +;; - FMMLA (F8F16MM,F8F32MM,SVE_F16F32MM) +;; ------------------------------------------------------------------------- + + +(define_insn "@aarch64_sve2_<sve_fp_op><SVE_FULL_HSF_FMMLA:mode><VNx16QI_ONLY:mode>" + [(set (match_operand:SVE_FULL_HSF_FMMLA 0 "register_operand") + (unspec:SVE_FULL_HSF_FMMLA + [(match_operand:VNx16QI_ONLY 2 "register_operand") + (match_operand:VNx16QI_ONLY 3 "register_operand") + (match_operand:SVE_FULL_HSF_FMMLA 1 "register_operand") + (reg:DI FPM_REGNUM)] + FMMLA))] + "TARGET_SVE2 && TARGET_NON_STREAMING" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , 0 , w , w ; * ] fmmla\t%0.<SVE_FULL_HSF_FMMLA:Vetype>, %2.b, %3.b + [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;fmmla\t%0.<SVE_FULL_HSF_FMMLA:Vetype>, %2.b, %3.b + } + [(set_attr "sve_type" "sve_fp_mul")] +) + +(define_insn "@aarch64_sve2_<sve_fp_op><VNx4SF_ONLY:mode><VNx8HF_ONLY:mode>" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand") + (unspec:VNx4SF_ONLY + 
[(match_operand:VNx8HF_ONLY 2 "register_operand") + (match_operand:VNx8HF_ONLY 3 "register_operand") + (match_operand:VNx4SF_ONLY 1 "register_operand")] + FMMLA))] + "TARGET_SVE2 && TARGET_SVE_F16F32MM && TARGET_NON_STREAMING" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , 0 , w , w ; * ] fmmla\t%0.s, %2.h, %3.h + [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;fmmla\t%0.s, %2.h, %3.h + } + [(set_attr "sve_type" "sve_fp_mul")] +) + diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 03802f07e1c5..1dd942f377fd 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -410,6 +410,13 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* PCDPHINT instructions are enabled through +pcdphint. */ #define TARGET_PCDPHINT AARCH64_HAVE_ISA (PCDPHINT) +/* F8F32MM instructions, enabled through +f8f32mm. */ +#define TARGET_F8F32MM (AARCH64_HAVE_ISA (F8F32MM)) +/* F8F16MM instructions, enabled through +f8f16mm. */ +#define TARGET_F8F16MM (AARCH64_HAVE_ISA (F8F16MM)) +/* SVE_F16F32MM instructions, enabled through +sve-f16f32mm. */ +#define TARGET_SVE_F16F32MM (AARCH64_HAVE_ISA (SVE_F16F32MM)) + /* Make sure this is always defined so we don't have to check for ifdefs but rather use normal ifs. */ #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 588c89c8a176..b425b0ed2ca3 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -209,6 +209,9 @@ (V2SF "TARGET_FP8DOT4") (V4SF "TARGET_FP8DOT4")]) +(define_mode_iterator VDQ_HSF_FMMLA [(V8HF "TARGET_F8F16MM") + (V4SF "TARGET_F8F32MM")]) + ;; Modes suitable to use as the return type of a vcond expression. 
(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI]) @@ -556,6 +559,11 @@ (define_mode_iterator SVE_FULL_HSF_FP8_FDOT [(VNx4SF "TARGET_SSVE_FP8DOT4") (VNx8HF "TARGET_SSVE_FP8DOT2")]) +;; Like SVE_FULL_HSF, but selectively enables those modes that are valid +;; for the variant of the FMMLA instructions associated with that mode. +(define_mode_iterator SVE_FULL_HSF_FMMLA [(VNx4SF "TARGET_F8F32MM") + (VNx8HF "TARGET_F8F16MM")]) + ;; Partial SVE floating-point vector modes that have 16-bit or 32-bit ;; elements. (define_mode_iterator SVE_PARTIAL_HSF [VNx2HF VNx4HF VNx2SF]) @@ -4148,7 +4156,8 @@ (UNSPEC_FMLALLBT_FP8 "fmlallbt") (UNSPEC_FMLALLTB_FP8 "fmlalltb") (UNSPEC_FMLALLTT_FP8 "fmlalltt") - (UNSPEC_FSCALE "fscale")]) + (UNSPEC_FSCALE "fscale") + (UNSPEC_FMMLA "fmmla")]) ;; The optab associated with an operation. Note that for ANDF, IORF ;; and XORF, the optab pattern is not actually defined; we just use this diff --git a/gcc/testsuite/gcc.target/aarch64/acle/vmmlaq_f16_mf8.c b/gcc/testsuite/gcc.target/aarch64/acle/vmmlaq_f16_mf8.c new file mode 100644 index 000000000000..6b73e8b90b98 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/vmmlaq_f16_mf8.c @@ -0,0 +1,31 @@ +/* { dg-do assemble { target aarch64_asm_f8f16mm_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_f8f16mm_ok } } } */ +/* { dg-additional-options "-O2 -march=armv8-a+f8f16mm -save-temps -moverride=tune=cheap_fpmr_write" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <arm_neon.h> + +/* +** fmmla_f16f8mm_tied: +** msr fpmr, x0 +** fmmla v0.8h, v1.16b, v2.16b +** ret +*/ +float16x8_t +fmmla_f16f8mm_tied (float16x8_t v0, mfloat8x16_t v1, mfloat8x16_t v2, fpm_t fpm0) +{ + return vmmlaq_f16_mf8 (v0, v1, v2, fpm0); +} + +/* +** fmmla_f16f8mm: +** msr fpmr, x0 +** fmmla v1.8h, v2.16b, v3.16b +** mov v0.16b, v1.16b +** ret +*/ +float16x8_t +fmmla_f16f8mm (float16x8_t v0, float16x8_t v1, mfloat8x16_t v2, mfloat8x16_t v3, fpm_t fpm0) +{ + return vmmlaq_f16_mf8 (v1, v2, v3, fpm0); +} diff --git a/gcc/testsuite/gcc.target/aarch64/acle/vmmlaq_f32_mf8.c b/gcc/testsuite/gcc.target/aarch64/acle/vmmlaq_f32_mf8.c new file mode 100644 index 000000000000..de7d7f9ed9c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/vmmlaq_f32_mf8.c @@ -0,0 +1,31 @@ +/* { dg-do assemble { target aarch64_asm_f8f32mm_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_f8f32mm_ok } } } */ +/* { dg-additional-options "-O2 -march=armv8-a+f8f32mm -save-temps -moverride=tune=cheap_fpmr_write" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <arm_neon.h> + +/* +** fmmla_f32f8mm_tied: +** msr fpmr, x0 +** fmmla v0.4s, v1.16b, v2.16b +** ret +*/ +float32x4_t +fmmla_f32f8mm_tied (float32x4_t v0, mfloat8x16_t v1, mfloat8x16_t v2, fpm_t fpm0) +{ + return vmmlaq_f32_mf8 (v0, v1, v2, fpm0); +} + +/* +** fmmla_f32f8mm: +** msr fpmr, x0 +** fmmla v1.4s, v2.16b, v3.16b +** mov v0.16b, v1.16b +** ret +*/ +float32x4_t +fmmla_f32f8mm (float32x4_t v0, float32x4_t v1, mfloat8x16_t v2, mfloat8x16_t v3, fpm_t fpm0) +{ + return vmmlaq_f32_mf8 (v1, v2, v3, fpm0); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c index ca2ab8a6f3f0..a23c45c86b14 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c @@ -43,14 +43,14 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32, svmmla (u32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ - svmmla (f16, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ - svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ - svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ - svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ - svmmla (f64, f16, f16); /* { 
dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */ - svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ - svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */ - svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */ + svmmla (f16, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' and 'svint8_t' arguments} } */ + svmmla (f32, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat32_t' and 'svint8_t' arguments} } */ + svmmla (f32, s32, s32); /* { dg-error {'svmmla' has no form that takes 'svfloat32_t' and 'svint32_t' arguments} } */ + svmmla (f32, f16, f16); /* { dg-error {ACLE function 'svmmla_f32_f16' requires ISA extension 'sve-f16f32mm'} } */ + svmmla (f64, f16, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat64_t' and 'svfloat16_t' arguments} } */ + svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 2 had type 'svfloat32_t'} } */ + svmmla (f64, f32, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat64_t' and 'svfloat32_t' arguments} } */ + svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 2 had type 'svfloat64_t'} } */ svmmla (f16, f16, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ svmmla (f32, f32, f32); diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/fmmla_f8f16mm_sve2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/fmmla_f8f16mm_sve2.c new file mode 100644 index 000000000000..9a333ff4b1db --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/fmmla_f8f16mm_sve2.c @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_f8f16mm_ok } } */ +/* { dg-do compile { 
target { ! aarch64_asm_f8f16mm_ok } } } */ +/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +/* Binutils PR gas/33562 */ +/* { dg-prune-output "SVE `movprfx' compatible instruction expected" } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+f8f16mm" + +/* +** svmmla_f16f8mm_tied: +** msr fpmr, x0 +** fmmla z0\.h, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (svmmla_f16f8mm_tied, svfloat16_t, svmfloat8_t, + z0 = svmmla_f16_mf8_fpm (z0, z4, z5, fpm0), + z0 = svmmla_fpm (z0, z4, z5, fpm0)) + +/* +** svmmla_f16f8mm: +** msr fpmr, x0 +** movprfx z0, z1 +** fmmla z0\.h, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (svmmla_f16f8mm, svfloat16_t, svmfloat8_t, + z0 = svmmla_f16_mf8_fpm (z1, z4, z5, fpm0), + z0 = svmmla_fpm (z1, z4, z5, fpm0)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/fmmla_f8f32mm_sve2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/fmmla_f8f32mm_sve2.c new file mode 100644 index 000000000000..edd8cef3a126 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/fmmla_f8f32mm_sve2.c @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_f8f32mm_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_f8f32mm_ok } } } */ +/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +/* Binutils PR gas/33562 */ +/* { dg-prune-output "SVE `movprfx' compatible instruction expected" } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+f8f32mm" + +/* +** svmmla_f32f8mm_tied: +** msr fpmr, x0 +** fmmla z0\.s, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (svmmla_f32f8mm_tied, svfloat32_t, svmfloat8_t, + z0 = svmmla_f32_mf8_fpm (z0, z4, z5, fpm0), + z0 = svmmla_fpm (z0, z4, z5, fpm0)) + +/* +** svmmla_f32f8mm: +** msr fpmr, x0 +** movprfx z0, z1 +** fmmla z0\.s, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (svmmla_f32f8mm, svfloat32_t, svmfloat8_t, + z0 = svmmla_f32_mf8_fpm (z1, z4, z5, fpm0), + z0 = svmmla_fpm (z1, z4, z5, fpm0)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/fmmla_sve_f16f32mm.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/fmmla_sve_f16f32mm.c new file mode 100644 index 000000000000..2c4c5d2ccce0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/fmmla_sve_f16f32mm.c @@ -0,0 +1,31 @@ +/* { dg-do assemble { target aarch64_asm_sve-f16f32mm_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve-f16f32mm_ok } } } */ +/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +/* Binutils PR gas/33562 */ +/* { dg-prune-output "SVE `movprfx' compatible instruction expected" } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve-f16f32mm" + +/* +** svmmla_f32f16mm_tied: +** fmmla z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (svmmla_f32f16mm_tied, svfloat32_t, svfloat16_t, + z0 = svmmla_f32_f16 (z0, z4, z5), + z0 = svmmla (z0, z4, z5)) + +/* +** svmmla_f32f16mm: +** movprfx z0, z1 +** fmmla z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (svmmla_f32f16mm, svfloat32_t, svfloat16_t, + z0 = svmmla_f32_f16 (z1, z4, z5), + z0 = svmmla (z1, z4, z5)) + diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index dbcba42629fa..46c9b624254f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -12664,7 +12664,7 @@ set exts { "bf16" "cmpbr" "crc" "crypto" "dotprod" "f32mm" "f64mm" "faminmax" "fp" "fp8" "fp8dot2" "fp8dot4" "fp8fma" "i8mm" "ls64" "lse" "lut" "sb" "simd" "sve-b16b16" "sve" "sve2" "sve-sm4" "sve-aes" "sve-bitperm" - "sve-sha3" + "sve-sha3" "f8f16mm" "f8f32mm" "sve-f16f32mm" } # We don't support SME without SVE2, so we'll use armv9 as the base
