https://gcc.gnu.org/g:68fbdf216f97ffca8aad4020784a7e91f92af57a

commit r16-6384-g68fbdf216f97ffca8aad4020784a7e91f92af57a
Author: Claudio Bantaloukas <[email protected]>
Date:   Wed Dec 24 11:41:26 2025 +0000

    aarch64: add 8-bit floating-point sum of outer products and accumulate
    
    This patch adds support for FMOPA (widening, 2-way, FP8 to FP16) when
    sme-f8f16 is enabled using svmopa_za16[_mf8]_m_fpm and for FMOPA (widening,
    4-way) when sme-f8f32 is enabled using svmopa_za32[_mf8]_m_fpm.
    
    Asm tests for the new intrinsics are added, similar to those for existing
    mopa_za16 intrinsics. Tests for the binary_za_m shape are added.
    
    gcc:
            * config/aarch64/aarch64-sme.md
            (@aarch64_sme_<optab><SME_ZA_F8F16_32:mode><VNx16QI_ONLY:mode>): Add
            new define_insn.
            * config/aarch64/aarch64-sve-builtins-shapes.cc
            (struct binary_za_m_base): Support fpm argument.
            * config/aarch64/aarch64-sve-builtins-sme.cc (svmopa_za): Extend for
            fp8.
            * config/aarch64/aarch64-sve-builtins-sme.def (svmopa): Add new
            DEF_SME_ZA_FUNCTION_GS_FPM entries.
    
    gcc/testsuite:
    
            * gcc.target/aarch64/sme/acle-asm/test_sme_acle.h (TEST_UNIFORM_ZA):
            Add fpm0 parameter.
            * gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c: Add tests for
            variants accepting fpm.
            * gcc.target/aarch64/sme2/acle-asm/mopa_za16_mf8.c: New test.
            * gcc.target/aarch64/sme2/acle-asm/mopa_za32_mf8.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sme.md                  | 18 +++++++++++
 gcc/config/aarch64/aarch64-sve-builtins-shapes.cc  |  2 +-
 gcc/config/aarch64/aarch64-sve-builtins-sme.cc     |  2 +-
 gcc/config/aarch64/aarch64-sve-builtins-sme.def    |  2 ++
 .../aarch64/sme/acle-asm/test_sme_acle.h           |  2 +-
 .../aarch64/sme2/acle-asm/mopa_za16_mf8.c          | 36 ++++++++++++++++++++++
 .../aarch64/sme2/acle-asm/mopa_za32_mf8.c          | 36 ++++++++++++++++++++++
 .../aarch64/sve/acle/general-c/binary_za_m_1.c     | 14 +++++++++
 8 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index e93f83b39834..7201e0f0ec37 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -2370,6 +2370,8 @@
 ;; - BFMOPS (SME_B16B16)
 ;; - FMOPA
 ;; - FMOPS
+;; - FMOPA (SME_F8F16)
+;; - FMOPA (SME_F8F32)
 ;; -------------------------------------------------------------------------
 
 (define_insn "@aarch64_sme_<optab><mode><mode>"
@@ -2402,6 +2404,22 @@
   "<b><optab>\tza%0.<VNx4SI_ONLY:Vetype>, %1/m, %2/m, %3.<SVE_FULL_HF:Vetype>, 
%4.<SVE_FULL_HF:Vetype>"
 )
 
+(define_insn "@aarch64_sme_<optab><SME_ZA_F8F16_32:mode><VNx16QI_ONLY:mode>"
+  [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM)
+       (unspec:SME_ZA_F8F16_32
+         [(reg:SME_ZA_F8F16_32 ZA_REGNUM)
+          (reg:DI SME_STATE_REGNUM)
+          (match_operand:DI 0 "const_int_operand")
+          (match_operand:<SME_ZA_F8F16_32:VPRED> 1 "register_operand" "Upl")
+          (match_operand:<SME_ZA_F8F16_32:VPRED> 2 "register_operand" "Upl")
+          (match_operand:VNx16QI_ONLY 3 "register_operand" "w")
+          (match_operand:VNx16QI_ONLY 4 "register_operand" "w")
+          (reg:DI FPM_REGNUM)]
+         SME_FP_MOP))]
+  "TARGET_STREAMING"
+  "<optab>\tza%0.<SME_ZA_F8F16_32:Vetype>, %1/m, %2/m, %3.b, %4.b"
+)
+
 ;; =========================================================================
 ;; == Table lookup
 ;; =========================================================================
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 59f313d08f29..ea4be3733c25 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -692,7 +692,7 @@ struct binary_za_m_base : public overloaded_base<1>
   resolve (function_resolver &r) const override
   {
     type_suffix_index type;
-    if (!r.check_num_arguments (5)
+    if (!r.check_num_arguments (r.fpm_mode == FPM_set ? 6: 5)
        || !r.require_integer_immediate (0)
        || !r.require_vector_type (1, VECTOR_TYPE_svbool_t)
        || !r.require_vector_type (2, VECTOR_TYPE_svbool_t)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
index 43ef05c673ac..20a6ebc40590 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
@@ -651,7 +651,7 @@ FUNCTION (svmls_lane_za, sme_2mode_lane_function, 
(UNSPEC_SME_SMLS,
                                                   UNSPEC_SME_UMLS,
                                                   UNSPEC_SME_FMLS))
 FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA,
-                                         UNSPEC_SME_FMOPA))
+                                         UNSPEC_SME_FMOPA, UNSPEC_SME_FMOPA))
 FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS,
                                          UNSPEC_SME_FMOPS))
 FUNCTION (svread_za, svread_za_impl,)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index f9ad6837f44b..6306ee33a14e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -270,6 +270,7 @@ DEF_SME_ZA_FUNCTION_GS_FPM (svmla_lane, 
binary_za_slice_lane, za_h_mf8,
                            vg2, none, set)
 DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_h_mf8, vg2, 
none, set)
 DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_h_mf8, 
vg1x24, none, set)
+DEF_SME_ZA_FUNCTION_GS_FPM (svmopa, binary_za_m, za_h_mf8, none, za_m, set)
 #undef REQUIRED_EXTENSIONS
 
 #define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F8F32)
@@ -277,6 +278,7 @@ DEF_SME_ZA_FUNCTION_GS_FPM (svmla_lane, 
binary_za_slice_lane, za_s_mf8,
                            vg4, none, set)
 DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_s_mf8, vg4, 
none, set)
 DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_s_mf8, 
vg1x24, none, set)
+DEF_SME_ZA_FUNCTION_GS_FPM (svmopa, binary_za_m, za_s_mf8, none, za_m, set)
 #undef REQUIRED_EXTENSIONS
 
 #undef DEF_SME_ZA_FUNCTION
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h
index aaadab2f7739..75e3413768e2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h
@@ -46,7 +46,7 @@
 
 #define TEST_UNIFORM_ZA(NAME, TYPE, CODE1, CODE2)              \
   PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0,           \
-                     svbool_t p1))                             \
+                     svbool_t p1, fpm_t fpm0))                 \
   {                                                            \
     INVOKE (CODE1, CODE2);                                     \
   }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_mf8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_mf8.c
new file mode 100644
index 000000000000..e88b7a4814c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_mf8.c
@@ -0,0 +1,36 @@
+/* { dg-do assemble { target aarch64_asm_sme-f8f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f8f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f8f16"
+/*
+** mopa_za16_mf8_0_p0_p1_z0_z1:
+**     msr     fpmr, x0
+**     fmopa   za0\.h, p0/m, p1/m, z0\.b, z1\.b
+**     ret
+*/
+TEST_UNIFORM_ZA (mopa_za16_mf8_0_p0_p1_z0_z1, svmfloat8_t,
+                svmopa_za16_mf8_m_fpm (0, p0, p1, z0, z1, fpm0),
+                svmopa_za16_m_fpm (0, p0, p1, z0, z1, fpm0))
+
+/*
+** mopa_za16_mf8_0_p1_p0_z1_z0:
+**     msr     fpmr, x0
+**     fmopa   za0\.h, p1/m, p0/m, z1\.b, z0\.b
+**     ret
+*/
+TEST_UNIFORM_ZA (mopa_za16_mf8_0_p1_p0_z1_z0, svmfloat8_t,
+                svmopa_za16_mf8_m_fpm (0, p1, p0, z1, z0, fpm0),
+                svmopa_za16_m_fpm (0, p1, p0, z1, z0, fpm0))
+
+/*
+** mopa_za16_mf8_1_p0_p1_z0_z1:
+**     msr     fpmr, x0
+**     fmopa   za1\.h, p0/m, p1/m, z0\.b, z1\.b
+**     ret
+*/
+TEST_UNIFORM_ZA (mopa_za16_mf8_1_p0_p1_z0_z1, svmfloat8_t,
+                svmopa_za16_mf8_m_fpm (1, p0, p1, z0, z1, fpm0),
+                svmopa_za16_m_fpm (1, p0, p1, z0, z1, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32_mf8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32_mf8.c
new file mode 100644
index 000000000000..74a665fea6b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32_mf8.c
@@ -0,0 +1,36 @@
+/* { dg-do assemble { target aarch64_asm_sme-f8f32_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f8f32_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f8f32"
+/*
+** mopa_za32_mf8_0_p0_p1_z0_z1:
+**     msr     fpmr, x0
+**     fmopa   za0\.s, p0/m, p1/m, z0\.b, z1\.b
+**     ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_mf8_0_p0_p1_z0_z1, svmfloat8_t,
+                svmopa_za32_mf8_m_fpm (0, p0, p1, z0, z1, fpm0),
+                svmopa_za32_m_fpm (0, p0, p1, z0, z1, fpm0))
+
+/*
+** mopa_za32_mf8_0_p1_p0_z1_z0:
+**     msr     fpmr, x0
+**     fmopa   za0\.s, p1/m, p0/m, z1\.b, z0\.b
+**     ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_mf8_0_p1_p0_z1_z0, svmfloat8_t,
+                svmopa_za32_mf8_m_fpm (0, p1, p0, z1, z0, fpm0),
+                svmopa_za32_m_fpm (0, p1, p0, z1, z0, fpm0))
+
+/*
+** mopa_za32_mf8_1_p0_p1_z0_z1:
+**     msr     fpmr, x0
+**     fmopa   za1\.s, p0/m, p1/m, z0\.b, z1\.b
+**     ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_mf8_1_p0_p1_z0_z1, svmfloat8_t,
+                svmopa_za32_mf8_m_fpm (1, p0, p1, z0, z1, fpm0),
+                svmopa_za32_m_fpm (1, p0, p1, z0, z1, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
index 44c3e48e9164..5f013bd41949 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
@@ -46,3 +46,17 @@ f4 (svbool_t pg, svint16_t s16) __arm_streaming 
__arm_inout("za")
   svmopa_za64_m (-1, pg, pg, s16, s16); /* { dg-error {passing -1 to argument 
1 of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } */
   svmopa_za64_m (8, pg, pg, s16, s16); /* { dg-error {passing 8 to argument 1 
of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } */
 }
+
+#pragma GCC target ("arch=armv9-a+sme-f8f16+sme-f8f32")
+
+void
+f5 (svbool_t pg, svmfloat8_t mf8, fpm_t fpm) __arm_streaming __arm_inout("za")
+{
+  svmopa_za16_mf8_m_fpm(0, pg, pg, mf8, mf8); /* { dg-error {too few arguments 
to function 'svmopa_za16_mf8_m_fpm'} } */
+  svmopa_za16_mf8_m_fpm(0, pg, pg, mf8, mf8, fpm);
+  svmopa_za16_mf8_m_fpm(0, pg, pg, mf8, mf8, fpm, fpm); /* { dg-error {too 
many arguments to function 'svmopa_za16_mf8_m_fpm'; expected 6, have 7} } */
+
+  svmopa_za16_mf8_m_fpm(-1, pg, pg, mf8, mf8, fpm); /* { dg-error {passing -1 
to argument 1 of 'svmopa_za16_mf8_m_fpm', which expects a value in the range 
\[0, 1\]} } */
+  svmopa_za16_mf8_m_fpm(2, pg, pg, mf8, mf8, fpm); /* { dg-error {passing 2 to 
argument 1 of 'svmopa_za16_mf8_m_fpm', which expects a value in the range \[0, 
1\]} } */
+  svmopa_za32_mf8_m_fpm(4, pg, pg, mf8, mf8, fpm); /* { dg-error {passing 4 to 
argument 1 of 'svmopa_za32_mf8_m_fpm', which expects a value in the range \[0, 
3\]} } */
+}

Reply via email to