https://gcc.gnu.org/g:df83b062fdd5258040293929891f6028afeeeee6

commit df83b062fdd5258040293929891f6028afeeeee6
Author: Surya Kumari Jangala <jskum...@linux.ibm.com>
Date:   Fri Aug 22 00:50:38 2025 -0500

    MMA+: Add float16 ger builtins
    
    Add builtins __builtin_mma_dmxvf16gerx2, __builtin_mma_dmxvf16gerx2nn,
    __builtin_mma_dmxvf16gerx2np, __builtin_mma_dmxvf16gerx2pn,
    __builtin_mma_dmxvf16gerx2pp, __builtin_mma_pmdmxvf16gerx2,
    __builtin_mma_pmdmxvf16gerx2nn, __builtin_mma_pmdmxvf16gerx2np,
    __builtin_mma_pmdmxvf16gerx2pn, __builtin_mma_pmdmxvf16gerx2pp

Diff:
---
 gcc/config/rs6000/mma.md                         |  46 +++++--
 gcc/config/rs6000/rs6000-builtins.def            |  70 ++++++++++
 gcc/testsuite/gcc.target/powerpc/dmf-builtin-2.c | 168 +++++++++++++++++++++++
 3 files changed, 276 insertions(+), 8 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index c7ec9d0fb56d..78052822e651 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -115,6 +115,16 @@
    UNSPEC_DMF_PMDMXVBF16GERX2PN
    UNSPEC_DMF_PMDMXVBF16GERX2NP
    UNSPEC_DMF_PMDMXVBF16GERX2NN
+   UNSPEC_DMF_DMXVF16GERX2
+   UNSPEC_DMF_DMXVF16GERX2PP
+   UNSPEC_DMF_DMXVF16GERX2PN
+   UNSPEC_DMF_DMXVF16GERX2NP
+   UNSPEC_DMF_DMXVF16GERX2NN
+   UNSPEC_DMF_PMDMXVF16GERX2
+   UNSPEC_DMF_PMDMXVF16GERX2PP
+   UNSPEC_DMF_PMDMXVF16GERX2PN
+   UNSPEC_DMF_PMDMXVF16GERX2NP
+   UNSPEC_DMF_PMDMXVF16GERX2NN
   ])
 
 (define_c_enum "unspecv"
@@ -159,7 +169,8 @@
 
 ;; DMF instructions with 1 vector pair and 1 vector arguments
 (define_int_iterator DMF_PV            [UNSPEC_DMF_DMXVI8GERX4
-                                        UNSPEC_DMF_DMXVBF16GERX2])
+                                        UNSPEC_DMF_DMXVBF16GERX2
+                                        UNSPEC_DMF_DMXVF16GERX2])
 
 ;; MMA instructions with 1 accumulator, 1 vector pair and 1 vector arguments
 (define_int_iterator MMA_APV           [UNSPEC_MMA_XVF64GERPP
@@ -173,7 +184,11 @@
                                         UNSPEC_DMF_DMXVBF16GERX2PP
                                         UNSPEC_DMF_DMXVBF16GERX2PN
                                         UNSPEC_DMF_DMXVBF16GERX2NP
-                                        UNSPEC_DMF_DMXVBF16GERX2NN])
+                                        UNSPEC_DMF_DMXVBF16GERX2NN
+                                        UNSPEC_DMF_DMXVF16GERX2PP
+                                        UNSPEC_DMF_DMXVF16GERX2PN
+                                        UNSPEC_DMF_DMXVF16GERX2NP
+                                        UNSPEC_DMF_DMXVF16GERX2NN])
 
 ;; MMA instructions with 2 vector, 2 4-bit and 1 8-bit arguments
 (define_int_iterator MMA_VVI4I4I8      [UNSPEC_MMA_PMXVI4GER8])
@@ -235,14 +250,19 @@
 
 ;; DMF instructions with 1 vector pair, 1 vector, 1 8-bit, 1 4-bit
 ;; and 1 2-bit arguments
-(define_int_iterator DMF_PVI8I4I2      [UNSPEC_DMF_PMDMXVBF16GERX2])
+(define_int_iterator DMF_PVI8I4I2      [UNSPEC_DMF_PMDMXVBF16GERX2
+                                        UNSPEC_DMF_PMDMXVF16GERX2])
 
 ;; DMF instructions with 1dmr, 1 vector pair, 1 vector, 1 8-bit,
 ;; 1 4-bit and 1 2-bit arguments
 (define_int_iterator DMF_DPVI8I4I2     [UNSPEC_DMF_PMDMXVBF16GERX2PP
                                         UNSPEC_DMF_PMDMXVBF16GERX2PN
                                         UNSPEC_DMF_PMDMXVBF16GERX2NP
-                                        UNSPEC_DMF_PMDMXVBF16GERX2NN])
+                                        UNSPEC_DMF_PMDMXVBF16GERX2NN
+                                        UNSPEC_DMF_PMDMXVF16GERX2PP
+                                        UNSPEC_DMF_PMDMXVF16GERX2PN
+                                        UNSPEC_DMF_PMDMXVF16GERX2NP
+                                        UNSPEC_DMF_PMDMXVF16GERX2NN])
 
 (define_int_attr acc           [(UNSPEC_MMA_XXMFACC            "xxmfacc")
                                 (UNSPEC_MMA_XXMTACC            "xxmtacc")])
@@ -275,7 +295,8 @@
 
 (define_int_attr pv            [(UNSPEC_MMA_XVF64GER           "xvf64ger")
                                 (UNSPEC_DMF_DMXVI8GERX4        "dmxvi8gerx4")
-                                (UNSPEC_DMF_DMXVBF16GERX2      "dmxvbf16gerx2")])
+                                (UNSPEC_DMF_DMXVBF16GERX2      "dmxvbf16gerx2")
+                                (UNSPEC_DMF_DMXVF16GERX2       "dmxvf16gerx2")])
 
 (define_int_attr apv           [(UNSPEC_MMA_XVF64GERPP         "xvf64gerpp")
                                 (UNSPEC_MMA_XVF64GERPN         "xvf64gerpn")
@@ -287,7 +308,11 @@
                                 (UNSPEC_DMF_DMXVBF16GERX2PP    "dmxvbf16gerx2pp")
                                 (UNSPEC_DMF_DMXVBF16GERX2PN    "dmxvbf16gerx2pn")
                                 (UNSPEC_DMF_DMXVBF16GERX2NP    "dmxvbf16gerx2np")
-                                (UNSPEC_DMF_DMXVBF16GERX2NN    "dmxvbf16gerx2nn")])
+                                (UNSPEC_DMF_DMXVBF16GERX2NN    "dmxvbf16gerx2nn")
+                                (UNSPEC_DMF_DMXVF16GERX2PP     "dmxvf16gerx2pp")
+                                (UNSPEC_DMF_DMXVF16GERX2PN     "dmxvf16gerx2pn")
+                                (UNSPEC_DMF_DMXVF16GERX2NP     "dmxvf16gerx2np")
+                                (UNSPEC_DMF_DMXVF16GERX2NN     "dmxvf16gerx2nn")])
 
 ;; The "pm" prefix is not in these expansions, so that we can generate
 ;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
@@ -336,12 +361,17 @@
 (define_int_attr dpvi8i4i4     [(UNSPEC_DMF_PMDMXVI8GERX4PP    "pmdmxvi8gerx4pp")
                                 (UNSPEC_DMF_PMDMXVI8GERX4SPP   "pmdmxvi8gerx4spp")])
 
-(define_int_attr pvi8i4i2       [(UNSPEC_DMF_PMDMXVBF16GERX2   "pmdmxvbf16gerx2")])
+(define_int_attr pvi8i4i2       [(UNSPEC_DMF_PMDMXVBF16GERX2   "pmdmxvbf16gerx2")
+                                (UNSPEC_DMF_PMDMXVF16GERX2     "pmdmxvf16gerx2")])
 
 (define_int_attr dpvi8i4i2      [(UNSPEC_DMF_PMDMXVBF16GERX2PP "pmdmxvbf16gerx2pp")
                                 (UNSPEC_DMF_PMDMXVBF16GERX2PN  "pmdmxvbf16gerx2pn")
                                 (UNSPEC_DMF_PMDMXVBF16GERX2NP  "pmdmxvbf16gerx2np")
-                                (UNSPEC_DMF_PMDMXVBF16GERX2NN  "pmdmxvbf16gerx2nn")])
+                                (UNSPEC_DMF_PMDMXVBF16GERX2NN  "pmdmxvbf16gerx2nn")
+                                (UNSPEC_DMF_PMDMXVF16GERX2PP   "pmdmxvf16gerx2pp")
+                                (UNSPEC_DMF_PMDMXVF16GERX2PN   "pmdmxvf16gerx2pn")
+                                (UNSPEC_DMF_PMDMXVF16GERX2NP   "pmdmxvf16gerx2np")
+                                (UNSPEC_DMF_PMDMXVF16GERX2NN   "pmdmxvf16gerx2nn")])
 
 ;; Vector pair support.  OOmode can only live in VSRs.
 (define_expand "movoo"
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 7ba1715b89cd..a107932ca0f7 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4076,6 +4076,76 @@
                                                 const int<4>, const int<2>);
     PMDMXVBF16GERX2NN_INTERNAL dmf_pmdmxvbf16gerx2nn {dm,pair}
 
+  void __builtin_mma_dmxvf16gerx2 (dm1024 *, v256, vuc);
+    DMXVF16GERX2 nothing {dm,dmint}
+
+  dm1024 __builtin_mma_dmxvf16gerx2_internal (v256, vuc);
+    DMXVF16GERX2_INTERNAL dmf_dmxvf16gerx2 {dm}
+
+  void __builtin_mma_dmxvf16gerx2pp (dm1024 *, v256, vuc);
+    DMXVF16GERX2PP nothing {dm,dmint,dmr}
+
+  dm1024 __builtin_mma_dmxvf16gerx2pp_internal (dm1024, v256, vuc);
+    DMXVF16GERX2PP_INTERNAL dmf_dmxvf16gerx2pp {dm}
+
+  void __builtin_mma_dmxvf16gerx2pn (dm1024 *, v256, vuc);
+    DMXVF16GERX2PN nothing {dm,dmint,dmr}
+
+  dm1024 __builtin_mma_dmxvf16gerx2pn_internal (dm1024, v256, vuc);
+    DMXVF16GERX2PN_INTERNAL dmf_dmxvf16gerx2pn {dm}
+
+  void __builtin_mma_dmxvf16gerx2np (dm1024 *, v256, vuc);
+    DMXVF16GERX2NP nothing {dm,dmint,dmr}
+
+  dm1024 __builtin_mma_dmxvf16gerx2np_internal (dm1024, v256, vuc);
+    DMXVF16GERX2NP_INTERNAL dmf_dmxvf16gerx2np {dm}
+
+  void __builtin_mma_dmxvf16gerx2nn (dm1024 *, v256, vuc);
+    DMXVF16GERX2NN nothing {dm,dmint,dmr}
+
+  dm1024 __builtin_mma_dmxvf16gerx2nn_internal (dm1024, v256, vuc);
+    DMXVF16GERX2NN_INTERNAL dmf_dmxvf16gerx2nn {dm}
+
+  void __builtin_mma_pmdmxvf16gerx2 (dm1024 *, v256, vuc, const int<8>, \
+                                   const int<4>, const int<2>);
+    PMDMXVF16GERX2 nothing {dm,pair,dmint}
+
+  dm1024 __builtin_mma_pmdmxvf16gerx2_internal (v256, vuc, const int<8>, \
+                                              const int<4>, const int<2>);
+    PMDMXVF16GERX2_INTERNAL dmf_pmdmxvf16gerx2 {dm,pair}
+
+  void __builtin_mma_pmdmxvf16gerx2pp (dm1024 *, v256, vuc, const int<8>, \
+                                     const int<4>, const int<2>);
+    PMDMXVF16GERX2PP nothing {dm,pair,dmint,dmr}
+
+  dm1024 __builtin_mma_pmdmxvf16gerx2pp_internal (dm1024, v256, vuc, const int<8>, \
+                                                const int<4>, const int<2>);
+    PMDMXVF16GERX2PP_INTERNAL dmf_pmdmxvf16gerx2pp {dm,pair}
+
+  void __builtin_mma_pmdmxvf16gerx2pn (dm1024 *, v256, vuc, const int<8>, \
+                                     const int<4>, const int<2>);
+    PMDMXVF16GERX2PN nothing {dm,pair,dmint,dmr}
+
+  dm1024 __builtin_mma_pmdmxvf16gerx2pn_internal (dm1024, v256, vuc, const int<8>, \
+                                                const int<4>, const int<2>);
+    PMDMXVF16GERX2PN_INTERNAL dmf_pmdmxvf16gerx2pn {dm,pair}
+
+  void __builtin_mma_pmdmxvf16gerx2np (dm1024 *, v256, vuc, const int<8>, \
+                                     const int<4>, const int<2>);
+    PMDMXVF16GERX2NP nothing {dm,pair,dmint,dmr}
+
+  dm1024 __builtin_mma_pmdmxvf16gerx2np_internal (dm1024, v256, vuc, const int<8>, \
+                                                const int<4>, const int<2>);
+    PMDMXVF16GERX2NP_INTERNAL dmf_pmdmxvf16gerx2np {dm,pair}
+
+  void __builtin_mma_pmdmxvf16gerx2nn (dm1024 *, v256, vuc, const int<8>, \
+                                     const int<4>, const int<2>);
+    PMDMXVF16GERX2NN nothing {dm,pair,dmint,dmr}
+
+  dm1024 __builtin_mma_pmdmxvf16gerx2nn_internal (dm1024, v256, vuc, const int<8>, \
+                                                const int<4>, const int<2>);
+    PMDMXVF16GERX2NN_INTERNAL dmf_pmdmxvf16gerx2nn {dm,pair}
+
 [future]
   const signed int __builtin_saturate_subtract32 (signed int, signed int);
   SAT_SUBSI sat_subsi3 {}
diff --git a/gcc/testsuite/gcc.target/powerpc/dmf-builtin-2.c b/gcc/testsuite/gcc.target/powerpc/dmf-builtin-2.c
new file mode 100644
index 000000000000..b733de1f6f9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dmf-builtin-2.c
@@ -0,0 +1,168 @@
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16)));
+
+void
+foo (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmsetdmrz (&dmr);
+  __builtin_mma_dmxvf16gerx2 (&dmr, vp, vec);
+  *dst = dmr;
+}
+
+void
+bar (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr = dst[0];;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmxvf16gerx2 (&dmr, vp, vec);
+  dst[1] = dmr;
+}
+
+/* { dg-final { scan-assembler-times {\mdmxvf16gerx2\M} 2 } } */
+
+void
+foo_1 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmsetdmrz (&dmr);
+  __builtin_mma_dmxvf16gerx2nn (&dmr, vp, vec);
+  *dst = dmr;
+}
+
+void
+bar_1 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr = dst[0];;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmxvf16gerx2nn (&dmr, vp, vec);
+  dst[1] = dmr;
+}
+
+/* { dg-final { scan-assembler-times {\mdmxvf16gerx2nn\M} 2 } } */
+
+void
+foo_2 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmsetdmrz (&dmr);
+  __builtin_mma_dmxvf16gerx2np (&dmr, vp, vec);
+  *dst = dmr;
+}
+
+void
+bar_2 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr = dst[0];;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmxvf16gerx2np (&dmr, vp, vec);
+  dst[1] = dmr;
+}
+
+/* { dg-final { scan-assembler-times {\mdmxvf16gerx2np\M} 2 } } */
+
+void
+foo_3 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmsetdmrz (&dmr);
+  __builtin_mma_dmxvf16gerx2pn (&dmr, vp, vec);
+  *dst = dmr;
+}
+
+void
+bar_3 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr = dst[0];;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmxvf16gerx2pn (&dmr, vp, vec);
+  dst[1] = dmr;
+}
+
+/* { dg-final { scan-assembler-times {\mdmxvf16gerx2pn\M} 2 } } */
+
+void
+foo_4 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmsetdmrz (&dmr);
+  __builtin_mma_dmxvf16gerx2pp (&dmr, vp, vec);
+  *dst = dmr;
+}
+
+void
+bar_4 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __dmr1024 dmr = dst[0];;
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_dmxvf16gerx2pp (&dmr, vp, vec);
+  dst[1] = dmr;
+}
+
+/* { dg-final { scan-assembler-times {\mdmxvf16gerx2pp\M} 2 } } */
+
+void
+foo_5 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_pmdmxvf16gerx2 (dst, vp, vec, 255, 15, 2);
+}
+
+/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2\M} 1 } } */
+
+void
+foo_6 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_pmdmxvf16gerx2nn (dst, vp, vec, 255, 15, 2);
+}
+
+/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2nn\M} 1 } } */
+
+void
+foo_7 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_pmdmxvf16gerx2np (dst, vp, vec, 255, 15, 2);
+}
+
+/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2np\M} 1 } } */
+
+void
+foo_8 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_pmdmxvf16gerx2pn (dst, vp, vec, 255, 15, 2);
+}
+
+/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2pn\M} 1 } } */
+
+void
+foo_9 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src)
+{
+  __vector_pair vp = *vpp;
+  vec_t vec = *src;
+  __builtin_mma_pmdmxvf16gerx2pp (dst, vp, vec, 255, 15, 2);
+}
+
+/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2pp\M} 1 } } */

Reply via email to