https://gcc.gnu.org/g:e280d42199f9fffb196fc4fa51ca3801dffa15e2

commit e280d42199f9fffb196fc4fa51ca3801dffa15e2
Author: Surya Kumari Jangala <jskum...@linux.ibm.com>
Date:   Mon Jul 14 07:05:58 2025 -0500

    MMA+: Add support for dmr disassemble builtins
    
    Add support for __builtin_mma_dmr_extract512 and
    __builtin_mma_disassemble_dmr.

Diff:
---
 gcc/config/rs6000/mma.md                           |  1 +
 gcc/config/rs6000/rs6000-builtin.cc                | 73 +++++++++++++++++++++-
 gcc/config/rs6000/rs6000-builtins.def              |  9 +++
 .../gcc.target/powerpc/dmf-disassemble-dmr.c       | 22 +++++++
 gcc/testsuite/gcc.target/powerpc/dmf-extract512.c  | 18 ++++++
 5 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 14f33724d69c..6de45acd4175 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -95,6 +95,7 @@
    UNSPEC_DM_INSERT512_LOWER
    UNSPEC_DM_INSERT1024
    UNSPEC_DM_EXTRACT512
+   UNSPEC_DM_EXTRACT1024
    UNSPEC_DMR_RELOAD_FROM_MEMORY
    UNSPEC_DMR_RELOAD_TO_MEMORY
    UNSPEC_DMF_DMXOR
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 00c1a6687101..a27499d20a65 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1107,11 +1107,12 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
 
   /* Each call that can be gimple-expanded has an associated built-in
      function that it will expand into.  If this one doesn't, we have
-     already expanded it!  Exceptions: lxvp and stxvp.  */
+     already expanded it!  Exceptions: lxvp, stxvp and disassemble_dmr.  */
   if (rs6000_builtin_info[fncode].assoc_bif == RS6000_BIF_NONE
       && fncode != RS6000_BIF_LXVP
       && fncode != RS6000_BIF_STXVP
-      && fncode != RS6000_BIF_DMMR)
+      && fncode != RS6000_BIF_DMMR
+      && fncode != RS6000_BIF_DISASSEMBLE_DMR)
     return false;
 
   bifdata *bd = &rs6000_builtin_info[fncode];
@@ -1119,6 +1120,74 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
   gimple *new_call;
   tree new_decl;
 
+  if (fncode == RS6000_BIF_DM_EXTRACT512
+      || fncode == RS6000_BIF_DISASSEMBLE_DMR)
+    {
+      unsigned num_extract512;
+      push_gimplify_context (true);
+      tree dst_ptr = gimple_call_arg (stmt, 0);
+      tree src_ptr = gimple_call_arg (stmt, 1);
+      tree src_type = build_pointer_type (dmr_type_node);
+
+      if (TREE_TYPE (src_ptr) != src_type)
+       src_ptr = build1 (NOP_EXPR, src_type, src_ptr);
+
+      // the following code will ensure we are sending *src as parameter
+      tree src = create_tmp_reg_or_ssa_name (TREE_TYPE (src_type));
+      gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
+
+      // now we should call the internal builtin RS6000_BIF_DM_EXTRACT512_INTERNAL
+      if (fncode == RS6000_BIF_DISASSEMBLE_DMR)
+       num_extract512 = 2;
+      else
+       num_extract512 = 1;
+
+      tree extract_decl = rs6000_builtin_decls[RS6000_BIF_DM_EXTRACT512_INTERNAL];
+
+      for (unsigned i = 0; i < num_extract512; i++)
+       {
+         tree const_arg;
+         if (fncode == RS6000_BIF_DISASSEMBLE_DMR)
+           const_arg = build_int_cstu (uint16_type_node, i);
+         else
+           const_arg = gimple_call_arg (stmt, 2);
+
+         // create call
+         new_call = gimple_build_call (extract_decl, 2, src, const_arg);
+         // create a tmp reg to denote lhs of call
+         tree lhs = create_tmp_reg_or_ssa_name (vector_quad_type_node);
+
+         // lhs = new_call
+         gimple_call_set_lhs (new_call, lhs);
+
+         // add gimple stmt to gimple sequence
+         gimple_seq_add_stmt (&new_seq, new_call);
+
+         // Now lhs contains the 512-bit value in vector_quad. We have to now
+         // split up the vector_quad into individual vectors
+
+         new_decl = rs6000_builtin_decls[RS6000_BIF_DISASSEMBLE_ACC_INTERNAL];
+         tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
+                                                      ptr_mode, true);
+
+         tree dst_base = build1 (NOP_EXPR, dst_type, dst_ptr);
+         for (unsigned j = 0; j < 4; j++)
+           {
+             tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
+                                build_int_cst (dst_type, j * 16 + i * 64));
+             tree dstssa = create_tmp_reg_or_ssa_name (unsigned_V16QI_type_node);
+             new_call = gimple_build_call (new_decl, 2, lhs,
+                                           build_int_cstu (uint16_type_node, j));
+             gimple_call_set_lhs (new_call, dstssa);
+             gimple_seq_add_stmt (&new_seq, new_call);
+             gimplify_assign (dst, dstssa, &new_seq);
+           }
+       }
+      pop_gimplify_context (NULL);
+      gsi_replace_with_seq (gsi, new_seq, true);
+      return true;
+    }
+
   /* Compatibility built-ins; we used to call these
      __builtin_mma_{dis,}assemble_pair, but now we call them
      __builtin_vsx_{dis,}assemble_pair.  Handle the old versions.  */
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 07ead4b9ffee..7ba1715b89cd 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3935,6 +3935,12 @@
   dm1024 __builtin_mma_dmsetdmrz_internal ();
     DMSETDMRZ_INTERNAL mma_dmsetdmrz {dm}
 
+  void __builtin_mma_dmr_extract512 (void *, dm1024 *, const int<2>);
+    DM_EXTRACT512 nothing {dm,dmint}
+
+  v512 __builtin_mma_dmr_extract512_internal (dm1024, const int<2>);
+    DM_EXTRACT512_INTERNAL movtdo_extract512 {dm}
+
   void __builtin_mma_dmmr (dm1024 *, dm1024 *);
     DMMR nothing {dm,dmint}
 
@@ -3953,6 +3959,9 @@
   dm1024 __builtin_mma_build_dmr_internal (vuc, vuc, vuc, vuc, vuc, vuc, vuc, vuc);
     BUILD_DMR_INTERNAL mma_build_dmr {dm}
 
+  void __builtin_mma_disassemble_dmr (void *, dm1024 *);
+    DISASSEMBLE_DMR nothing {dm}
+
   void __builtin_mma_dmxvi8gerx4 (dm1024 *, v256, vuc);
     DMXVI8GERX4 nothing {dm,dmint}
 
diff --git a/gcc/testsuite/gcc.target/powerpc/dmf-disassemble-dmr.c b/gcc/testsuite/gcc.target/powerpc/dmf-disassemble-dmr.c
new file mode 100644
index 000000000000..b1406ec380e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dmf-disassemble-dmr.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16)));
+
+void
+bar (vec_t *dst, __dmr *src)
+{
+  vec_t res[8];  /* Eight 16-byte vectors: two 512-bit halves.  */
+  __builtin_mma_disassemble_dmr (res, src);  /* Fills res[0..7] from *src.  */
+  dst[0] = res[0];  /* Copy each vector to every other slot of dst.  */
+  dst[2] = res[1];
+  dst[4] = res[2];
+  dst[6] = res[3];
+  dst[8] = res[4];
+  dst[10] = res[5];
+  dst[12] = res[6];
+  dst[14] = res[7];
+}
+
+/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 8 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/dmf-extract512.c b/gcc/testsuite/gcc.target/powerpc/dmf-extract512.c
new file mode 100644
index 000000000000..35c1cfaff509
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dmf-extract512.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16)));
+
+void
+bar (vec_t *dst, __dmr *src)
+{
+  vec_t res[4];  /* Four 16-byte vectors: one 512-bit half.  */
+  __builtin_mma_dmr_extract512 (res, src, 0);  /* 3rd arg: constant half index.  */
+  dst[0] = res[0];  /* Copy each vector to every other slot of dst.  */
+  dst[2] = res[1];
+  dst[4] = res[2];
+  dst[6] = res[3];
+}
+
+/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */

Reply via email to