This is part six of the dense math register patches for the PowerPC.

This is an optional patch that on dense math systems changes the XV* MMA
instructions to DMXV*.  The assembler will generate the same object code for
either instruction.  This is to tell the user looking at assembly code that
we are compiling MMA code to use dense math registers.

I have committed all of the patches in my backlog (dense math registers, other
-mcpu=future instructions, random bug fixes, support for _Float16 and
__bfloat16, and optimizations for vector logical operations on power10/power11)
into the IBM vendor branch:

        vendors/ibm/gcc-17-future

I have built bootstrap little endian compilers on power10 systems, and
big endian compilers on power9 systems.  There were no regressions in the
tests.  Can I add the patches to the GCC trunk?

2026-07-01  Michael Meissner  <[email protected]>

gcc/

        * config/rs6000/mma.md (vvi4i4i8): Eliminate using the 'pm' prefix here,
        so we can emit pmdm* on dense math systems.
        (avvi4i4i8): Likewise.
        (vvi4i4i2): Likewise.
        (avvi4i4i2): Likewise.
        (vvi4i4): Likewise.
        (avvi4i4): Likewise.
        (pvi4i2): Likewise.
        (apvi4i2): Likewise.
        (vvi4i4i4): Likewise.
        (avvi4i4i4): Likewise.
        (mma_<vv>): If -mdense-math, emit 'dmxv*' form of the instruction
        instead of 'xv*'.
        (mma_<avv>): Likewise.
        (mma_<pv>): Likewise.
        (mma_<apv>): Likewise.
        (mma_pm<vvi4i4i8>): If -mdense-math, emit 'pmdm*' instead of 'pm*'.
        (mma_pm<avvi4i4i8>): Likewise.
        (mma_pm<vvi4i4i2>): Likewise.
        (mma_pm<avvi4i4i2>): Likewise.
        (mma_pm<vvi4i4>): Likewise.
        (mma_pm<avvi4i4>): Likewise.
        (mma_pm<pvi4i2>): Likewise.
        (mma_pm<apvi4i2>): Likewise.
        (mma_pm<vvi4i4i4>): Likewise.
        (mma_pm<avvi4i4i4>): Likewise.
        * config/rs6000/rs6000.cc (print_operand): For %!, print 'dm' if
        -mdense-math.
        * config/rs6000/rs6000.h (PRINT_OPERAND_PUNCT_VALID_P): Allow %!.

---
 gcc/config/rs6000/mma.md    | 108 ++++++++++++++++++------------------
 gcc/config/rs6000/rs6000.cc |   6 ++
 gcc/config/rs6000/rs6000.h  |   2 +-
 3 files changed, 62 insertions(+), 54 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 95cee85925b..16a7bb26e0e 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -230,44 +230,46 @@ (define_int_attr apv              [(UNSPEC_MMA_XVF64GERPP         "xvf64gerpp")
                                 (UNSPEC_MMA_XVF64GERNP         "xvf64gernp")
                                 (UNSPEC_MMA_XVF64GERNN         "xvf64gernn")])
 
-(define_int_attr vvi4i4i8      [(UNSPEC_MMA_PMXVI4GER8         "pmxvi4ger8")])
+;; The following instructions do not have the 'pm' prefix, so that on dense
+;; math systems, we can change the pm to pmdm.
+(define_int_attr vvi4i4i8      [(UNSPEC_MMA_PMXVI4GER8         "xvi4ger8")])
 
-(define_int_attr avvi4i4i8     [(UNSPEC_MMA_PMXVI4GER8PP       "pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8     [(UNSPEC_MMA_PMXVI4GER8PP       "xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2      [(UNSPEC_MMA_PMXVI16GER2        "pmxvi16ger2")
-                                (UNSPEC_MMA_PMXVI16GER2S       "pmxvi16ger2s")
-                                (UNSPEC_MMA_PMXVF16GER2        "pmxvf16ger2")
-                                (UNSPEC_MMA_PMXVBF16GER2       "pmxvbf16ger2")])
+(define_int_attr vvi4i4i2      [(UNSPEC_MMA_PMXVI16GER2        "xvi16ger2")
+                                (UNSPEC_MMA_PMXVI16GER2S       "xvi16ger2s")
+                                (UNSPEC_MMA_PMXVF16GER2        "xvf16ger2")
+                                (UNSPEC_MMA_PMXVBF16GER2       "xvbf16ger2")])
 
-(define_int_attr avvi4i4i2     [(UNSPEC_MMA_PMXVI16GER2PP      "pmxvi16ger2pp")
-                                (UNSPEC_MMA_PMXVI16GER2SPP     "pmxvi16ger2spp")
-                                (UNSPEC_MMA_PMXVF16GER2PP      "pmxvf16ger2pp")
-                                (UNSPEC_MMA_PMXVF16GER2PN      "pmxvf16ger2pn")
-                                (UNSPEC_MMA_PMXVF16GER2NP      "pmxvf16ger2np")
-                                (UNSPEC_MMA_PMXVF16GER2NN      "pmxvf16ger2nn")
-                                (UNSPEC_MMA_PMXVBF16GER2PP     "pmxvbf16ger2pp")
-                                (UNSPEC_MMA_PMXVBF16GER2PN     "pmxvbf16ger2pn")
-                                (UNSPEC_MMA_PMXVBF16GER2NP     "pmxvbf16ger2np")
-                                (UNSPEC_MMA_PMXVBF16GER2NN     "pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2     [(UNSPEC_MMA_PMXVI16GER2PP      "xvi16ger2pp")
+                                (UNSPEC_MMA_PMXVI16GER2SPP     "xvi16ger2spp")
+                                (UNSPEC_MMA_PMXVF16GER2PP      "xvf16ger2pp")
+                                (UNSPEC_MMA_PMXVF16GER2PN      "xvf16ger2pn")
+                                (UNSPEC_MMA_PMXVF16GER2NP      "xvf16ger2np")
+                                (UNSPEC_MMA_PMXVF16GER2NN      "xvf16ger2nn")
+                                (UNSPEC_MMA_PMXVBF16GER2PP     "xvbf16ger2pp")
+                                (UNSPEC_MMA_PMXVBF16GER2PN     "xvbf16ger2pn")
+                                (UNSPEC_MMA_PMXVBF16GER2NP     "xvbf16ger2np")
+                                (UNSPEC_MMA_PMXVBF16GER2NN     "xvbf16ger2nn")])
 
-(define_int_attr vvi4i4                [(UNSPEC_MMA_PMXVF32GER         "pmxvf32ger")])
+(define_int_attr vvi4i4                [(UNSPEC_MMA_PMXVF32GER         "xvf32ger")])
 
-(define_int_attr avvi4i4       [(UNSPEC_MMA_PMXVF32GERPP       "pmxvf32gerpp")
-                                (UNSPEC_MMA_PMXVF32GERPN       "pmxvf32gerpn")
-                                (UNSPEC_MMA_PMXVF32GERNP       "pmxvf32gernp")
-                                (UNSPEC_MMA_PMXVF32GERNN       "pmxvf32gernn")])
+(define_int_attr avvi4i4       [(UNSPEC_MMA_PMXVF32GERPP       "xvf32gerpp")
+                                (UNSPEC_MMA_PMXVF32GERPN       "xvf32gerpn")
+                                (UNSPEC_MMA_PMXVF32GERNP       "xvf32gernp")
+                                (UNSPEC_MMA_PMXVF32GERNN       "xvf32gernn")])
 
-(define_int_attr pvi4i2                [(UNSPEC_MMA_PMXVF64GER         "pmxvf64ger")])
+(define_int_attr pvi4i2                [(UNSPEC_MMA_PMXVF64GER         "xvf64ger")])
 
-(define_int_attr apvi4i2       [(UNSPEC_MMA_PMXVF64GERPP       "pmxvf64gerpp")
-                                (UNSPEC_MMA_PMXVF64GERPN       "pmxvf64gerpn")
-                                (UNSPEC_MMA_PMXVF64GERNP       "pmxvf64gernp")
-                                (UNSPEC_MMA_PMXVF64GERNN       "pmxvf64gernn")])
+(define_int_attr apvi4i2       [(UNSPEC_MMA_PMXVF64GERPP       "xvf64gerpp")
+                                (UNSPEC_MMA_PMXVF64GERPN       "xvf64gerpn")
+                                (UNSPEC_MMA_PMXVF64GERNP       "xvf64gernp")
+                                (UNSPEC_MMA_PMXVF64GERNN       "xvf64gernn")])
 
-(define_int_attr vvi4i4i4      [(UNSPEC_MMA_PMXVI8GER4         "pmxvi8ger4")])
+(define_int_attr vvi4i4i4      [(UNSPEC_MMA_PMXVI8GER4         "xvi8ger4")])
 
-(define_int_attr avvi4i4i4     [(UNSPEC_MMA_PMXVI8GER4PP       "pmxvi8ger4pp")
-                                (UNSPEC_MMA_PMXVI8GER4SPP      "pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4     [(UNSPEC_MMA_PMXVI8GER4PP       "xvi8ger4pp")
+                                (UNSPEC_MMA_PMXVI8GER4SPP      "xvi8ger4spp")])
 
 
 ;; Vector pair support.  OOmode can only live in VSRs.
@@ -620,7 +622,7 @@ (define_insn "mma_<vv>"
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
                    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
+  "%!<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 ;; Instructions:
@@ -636,7 +638,7 @@ (define_insn "mma_<avv>"
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
                    MMA_AVV))]
   "TARGET_MMA"
-  "<avv> %A0,%x2,%x3"
+  "%!<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 ;; Instruction: xvf64ger
@@ -647,7 +649,7 @@ (define_insn "mma_<pv>"
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
                    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
+  "%!<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 ;; Instructions: xvf64gerpp xvf64gerpn xvf64gernp xvf64gernn
@@ -659,12 +661,12 @@ (define_insn "mma_<apv>"
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
                    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
+  "%!<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 ;; Instruction: pmxvi4ger8
 
-(define_insn "mma_<vvi4i4i8>"
+(define_insn "mma_pm<vvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -673,13 +675,13 @@ (define_insn "mma_<vvi4i4i8>"
                    (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
                    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "pm%!<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
 ;; Instruction: pmxvi4ger8pp
 
-(define_insn "mma_<avvi4i4i8>"
+(define_insn "mma_pm<avvi4i4i8>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -689,14 +691,14 @@ (define_insn "mma_<avvi4i4i8>"
                    (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
                    MMA_AVVI4I4I8))]
   "TARGET_MMA"
-  "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
+  "pm%!<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
 ;; Instructions:
 ;; pmxvi16ger2 pmxvi16ger2s pmxvf16ger2 pmxvbf16ger2
 
-(define_insn "mma_<vvi4i4i2>"
+(define_insn "mma_pm<vvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -705,7 +707,7 @@ (define_insn "mma_<vvi4i4i2>"
                    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
                    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+  "pm%!<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
@@ -714,7 +716,7 @@ (define_insn "mma_<vvi4i4i2>"
 ;; pmxvf16ger2np  pmxvf16ger2nn  pmxvbf16ger2pp pmxvbf16ger2pn
 ;; pmxvbf16ger2np pmxvbf16ger2nn
 
-(define_insn "mma_<avvi4i4i2>"
+(define_insn "mma_pm<avvi4i4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -724,13 +726,13 @@ (define_insn "mma_<avvi4i4i2>"
                    (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
                    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+  "pm%!<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
 ;; Instruction: pmxvf32ger
 
-(define_insn "mma_<vvi4i4>"
+(define_insn "mma_pm<vvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -738,13 +740,13 @@ (define_insn "mma_<vvi4i4>"
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
                    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+  "pm%!<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
 ;; Instructions: pmxvf32gerpp pmxvf32gerpn pmxvf32gernp pmxvf32gernn
 
-(define_insn "mma_<avvi4i4>"
+(define_insn "mma_pm<avvi4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -753,13 +755,13 @@ (define_insn "mma_<avvi4i4>"
                    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
                    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+  "pm%!<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
 ;; Instruction: pmxvf64ger
 
-(define_insn "mma_<pvi4i2>"
+(define_insn "mma_pm<pvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -767,13 +769,13 @@ (define_insn "mma_<pvi4i2>"
                    (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
                    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+  "pm%!<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
 ;; Instructions: pmxvf64gerpp pmxvf64gerpn pmxvf64gernp pmxvf64gernn
 
-(define_insn "mma_<apvi4i2>"
+(define_insn "mma_pm<apvi4i2>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
@@ -782,13 +784,13 @@ (define_insn "mma_<apvi4i2>"
                    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
                    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+  "pm%!<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
 ;; Instruction: pmxvi8ger4
 
-(define_insn "mma_<vvi4i4i4>"
+(define_insn "mma_pm<vvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -797,13 +799,13 @@ (define_insn "mma_<vvi4i4i4>"
                    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
                    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+  "pm%!<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
 ;; Instructions: pmxvi8ger4pp pmxvi8ger4spp
 
-(define_insn "mma_<avvi4i4i4>"
+(define_insn "mma_pm<avvi4i4i4>"
   [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
@@ -813,7 +815,7 @@ (define_insn "mma_<avvi4i4i4>"
                    (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
                    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+  "pm%!<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
 
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index c47729e7c3a..d5a82ef0044 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -14833,6 +14833,12 @@ print_operand (FILE *file, rtx x, int code)
                                "local dynamic TLS references");
       return;
 
+      /* Print out 'dm' if dense math registers are available.  */
+    case '!':
+      if (TARGET_DENSE_MATH)
+       fputs ("dm", file);
+      return;
+
     default:
       output_operand_lossage ("invalid %%xn code");
     }
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 62f014f1951..eef25768b5a 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2208,7 +2208,7 @@ extern char rs6000_reg_names[][8];        /* register names (0 vs. %r0).  */
 
 /* Define which CODE values are valid.  */
 
-#define PRINT_OPERAND_PUNCT_VALID_P(CODE)  ((CODE) == '&')
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE)  ((CODE) == '&' || (CODE) == '!')
 
 /* Print a memory address as an operand to reference that memory location.  */
 
-- 
2.54.0


-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: [email protected]

Reply via email to