>From 9b1c869be8b3eb756a3d7b581c4b304c71b02180 Mon Sep 17 00:00:00 2001 From: Michael Meissner <[email protected]> Date: Thu, 25 Jun 2026 00:38:26 -0400 Subject: Switch to use wD constraint in mma.md.
This is part two of the dense math register patches for the PowerPC. This is the 7th version of the dense math patches. Version 6 of the dense math register patches was posted on April 21st, 2026. * https://gcc.gnu.org/pipermail/gcc-patches/2026-April/713352.html * https://gcc.gnu.org/pipermail/gcc-patches/2026-April/713353.html * https://gcc.gnu.org/pipermail/gcc-patches/2026-April/713354.html * https://gcc.gnu.org/pipermail/gcc-patches/2026-April/713355.html * https://gcc.gnu.org/pipermail/gcc-patches/2026-April/713356.html * https://gcc.gnu.org/pipermail/gcc-patches/2026-April/713357.html This patch needs the -mcpu=future patch posted on April 8th, 2026: * https://gcc.gnu.org/pipermail/gcc-patches/2026-April/712532.html This patch changes mma.md to use the wD constraint and accumulator_operand predicate that were added in the previous patch instead of using the d constraint and fpr_reg_operand predicate. This is in anticipation of adding dense math registers in a future patch. In addition, I added a comment in front of each insn to indicate which instructions are being generated. Originally, these changes were in patch #4 in the V6 patches. I have split the changes that switch to wD out from the other part of that patch, which adds dense math register support. I have committed all of the patches in my backlog (dense math registers, other -mcpu=future instructions, random bug fixes, support for _Float16 and __bfloat16, and optimizations for vector logical operations on power10/power11) into the IBM vendor branch: vendors/ibm/gcc-17-future I have built bootstrap little endian compilers on power10 systems, and big endian compilers on power9 systems. There were no regressions in the tests. Can I add the patches to the GCC trunk? 2026-07-01 Michael Meissner <[email protected]> gcc/ * config/rs6000/mma.md (mma_<vv>): Use the wD constraint and accumulator_operand predicate for all MMA instructions taking accumulator operands. (mma_<avv>): Likewise. (mma_<pv>): Likewise. 
(mma_<apv>): Likewise. (mma_<vvi4i4i8>): Likewise. (mma_<avvi4i4i8>): Likewise. (mma_<vvi4i4i2>): Likewise. (mma_<avvi4i4i2>): Likewise. (mma_<vvi4i4>): Likewise. (mma_<avvi4i4>): Likewise. (mma_<pvi4i2>): Likewise. (mma_<apvi4i2>): Likewise. (mma_<vvi4i4i4>): Likewise. (mma_<avvi4i4i4>): Likewise. --- gcc/config/rs6000/mma.md | 83 ++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 21 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 1103f1fc037..87139dd41e8 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -522,8 +522,15 @@ (define_insn "mma_xxsetaccz" "xxsetaccz %A0" [(set_attr "type" "mma")]) + +;; MMA operations below. + +;; Instructions: +;; xvi4ger8 xvi8ger4 xvi16ger2 xvi16ger2s xvf16ger2 +;; xvbf16ger2 xvf32ger + (define_insn "mma_<vv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] @@ -531,9 +538,15 @@ (define_insn "mma_<vv>" "<vv> %A0,%x1,%x2" [(set_attr "type" "mma")]) +;; Instructions: +;; xvi4ger8pp xvi8ger4pp xvi8ger4spp xvi16ger2pp xvi16ger2spp +;; xvf16ger2pp xvf16ger2pn xvf16ger2np xvf16ger2nn xvbf16ger2pp +;; xvbf16ger2pn xvbf16ger2np xvbf16ger2nn xvf32gerpp xvf32gerpn +;; xvf32gernp xvf32gernn + (define_insn "mma_<avv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] @@ -541,8 +554,10 @@ (define_insn "mma_<avv>" "<avv> %A0,%x2,%x3" [(set_attr "type" "mma")]) +;; Instruction: xvf64ger + (define_insn "mma_<pv>" - [(set (match_operand:XO 0 "fpr_reg_operand" 
"=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] @@ -550,9 +565,11 @@ (define_insn "mma_<pv>" "<pv> %A0,%x1,%x2" [(set_attr "type" "mma")]) +;; Instructions: xvf64gerpp xvf64gerpn xvf64gernp xvf64gernn + (define_insn "mma_<apv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] @@ -560,8 +577,10 @@ (define_insn "mma_<apv>" "<apv> %A0,%x2,%x3" [(set_attr "type" "mma")]) +;; Instruction: pmxvi4ger8 + (define_insn "mma_<vvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -573,9 +592,11 @@ (define_insn "mma_<vvi4i4i8>" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instruction: pmxvi4ger8pp + (define_insn "mma_<avvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -587,8 +608,11 @@ (define_insn "mma_<avvi4i4i8>" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: +;; pmxvi16ger2 pmxvi16ger2s pmxvf16ger2 pmxvbf16ger2 + (define_insn "mma_<vvi4i4i2>" - [(set (match_operand:XO 0 
"fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -600,9 +624,14 @@ (define_insn "mma_<vvi4i4i2>" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: +;; pmxvi16ger2pp pmxvi16ger2spp pmxvf16ger2pp pmxvf16ger2pn +;; pmxvf16ger2np pmxvf16ger2nn pmxvbf16ger2pp pmxvbf16ger2pn +;; pmxvbf16ger2np pmxvbf16ger2nn + (define_insn "mma_<avvi4i4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -614,8 +643,10 @@ (define_insn "mma_<avvi4i4i2>" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instruction: pmxvf32ger + (define_insn "mma_<vvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -626,9 +657,11 @@ (define_insn "mma_<vvi4i4>" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: pmxvf32gerpp pmxvf32gerpn pmxvf32gernp pmxvf32gernn + (define_insn "mma_<avvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") 
(match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -639,8 +672,10 @@ (define_insn "mma_<avvi4i4>" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instruction: pmxvf64ger + (define_insn "mma_<pvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -651,9 +686,11 @@ (define_insn "mma_<pvi4i2>" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: pmxvf64gerpp pmxvf64gerpn pmxvf64gernp pmxvf64gernn + (define_insn "mma_<apvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -664,8 +701,10 @@ (define_insn "mma_<apvi4i2>" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instruction: pmxvi8ger4 + (define_insn "mma_<vvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -677,9 +716,11 @@ (define_insn "mma_<vvi4i4i4>" [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: pmxvi8ger4pp pmxvi8ger4spp + (define_insn "mma_<avvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") 
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") -- 2.54.0 -- Michael Meissner, IBM PO Box 98, Ayer, Massachusetts, USA, 01432 email: [email protected]
