Hi Mike,

On 2024/1/6 07:39, Michael Meissner wrote:
> This patch changes the MMA instructions to use either FPR registers
> (-mcpu=power10) or DMRs (-mcpu=future).  In this patch, the existing MMA
> instruction names are used.
> 
> A macro (__PPC_DMR__) is defined if the MMA instructions use the DMRs.
> 
> The patches have been tested on both little and big endian systems.  Can I 
> check
> it into the master branch?
> 
> 2024-01-05   Michael Meissner  <meiss...@linux.ibm.com>
> 
> gcc/
> 
>       * config/rs6000/mma.md (mma_<acc>): New define_expand to handle
>       mma_<acc> for dense math and non dense math.
>       (mma_<acc> insn): Restrict to non dense math.
>       (mma_xxsetaccz): Convert to define_expand to handle non dense math and
>       dense math.
>       (mma_xxsetaccz_vsx): Rename from mma_xxsetaccz and restrict usage to non
>       dense math.
>       (mma_xxsetaccz_dm): Dense math version of mma_xxsetaccz.
>       (mma_<vv>): Add support for dense math.
>       (mma_<avv>): Likewise.
>       (mma_<pv>): Likewise.
>       (mma_<apv>): Likewise.
>       (mma_<vvi4i4i8>): Likewise.
>       (mma_<avvi4i4i8>): Likewise.
>       (mma_<vvi4i4i2>): Likewise.
>       (mma_<avvi4i4i2>): Likewise.
>       (mma_<vvi4i4>): Likewise.
>       (mma_<avvi4i4>): Likewise.
>       (mma_<pvi4i2>): Likewise.
>       (mma_<apvi4i2>): Likewise.
>       (mma_<vvi4i4i4>): Likewise.
>       (mma_<avvi4i4i4>): Likewise.
>       * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define
>       __PPC_DMR__ if we have dense math instructions.
>       * config/rs6000/rs6000.cc (print_operand): Make %A handle only DMRs if
>       dense math and only FPRs if not dense math.
>       (rs6000_split_multireg_move): Do not generate the xxmtacc instruction to
>       prime the DMR registers or the xxmfacc instruction to de-prime
>       instructions if we have dense math register support.
> ---
>  gcc/config/rs6000/mma.md      | 247 +++++++++++++++++++++-------------
>  gcc/config/rs6000/rs6000-c.cc |   3 +
>  gcc/config/rs6000/rs6000.cc   |  35 ++---
>  3 files changed, 176 insertions(+), 109 deletions(-)
> 
> diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
> index bb898919ab5..525a85146ff 100644
> --- a/gcc/config/rs6000/mma.md
> +++ b/gcc/config/rs6000/mma.md
> @@ -559,190 +559,249 @@ (define_insn "*mma_disassemble_acc_dm"
>    "dmxxextfdmr256 %0,%1,2"
>    [(set_attr "type" "mma")])
>  
> -(define_insn "mma_<acc>"
> +;; MMA instructions that do not use their accumulators as an input, still 
> must
> +;; not allow their vector operands to overlap the registers used by the
> +;; accumulator.  We enforce this by marking the output as early clobber.  If 
> we
> +;; have dense math, we don't need the whole prime/de-prime action, so just 
> make
> +;; thse instructions be NOPs.

Typo: s/thse/these/

> +
> +(define_expand "mma_<acc>"
> +  [(set (match_operand:XO 0 "register_operand")
> +     (unspec:XO [(match_operand:XO 1 "register_operand")]

s/register_operand/accumulator_operand/?

> +                MMA_ACC))]
> +  "TARGET_MMA"
> +{
> +  if (TARGET_DENSE_MATH)
> +    {
> +      if (!rtx_equal_p (operands[0], operands[1]))
> +     emit_move_insn (operands[0], operands[1]);
> +      DONE;
> +    }
> +
> +  /* Generate the prime/de-prime code.  */
> +})
> +
> +(define_insn "*mma_<acc>"

Maybe it would be better to name this "*mma_<acc>_nodm"?

>    [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
>       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
>                   MMA_ACC))]
> -  "TARGET_MMA"
> +  "TARGET_MMA && !TARGET_DENSE_MATH"

I noticed that "TARGET_MMA && !TARGET_DENSE_MATH" is used in many places (for
example, the changes to rs6000_split_multireg_move in this patch and several
places in the previous patches).  Maybe we can introduce a macro named
TARGET_MMA_NODM as shorthand for it?

>    "<acc> %A0"
>    [(set_attr "type" "mma")])
>  
>  ;; We can't have integer constants in XOmode so we wrap this in an
> -;; UNSPEC_VOLATILE.
> +;; UNSPEC_VOLATILE for the non-dense math case.  For dense math, we don't 
> need
> +;; to disable optimization and we can do a normal UNSPEC.
>  
> -(define_insn "mma_xxsetaccz"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
> +(define_expand "mma_xxsetaccz"
> +  [(set (match_operand:XO 0 "register_operand")

s/register_operand/accumulator_operand/?

>       (unspec_volatile:XO [(const_int 0)]
>                           UNSPECV_MMA_XXSETACCZ))]
>    "TARGET_MMA"
> +{
> +  if (TARGET_DENSE_MATH)
> +    {
> +      emit_insn (gen_mma_xxsetaccz_dm (operands[0]));
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*mma_xxsetaccz_vsx"

s/vsx/nodm/

> +  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
> +     (unspec_volatile:XO [(const_int 0)]
> +                         UNSPECV_MMA_XXSETACCZ))]
> +  "TARGET_MMA && !TARGET_DENSE_MATH"
>    "xxsetaccz %A0"
>    [(set_attr "type" "mma")])
>  
> +
> +(define_insn "mma_xxsetaccz_dm"
> +  [(set (match_operand:XO 0 "dmr_operand" "=wD")
> +     (unspec:XO [(const_int 0)]
> +                UNSPECV_MMA_XXSETACCZ))]
> +  "TARGET_DENSE_MATH"
> +  "dmsetdmrz %0"
> +  [(set_attr "type" "mma")])
> +
>  (define_insn "mma_<vv>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
>                   MMA_VV))]
>    "TARGET_MMA"
>    "<vv> %A0,%x1,%x2"
> -  [(set_attr "type" "mma")])
> +  [(set_attr "type" "mma")
> +   (set_attr "isa" "dm,not_dm,not_dm")])

As suggested for the previous patches, s/not_dm/nodm/.

The others look good to me, thanks!

BR,
Kewen

>  
>  (define_insn "mma_<avv>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
>                   MMA_AVV))]
>    "TARGET_MMA"
>    "<avv> %A0,%x2,%x3"
> -  [(set_attr "type" "mma")])
> +  [(set_attr "type" "mma")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<pv>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
>                   MMA_PV))]
>    "TARGET_MMA"
>    "<pv> %A0,%x1,%x2"
> -  [(set_attr "type" "mma")])
> +  [(set_attr "type" "mma")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<apv>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
> -                 (match_operand:OO 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
> +                 (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
>                   MMA_APV))]
>    "TARGET_MMA"
>    "<apv> %A0,%x2,%x3"
> -  [(set_attr "type" "mma")])
> +  [(set_attr "type" "mma")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<vvi4i4i8>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
>                   MMA_VVI4I4I8))]
>    "TARGET_MMA"
>    "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<avvi4i4i8>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 5 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")]
>                   MMA_AVVI4I4I8))]
>    "TARGET_MMA"
>    "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<vvi4i4i2>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
>                   MMA_VVI4I4I2))]
>    "TARGET_MMA"
>    "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<avvi4i4i2>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 5 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
>                   MMA_AVVI4I4I2))]
>    "TARGET_MMA"
>    "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<vvi4i4>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
>                   MMA_VVI4I4))]
>    "TARGET_MMA"
>    "<vvi4i4> %A0,%x1,%x2,%3,%4"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<avvi4i4>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
>                   MMA_AVVI4I4))]
>    "TARGET_MMA"
>    "<avvi4i4> %A0,%x2,%x3,%4,%5"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<pvi4i2>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
>                   MMA_PVI4I2))]
>    "TARGET_MMA"
>    "<pvi4i2> %A0,%x1,%x2,%3,%4"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<apvi4i2>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
> -                 (match_operand:OO 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
> +                 (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
>                   MMA_APVI4I2))]
>    "TARGET_MMA"
>    "<apvi4i2> %A0,%x2,%x3,%4,%5"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<vvi4i4i4>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
>                   MMA_VVI4I4I4))]
>    "TARGET_MMA"
>    "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
>  
>  (define_insn "mma_<avvi4i4i4>"
> -  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
> -     (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
> -                 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
> -                 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
> -                 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 5 "const_0_to_15_operand" "n,n")
> -                 (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
> +  [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d")
> +     (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0")
> +                 (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")
> +                 (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")
> +                 (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
>                   MMA_AVVI4I4I4))]
>    "TARGET_MMA"
>    "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
>    [(set_attr "type" "mma")
> -   (set_attr "prefixed" "yes")])
> +   (set_attr "prefixed" "yes")
> +   (set_attr "isa" "dm,not_dm,not_dm")])
> diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
> index f2fb5bef678..4342620f87f 100644
> --- a/gcc/config/rs6000/rs6000-c.cc
> +++ b/gcc/config/rs6000/rs6000-c.cc
> @@ -600,6 +600,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
> flags)
>    /* Tell the user if we support the MMA instructions.  */
>    if ((flags & OPTION_MASK_MMA) != 0)
>      rs6000_define_or_undefine_macro (define_p, "__MMA__");
> +  /* Tell the user if we support the dense math instructions.  */
> +  if ((flags & OPTION_MASK_DENSE_MATH) != 0)
> +    rs6000_define_or_undefine_macro (define_p, "__PPC_DMR__");
>    /* Whether pc-relative code is being generated.  */
>    if ((flags & OPTION_MASK_PCREL) != 0)
>      rs6000_define_or_undefine_macro (define_p, "__PCREL__");
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 83e32f7a43a..59517c8608d 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -14264,8 +14264,13 @@ print_operand (FILE *file, rtx x, int code)
>        overlapping with the FPR registers.  */
>        if (!REG_P (x))
>       output_operand_lossage ("invalid %%A value");
> -      else if (TARGET_DENSE_MATH && DMR_REGNO_P (REGNO (x)))
> -     fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO);
> +      else if (TARGET_DENSE_MATH)
> +     {
> +       if (DMR_REGNO_P (REGNO (x)))
> +         fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO);
> +       else
> +         output_operand_lossage ("%%A operand is not a DMR");
> +     }
>        else if (!FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
>       output_operand_lossage ("invalid %%A value");
>        else
> @@ -27719,7 +27724,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
>  
>         /* If we are reading an accumulator register, we have to
>            deprime it before we can access it.  */
> -       if (TARGET_MMA
> +       if (TARGET_MMA && !TARGET_DENSE_MATH
>             && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
>           emit_insn (gen_mma_xxmfacc (src, src));
>  
> @@ -27751,9 +27756,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
>             emit_insn (gen_rtx_SET (dst2, src2));
>           }
>  
> -       /* If we are writing an accumulator register, we have to
> -          prime it after we've written it.  */
> -       if (TARGET_MMA
> +       /* If we are writing an accumulator register that overlaps with the
> +          FPR registers, we have to prime it after we've written it.  */
> +       if (TARGET_MMA && !TARGET_DENSE_MATH
>             && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
>           emit_insn (gen_mma_xxmtacc (dst, dst));
>  
> @@ -27822,9 +27827,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
>             emit_insn (gen_rtx_SET (dst_i, op));
>           }
>  
> -       /* We are writing an accumulator register, so we have to
> -          prime it after we've written it.  */
> -       if (GET_MODE (src) == XOmode)
> +       /* On systems without dense math where accumulators overlap with the
> +          vector registers, we have to prime it after we've written it.  */
> +       if (GET_MODE (src) == XOmode && !TARGET_DENSE_MATH)
>           emit_insn (gen_mma_xxmtacc (dst, dst));
>  
>         return;
> @@ -27835,9 +27840,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
>  
>    if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
>      {
> -      /* If we are reading an accumulator register, we have to
> -      deprime it before we can access it.  */
> -      if (TARGET_MMA
> +      /* If we are reading an accumulator register and we don't have dense
> +      math, we have to deprime it before we can access it.  */
> +      if (TARGET_MMA && !TARGET_DENSE_MATH
>         && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
>       emit_insn (gen_mma_xxmfacc (src, src));
>  
> @@ -27865,7 +27870,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
>  
>        /* If we are writing an accumulator register, we have to
>        prime it after we've written it.  */
> -      if (TARGET_MMA
> +      if (TARGET_MMA && !TARGET_DENSE_MATH
>         && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
>       emit_insn (gen_mma_xxmtacc (dst, dst));
>      }
> @@ -28002,7 +28007,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
>  
>        /* If we are reading an accumulator register, we have to
>        deprime it before we can access it.  */
> -      if (TARGET_MMA && REG_P (src)
> +      if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (src)
>         && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
>       emit_insn (gen_mma_xxmfacc (src, src));
>  
> @@ -28034,7 +28039,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
>  
>        /* If we are writing an accumulator register, we have to
>        prime it after we've written it.  */
> -      if (TARGET_MMA && REG_P (dst)
> +      if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (dst)
>         && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
>       emit_insn (gen_mma_xxmtacc (dst, dst));
>  
≈

Reply via email to