This patch enables GCC to use the accumulator VGPR registers (AVGPRs) on CDNA1 and later architectures. The backend does not yet attempt to make use of the matrix acceleration instructions, but the new registers are still useful as fast space for register spills, and they can now be used in inline assembly statements.

I haven't written a dedicated testcase for this - just building libgcc and libgfortran seems to have thoroughly exercised the code paths involved.

I have a test run in progress - assuming that this doesn't find any breakage, OK to commit?

gcc/ChangeLog:

    * config/gcn/constraints.md: Add AVGPR constraints.
    * config/gcn/gcn-valu.md (*mov<mode>, mov<mode>_sgprbase)
    (reload_in<mode>, reload_out<mode>): Add AVGPR alternatives.
    (gather<mode>_insn_1offset<exec>, gather<mode>_insn_1offset_ds<exec>)
    (gather<mode>_insn_2offsets<exec>)
    (scatter_store<mode>_insn_1offset<exec_scatter>)
    (scatter<mode>_insn_1offset_ds<exec_scatter>)
    (scatter<mode>_insn_2offsets<exec_scatter>): Allow use of AVGPRs.
    * config/gcn/gcn.cc (MAX_NORMAL_AVGPR_COUNT): Define.
    (gcn_class_max_nregs): Handle AVGPR_REGS.
    (gcn_hard_regno_mode_ok): Likewise.
    (gcn_regno_reg_class): Likewise.
    (gcn_spill_class): Allow spilling to AVGPRs on TARGET_CDNA2_PLUS.
    (gcn_sgpr_move_p): Handle AVGPRs.
    (gcn_secondary_reload): Reload AVGPRs via VGPRs.
    (gcn_conditional_register_usage): Handle AVGPRs.
    (gcn_vgpr_equivalent_register_operand): New function.
    (gcn_valid_move_p): Check for validity of AVGPR moves.
    (gcn_memory_move_cost): Handle AVGPRs.
    (gcn_register_move_cost): Likewise.
    (gcn_vmem_insn_p): Handle TYPE_VOP3P_MAI.
    (gcn_hsa_declare_function_name): Handle AVGPRs.
    (print_reg): Likewise.
    (gcn_dwarf_register_number): Likewise.
    * config/gcn/gcn.h (FIRST_AVGPR_REG, AVGPR_REGNO, LAST_AVGPR_REG):
    Define.
    (SOFT_ARG_REG, FRAME_POINTER_REGNUM, DWARF_LINK_REGISTER)
    (FIRST_PSEUDO_REGISTER): Update.
    (AVGPR_REGNO_P): Define.
    (FIXED_REGISTERS, CALL_USED_REGISTERS): Add AVGPRs.
    (enum reg_class, REG_CLASS_NAMES): Add AVGPR_REGS and ALL_VGPR_REGS.
    (REG_CLASS_CONTENTS): Add new register classes and add entries for
    AVGPRs to all classes.
    (REGISTER_NAMES): Add AVGPRs.
    * config/gcn/gcn.md (FIRST_AVGPR_REG, LAST_AVGPR_REG): Define.
    (AP_REGNUM, FP_REGNUM): Update.
    (define_attr "type"): Add vop3p_mai.
    (*mov<mode>_insn, *movti_insn): Add AVGPR alternatives.
    * config/gcn/predicates.md (gcn_avgpr_register_operand)
    (gcn_avgpr_hard_register_operand): New predicates.
diff --git a/gcc/config/gcn/constraints.md b/gcc/config/gcn/constraints.md
index efe462a0bd6..33fbce552ca 100644
--- a/gcc/config/gcn/constraints.md
+++ b/gcc/config/gcn/constraints.md
@@ -77,6 +77,11 @@
 (define_register_constraint "v" "VGPR_REGS"
   "VGPR registers")
 
+(define_register_constraint "a" "TARGET_CDNA1_PLUS ? AVGPR_REGS : NO_REGS"
+  "Accumulator VGPR registers")
+
+(define_register_constraint "b" "TARGET_CDNA2_PLUS ? AVGPR_REGS : NO_REGS")
+
 (define_register_constraint "Sg" "SGPR_REGS"
   "SGPR registers")
 
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 75e9a59600b..6e9a8463e34 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -389,12 +389,17 @@
    (set_attr "length" "0")])
 
 (define_insn "*mov<mode>"
-  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
-       (match_operand:V_1REG 1 "general_operand"      "vA,B"))]
+  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v,$a, b")
+       (match_operand:V_1REG 1 "general_operand"      "vA,B, a, v, b"))]
   ""
-  "v_mov_b32\t%0, %1"
-  [(set_attr "type" "vop1,vop1")
-   (set_attr "length" "4,8")])
+  "@
+   v_mov_b32\t%0, %1
+   v_mov_b32\t%0, %1
+   v_accvgpr_read_b32\t%0, %1
+   v_accvgpr_write_b32\t%0, %1
+   v_accvgpr_mov_b32\t%0, %1"
+  [(set_attr "type" "vop1,vop1,vop3p_mai,vop3p_mai,vop1")
+   (set_attr "length" "4,8,8,8,4")])
 
 (define_insn "mov<mode>_exec"
   [(set (match_operand:V_1REG 0 "nonimmediate_operand"  "=v, v, v, v, v, m")
@@ -435,17 +440,28 @@
 ;   (set_attr "length" "4,8,16,16")])
 
 (define_insn "*mov<mode>"
-  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
-       (match_operand:V_2REG 1 "general_operand"      "vDB"))]
+  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v,  v,$a, b")
+       (match_operand:V_2REG 1 "general_operand"      "vDB, a, v, b"))]
   ""
-  {
-    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
-      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
-    else
-      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
-  }
-  [(set_attr "type" "vmult")
-   (set_attr "length" "16")])
+  "@
+   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
+       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
+     else \
+       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
+   * if (REGNO (operands[0]) <= REGNO (operands[1])) \
+       return \"v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1\"; \
+     else \
+       return \"v_accvgpr_read_b32\t%H0, %H1\;v_accvgpr_read_b32\t%L0, %L1\";
+   * if (REGNO (operands[0]) <= REGNO (operands[1])) \
+       return \"v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\"; \
+     else \
+       return \"v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%L0, %L1\";
+   * if (REGNO (operands[0]) <= REGNO (operands[1])) \
+       return \"v_accvgpr_mov_b32\t%L0, %L1\;v_accvgpr_mov_b32\t%H0, %H1\"; \
+     else \
+       return \"v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1\";"
+  [(set_attr "type" "vmult,vmult,vmult,vmult")
+   (set_attr "length" "16,16,16,8")])
 
 (define_insn "mov<mode>_exec"
   [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v,   v,   v, v, m")
@@ -521,26 +537,28 @@
 ;   flat_load v, vT
 
 (define_insn "mov<mode>_sgprbase"
-  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
+  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m, b, m")
        (unspec:V_1REG
-         [(match_operand:V_1REG 1 "general_operand"   " vA,vB, m, v")]
+         [(match_operand:V_1REG 1 "general_operand"   " vA,vB, m, v, m, b")]
          UNSPEC_SGPRBASE))
-   (clobber (match_operand:<VnDI> 2 "register_operand"  "=&v,&v,&v,&v"))]
+   (clobber (match_operand:<VnDI> 2 "register_operand"  "=&v,&v,&v,&v,&v,&v"))]
   "lra_in_progress || reload_completed"
   "@
    v_mov_b32\t%0, %1
    v_mov_b32\t%0, %1
    #
+   #
+   #
    #"
-  [(set_attr "type" "vop1,vop1,*,*")
-   (set_attr "length" "4,8,12,12")])
+  [(set_attr "type" "vop1,vop1,*,*,*,*")
+   (set_attr "length" "4,8,12,12,12,12")])
 
 (define_insn "mov<mode>_sgprbase"
-  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
+  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m, b, m")
        (unspec:V_2REG
-         [(match_operand:V_2REG 1 "general_operand"   "vDB, m, v")]
+         [(match_operand:V_2REG 1 "general_operand"   "vDB, m, v, m, b")]
          UNSPEC_SGPRBASE))
-   (clobber (match_operand:<VnDI> 2 "register_operand"  "=&v,&v,&v"))]
+   (clobber (match_operand:<VnDI> 2 "register_operand"  "=&v,&v,&v,&v,&v"))]
   "lra_in_progress || reload_completed"
   "@
    * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
@@ -548,17 +566,19 @@
      else \
        return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
    #
+   #
+   #
    #"
-  [(set_attr "type" "vmult,*,*")
-   (set_attr "length" "8,12,12")])
+  [(set_attr "type" "vmult,*,*,*,*")
+   (set_attr "length" "8,12,12,12,12")])
 
 ; reload_in was once a standard name, but here it's only referenced by
 ; gcn_secondary_reload.  It allows a reload with a scratch register.
 
 (define_expand "reload_in<mode>"
-  [(set (match_operand:V_ALL 0 "register_operand"     "= v")
-       (match_operand:V_ALL 1 "memory_operand"       "  m"))
-   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
+  [(set (match_operand:V_ALL 0 "register_operand"     "= v, b")
+       (match_operand:V_ALL 1 "memory_operand"       "  m, m"))
+   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v"))]
   ""
   {
     emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
@@ -568,9 +588,9 @@
 ; reload_out is similar to reload_in, above.
 
 (define_expand "reload_out<mode>"
-  [(set (match_operand:V_ALL 0 "memory_operand"              "= m")
-       (match_operand:V_ALL 1 "register_operand"     "  v"))
-   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
+  [(set (match_operand:V_ALL 0 "memory_operand"              "= m, m")
+       (match_operand:V_ALL 1 "register_operand"     "  v, b"))
+   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v"))]
   ""
   {
     emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
@@ -953,7 +973,7 @@
     {})
 
 (define_insn "gather<mode>_insn_1offset<exec>"
-  [(set (match_operand:V_ALL 0 "register_operand"                 "=v")
+  [(set (match_operand:V_ALL 0 "register_operand"                 "=vb")
        (unspec:V_ALL
          [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
                        (vec_duplicate:<VnDI>
@@ -992,7 +1012,7 @@
    (set_attr "length" "12")])
 
 (define_insn "gather<mode>_insn_1offset_ds<exec>"
-  [(set (match_operand:V_ALL 0 "register_operand"                 "=v")
+  [(set (match_operand:V_ALL 0 "register_operand"                 "=vb")
        (unspec:V_ALL
          [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
                        (vec_duplicate:<VnSI>
@@ -1014,7 +1034,7 @@
    (set_attr "length" "12")])
 
 (define_insn "gather<mode>_insn_2offsets<exec>"
-  [(set (match_operand:V_ALL 0 "register_operand"                      "=v")
+  [(set (match_operand:V_ALL 0 "register_operand"                      "=vb")
        (unspec:V_ALL
          [(plus:<VnDI>
             (plus:<VnDI>
@@ -1085,7 +1105,7 @@
          [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
                        (vec_duplicate:<VnDI>
                          (match_operand 1 "immediate_operand"     "n")))
-          (match_operand:V_ALL 2 "register_operand"               "v")
+          (match_operand:V_ALL 2 "register_operand"               "vb")
           (match_operand 3 "immediate_operand"                    "n")
           (match_operand 4 "immediate_operand"                    "n")]
          UNSPEC_SCATTER))]
@@ -1123,7 +1143,7 @@
          [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
                        (vec_duplicate:<VnSI>
                          (match_operand 1 "immediate_operand"     "n")))
-          (match_operand:V_ALL 2 "register_operand"               "v")
+          (match_operand:V_ALL 2 "register_operand"               "vb")
           (match_operand 3 "immediate_operand"                    "n")
           (match_operand 4 "immediate_operand"                    "n")]
          UNSPEC_SCATTER))]
@@ -1149,7 +1169,7 @@
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 1 "register_operand"             " v")))
             (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
-          (match_operand:V_ALL 3 "register_operand"                    " v")
+          (match_operand:V_ALL 3 "register_operand"                    " vb")
           (match_operand 4 "immediate_operand"                         " n")
           (match_operand 5 "immediate_operand"                         " n")]
          UNSPEC_SCATTER))]
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 3d71c2f6526..64bcc897603 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -96,6 +96,7 @@ static hash_map<tree, int> lds_allocs;
 
 #define MAX_NORMAL_SGPR_COUNT  62  // i.e. 64 with VCC
 #define MAX_NORMAL_VGPR_COUNT  24
+#define MAX_NORMAL_AVGPR_COUNT 24
 
 /* }}}  */
 /* {{{ Initialization and options.  */
@@ -480,7 +481,7 @@ gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
 {
   /* Scalar registers are 32bit, vector registers are in fact tuples of
      64 lanes.  */
-  if (rclass == VGPR_REGS)
+  if (rclass == VGPR_REGS || rclass == AVGPR_REGS)
     {
       if (vgpr_1reg_mode_p (mode))
        return 1;
@@ -571,7 +572,7 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
     return (sgpr_1reg_mode_p (mode)
            || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
            || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
-  if (VGPR_REGNO_P (regno))
+  if (VGPR_REGNO_P (regno) || (AVGPR_REGNO_P (regno) && TARGET_CDNA1_PLUS))
     /* Vector instructions do not care about the alignment of register
        pairs, but where there is no 64-bit instruction, many of the
        define_split do not work if the input and output registers partially
@@ -611,6 +612,8 @@ gcn_regno_reg_class (int regno)
     }
   if (VGPR_REGNO_P (regno))
     return VGPR_REGS;
+  if (AVGPR_REGNO_P (regno))
+    return AVGPR_REGS;
   if (SGPR_REGNO_P (regno))
     return SGPR_REGS;
   if (regno < FIRST_VGPR_REG)
@@ -801,7 +804,7 @@ gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
       || c == VCC_CONDITIONAL_REG || c == EXEC_MASK_REG)
     return SGPR_REGS;
   else
-    return NO_REGS;
+    return c == VGPR_REGS && TARGET_CDNA2_PLUS ? AVGPR_REGS : NO_REGS;
 }
 
 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
@@ -2363,11 +2366,11 @@ gcn_sgpr_move_p (rtx op0, rtx op1)
   if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
     return true;
   if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
-      || VGPR_REGNO_P (REGNO (op0)))
+      || VGPR_REGNO_P (REGNO (op0)) || AVGPR_REGNO_P (REGNO (op0)))
     return false;
   if (REG_P (op1)
       && REGNO (op1) < FIRST_PSEUDO_REGISTER
-      && !VGPR_REGNO_P (REGNO (op1)))
+      && !VGPR_REGNO_P (REGNO (op1)) && !AVGPR_REGNO_P (REGNO (op1)))
     return true;
   return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
 }
@@ -2441,6 +2444,14 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
          result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
          break;
        }
+
+      /* CDNA1 doesn't have an instruction for going between the accumulator
+        registers and memory.  Go via a VGPR in this case.  */
+      if (TARGET_CDNA1 && rclass == AVGPR_REGS && result != VGPR_REGS)
+       {
+         result = VGPR_REGS;
+         sri->icode = CODE_FOR_nothing;
+       }
     }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2462,7 +2473,8 @@ gcn_conditional_register_usage (void)
 
   if (cfun->machine->normal_function)
     {
-      /* Restrict the set of SGPRs and VGPRs used by non-kernel functions.  */
+      /* Restrict the set of SGPRs, VGPRs and AVGPRs used by non-kernel
+        functions.  */
       for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT);
           i <= LAST_SGPR_REG; i++)
        fixed_regs[i] = 1, call_used_regs[i] = 1;
@@ -2471,6 +2483,9 @@ gcn_conditional_register_usage (void)
           i <= LAST_VGPR_REG; i++)
        fixed_regs[i] = 1, call_used_regs[i] = 1;
 
+      for (int i = AVGPR_REGNO (MAX_NORMAL_AVGPR_COUNT);
+          i <= LAST_AVGPR_REG; i++)
+       fixed_regs[i] = 1, call_used_regs[i] = 1;
       return;
     }
 
@@ -2524,6 +2539,16 @@ gcn_conditional_register_usage (void)
     fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
 }
 
+static bool
+gcn_vgpr_equivalent_register_operand (rtx x, machine_mode mode)
+{
+  if (gcn_vgpr_register_operand (x, mode))
+    return true;
+  if (TARGET_CDNA2_PLUS && gcn_avgpr_register_operand (x, mode))
+    return true;
+  return false;
+}
+
 /* Determine if a load or store is valid, according to the register classes
    and address space.  Used primarily by the machine description to decide
    when to split a move into two steps.  */
@@ -2532,21 +2557,35 @@ bool
 gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
 {
   if (!MEM_P (dest) && !MEM_P (src))
-    return true;
+    {
+      if (gcn_vgpr_register_operand (src, mode) &&
+         gcn_avgpr_register_operand (dest, mode))
+       return true;
+      if (gcn_avgpr_register_operand (src, mode) &&
+         gcn_vgpr_register_operand (dest, mode))
+       return true;
+      if (TARGET_CDNA2_PLUS && gcn_avgpr_register_operand (src, mode) &&
+         gcn_avgpr_register_operand (dest, mode))
+       return true;
+      if (gcn_avgpr_hard_register_operand (src, mode) ||
+         gcn_avgpr_hard_register_operand (dest, mode))
+       return false;
+      return true;
+    }
 
   if (MEM_P (dest)
       && AS_FLAT_P (MEM_ADDR_SPACE (dest))
       && (gcn_flat_address_p (XEXP (dest, 0), mode)
          || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
          || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
-      && gcn_vgpr_register_operand (src, mode))
+      && gcn_vgpr_equivalent_register_operand (src, mode))
     return true;
   else if (MEM_P (src)
           && AS_FLAT_P (MEM_ADDR_SPACE (src))
           && (gcn_flat_address_p (XEXP (src, 0), mode)
               || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
               || GET_CODE (XEXP (src, 0)) == LABEL_REF)
-          && gcn_vgpr_register_operand (dest, mode))
+          && gcn_vgpr_equivalent_register_operand (dest, mode))
     return true;
 
   if (MEM_P (dest)
@@ -2554,14 +2593,14 @@ gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
       && (gcn_global_address_p (XEXP (dest, 0))
          || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
          || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
-      && gcn_vgpr_register_operand (src, mode))
+      && gcn_vgpr_equivalent_register_operand (src, mode))
     return true;
   else if (MEM_P (src)
           && AS_GLOBAL_P (MEM_ADDR_SPACE (src))
           && (gcn_global_address_p (XEXP (src, 0))
               || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
               || GET_CODE (XEXP (src, 0)) == LABEL_REF)
-          && gcn_vgpr_register_operand (dest, mode))
+          && gcn_vgpr_equivalent_register_operand (dest, mode))
     return true;
 
   if (MEM_P (dest)
@@ -2582,12 +2621,12 @@ gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
   if (MEM_P (dest)
       && AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
       && gcn_ds_address_p (XEXP (dest, 0))
-      && gcn_vgpr_register_operand (src, mode))
+      && gcn_vgpr_equivalent_register_operand (src, mode))
     return true;
   else if (MEM_P (src)
           && AS_ANY_DS_P (MEM_ADDR_SPACE (src))
           && gcn_ds_address_p (XEXP (src, 0))
-          && gcn_vgpr_register_operand (dest, mode))
+          && gcn_vgpr_equivalent_register_operand (dest, mode))
     return true;
 
   return false;
@@ -3919,6 +3958,11 @@ gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
       if (in)
        return (LOAD_COST + 2) * nregs;
       return STORE_COST * nregs;
+    case AVGPR_REGS:
+    case ALL_VGPR_REGS:
+      if (in)
+       return (LOAD_COST + (TARGET_CDNA2_PLUS ? 2 : 4)) * nregs;
+      return (STORE_COST + (TARGET_CDNA2_PLUS ? 0 : 2)) * nregs;
     case ALL_REGS:
     case ALL_GPR_REGS:
     case SRCDST_REGS:
@@ -3938,6 +3982,15 @@ gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
 static int
 gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
 {
+  if (src == AVGPR_REGS)
+    {
+      if (dst == AVGPR_REGS)
+       return TARGET_CDNA1 ? 6 : 2;
+      if (dst != VGPR_REGS)
+       return 6;
+    }
+  if (dst == AVGPR_REGS && src != VGPR_REGS)
+    return 6;
   /* Increase cost of moving from and to vector registers.  While this is
      fast in hardware (I think), it has hidden cost of setting up the exec
      flags.  */
@@ -5372,6 +5425,7 @@ gcn_vmem_insn_p (attr_type type)
     case TYPE_VOPC:
     case TYPE_VOP3A:
     case TYPE_VOP3B:
+    case TYPE_VOP3P_MAI:
     case TYPE_VOP_SDWA:
     case TYPE_VOP_DPP:
     case TYPE_MULT:
@@ -6098,7 +6152,7 @@ output_file_start (void)
 void
 gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
 {
-  int sgpr, vgpr;
+  int sgpr, vgpr, avgpr;
   bool xnack_enabled = false;
 
   fputs ("\n\n", file);
@@ -6123,6 +6177,10 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
     if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
       break;
   vgpr++;
+  for (avgpr = 255; avgpr >= 0; avgpr--)
+    if (df_regs_ever_live_p (FIRST_AVGPR_REG + avgpr))
+      break;
+  avgpr++;
 
   if (!leaf_function_p ())
     {
@@ -6131,6 +6189,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
        vgpr = MAX_NORMAL_VGPR_COUNT;
       if (sgpr < MAX_NORMAL_SGPR_COUNT)
        sgpr = MAX_NORMAL_SGPR_COUNT;
+      if (avgpr < MAX_NORMAL_AVGPR_COUNT)
+       avgpr = MAX_NORMAL_AVGPR_COUNT;
     }
 
   /* The gfx90a accum_offset field can't represent 0 registers.  */
@@ -6234,8 +6294,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
           cfun->machine->kernarg_segment_alignment,
           LDS_SIZE,
           sgpr, vgpr);
-  if (gcn_arch == PROCESSOR_GFX90a)
-    fprintf (file, "            .agpr_count: 0\n"); // AGPRs are not used, yet
+  if (gcn_arch == PROCESSOR_GFX90a || gcn_arch == PROCESSOR_GFX908)
+    fprintf (file, "            .agpr_count: %i\n", avgpr);
   fputs ("        .end_amdgpu_metadata\n", file);
 #endif
 
@@ -6331,6 +6391,9 @@ print_reg (FILE *file, rtx x)
       else if (VGPR_REGNO_P (REGNO (x)))
        fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
                 REGNO (x) - FIRST_VGPR_REG + 1);
+      else if (AVGPR_REGNO_P (REGNO (x)))
+       fprintf (file, "a[%i:%i]", REGNO (x) - FIRST_AVGPR_REG,
+                REGNO (x) - FIRST_AVGPR_REG + 1);
       else if (REGNO (x) == FLAT_SCRATCH_REG)
        fprintf (file, "flat_scratch");
       else if (REGNO (x) == EXEC_REG)
@@ -6349,6 +6412,9 @@ print_reg (FILE *file, rtx x)
       else if (VGPR_REGNO_P (REGNO (x)))
        fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
                 REGNO (x) - FIRST_VGPR_REG + 3);
+      else if (AVGPR_REGNO_P (REGNO (x)))
+       fprintf (file, "a[%i:%i]", REGNO (x) - FIRST_AVGPR_REG,
+                REGNO (x) - FIRST_AVGPR_REG + 3);
       else
        gcc_unreachable ();
     }
@@ -7262,6 +7328,8 @@ gcn_dwarf_register_number (unsigned int regno)
     }
   else if (VGPR_REGNO_P (regno))
     return (regno - FIRST_VGPR_REG + 2560);
+  else if (AVGPR_REGNO_P (regno))
+    return (regno - FIRST_AVGPR_REG + 3072);
 
   /* Otherwise, there's nothing sensible to do.  */
   return regno + 100000;
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index 4ff9a5d4d12..ff44c66c57c 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -142,6 +142,9 @@
 #define FIRST_VGPR_REG     160
 #define VGPR_REGNO(N)      ((N)+FIRST_VGPR_REG)
 #define LAST_VGPR_REG      415
+#define FIRST_AVGPR_REG     416
+#define AVGPR_REGNO(N)      ((N)+FIRST_AVGPR_REG)
+#define LAST_AVGPR_REG      671
 
 /* Frame Registers, and other registers */
 
@@ -153,10 +156,10 @@
 #define RETURN_VALUE_REG         168   /* Must be divisible by 4.  */
 #define STATIC_CHAIN_REGNUM      30
 #define WORK_ITEM_ID_Z_REG       162
-#define SOFT_ARG_REG             416
-#define FRAME_POINTER_REGNUM     418
-#define DWARF_LINK_REGISTER      420
-#define FIRST_PSEUDO_REGISTER    421
+#define SOFT_ARG_REG             672
+#define FRAME_POINTER_REGNUM     674
+#define DWARF_LINK_REGISTER      676
+#define FIRST_PSEUDO_REGISTER    677
 
 #define FIRST_PARM_REG (FIRST_SGPR_REG + 24)
 #define FIRST_VPARM_REG (FIRST_VGPR_REG + 8)
@@ -172,6 +175,7 @@
 #define SGPR_OR_VGPR_REGNO_P(N) ((N)>=FIRST_VGPR_REG && (N) <= LAST_SGPR_REG)
 #define SGPR_REGNO_P(N)                ((N) <= LAST_SGPR_REG)
 #define VGPR_REGNO_P(N)                ((N)>=FIRST_VGPR_REG && (N) <= LAST_VGPR_REG)
+#define AVGPR_REGNO_P(N)        ((N)>=FIRST_AVGPR_REG && (N) <= LAST_AVGPR_REG)
 #define SSRC_REGNO_P(N)                ((N) <= SCC_REG && (N) != VCCZ_REG)
 #define SDST_REGNO_P(N)                ((N) <= EXEC_HI_REG && (N) != VCCZ_REG)
 #define CC_REG_P(X)            (REG_P (X) && CC_REGNO_P (REGNO (X)))
@@ -202,7 +206,7 @@
     1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1,                  \
-    /* VGRPs */                                            \
+    /* VGPRs */                                            \
     0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
@@ -219,6 +223,23 @@
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    /* Accumulation VGPRs */                       \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     /* Other registers.  */                        \
     1, 1, 1, 1, 1                                  \
 }
@@ -240,7 +261,7 @@
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1,                  \
-    /* VGRPs */                                            \
+    /* VGPRs */                                            \
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
@@ -257,6 +278,23 @@
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    /* Accumulation VGPRs */                       \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
     /* Other registers.  */                        \
     1, 1, 1, 1, 1                                  \
 }
@@ -316,6 +354,8 @@ enum reg_class
   SGPR_SRC_REGS,
   GENERAL_REGS,
   VGPR_REGS,
+  AVGPR_REGS,
+  ALL_VGPR_REGS,
   ALL_GPR_REGS,
   SRCDST_REGS,
   AFP_REGS,
@@ -341,6 +381,8 @@ enum reg_class
    "SGPR_SRC_REGS",        \
    "GENERAL_REGS",         \
    "VGPR_REGS",                    \
+   "AVGPR_REGS",           \
+   "ALL_VGPR_REGS",        \
    "ALL_GPR_REGS",         \
    "SRCDST_REGS",          \
    "AFP_REGS",             \
@@ -353,40 +395,58 @@ enum reg_class
 #define REG_CLASS_CONTENTS {                                              \
     /* NO_REGS.  */                                                       \
     {0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* SCC_CONDITIONAL_REG.  */                                           \
     {0, 0, 0, 0,                                                          \
      NAMED_REG_MASK2 (SCC_REG), 0, 0, 0,                                  \
-     0, 0, 0, 0, 0},                                                      \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0, 0, 0},                                                   \
     /* VCCZ_CONDITIONAL_REG.  */                                          \
     {0, 0, 0, NAMED_REG_MASK (VCCZ_REG),                                  \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* VCC_CONDITIONAL_REG.  */                                           \
     {0, 0, 0, NAMED_REG_MASK (VCC_LO_REG)|NAMED_REG_MASK (VCC_HI_REG),    \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* EXECZ_CONDITIONAL_REG.  */                                         \
     {0, 0, 0, 0,                                                          \
      NAMED_REG_MASK2 (EXECZ_REG), 0, 0, 0,                                \
-     0, 0, 0, 0, 0},                                                      \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0, 0, 0},                                                   \
     /* ALL_CONDITIONAL_REGS.  */                                          \
     {0, 0, 0, NAMED_REG_MASK (VCCZ_REG),                                  \
      NAMED_REG_MASK2 (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), 0, 0, 0,    \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* EXEC_MASK_REG.  */                                                 \
     {0, 0, 0, NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG), \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* SGPR_REGS.  */                                                     \
     {0xffffffff, 0xffffffff, 0xffffffff, 0xf1,                            \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* SGPR_EXEC_REGS. */                                                 \
     {0xffffffff, 0xffffffff, 0xffffffff,                                  \
       0xf1 | NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG),  \
      0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* SGPR_VOP_SRC_REGS.  */                                             \
     {0xffffffff, 0xffffffff, 0xffffffff,                                  \
@@ -394,12 +454,16 @@ enum reg_class
        -NAMED_REG_MASK (EXEC_LO_REG)                                      \
        -NAMED_REG_MASK (EXEC_HI_REG),                                     \
      NAMED_REG_MASK2 (SCC_REG), 0, 0, 0,                                  \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* SGPR_MEM_SRC_REGS.  */                                             \
     {0xffffffff, 0xffffffff, 0xffffffff,                                  \
      0xffffffff-NAMED_REG_MASK (VCCZ_REG)-NAMED_REG_MASK (M0_REG)         \
      -NAMED_REG_MASK (EXEC_LO_REG)-NAMED_REG_MASK (EXEC_HI_REG),          \
      0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* SGPR_DST_REGS.  */                                                 \
     {0xffffffff, 0xffffffff, 0xffffffff,                                  \
@@ -409,30 +473,56 @@ enum reg_class
     /* SGPR_SRC_REGS.  */                                                 \
     {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
      NAMED_REG_MASK2 (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), 0, 0, 0,    \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* GENERAL_REGS.  */                                                  \
     {0xffffffff, 0xffffffff, 0xffffffff, 0xf1,                            \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0},                                                   \
     /* VGPR_REGS.  */                                                     \
     {0, 0, 0, 0,                                                          \
      0,                 0xffffffff, 0xffffffff, 0xffffffff,               \
+     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
+     0xffffffff, 0, 0, 0,                                                 \
+     0, 0, 0, 0, 0, 0},                                                   \
+    /* AVGPR_REGS.  */                                                    \
+    {0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
+     0,                 0xffffffff, 0xffffffff, 0xffffffff,               \
+     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0},      \
+    /* ALL_VGPR_REGS.  */                                                 \
+    {0, 0, 0, 0,                                                          \
+     0,          0xffffffff, 0xffffffff, 0xffffffff,                      \
+     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
+     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0},      \
     /* ALL_GPR_REGS.  */                                                  \
     {0xffffffff, 0xffffffff, 0xffffffff, 0xf1,                            \
      0,                 0xffffffff, 0xffffffff, 0xffffffff,               \
-     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0},      \
+     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
+     0xffffffff, 0, 0, 0,                                                 \
+     0, 0, 0, 0, 0, 0},                                                   \
     /* SRCDST_REGS.  */                                                   \
     {0xffffffff, 0xffffffff, 0xffffffff,                                  \
      0xffffffff-NAMED_REG_MASK (VCCZ_REG),                                \
      0,                 0xffffffff, 0xffffffff, 0xffffffff,               \
-     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0},      \
+     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
+     0xffffffff, 0, 0, 0,                                                 \
+     0, 0, 0, 0, 0, 0},                                                   \
     /* AFP_REGS.  */                                                      \
     {0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
+     0, 0, 0, 0,                                                          \
      0, 0, 0, 0,                                                          \
      0, 0, 0, 0, 0, 0xf},                                                 \
     /* ALL_REGS.  */                                                      \
     {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
+     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
+     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,                      \
      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0 }}
 
@@ -537,6 +627,34 @@ enum gcn_address_spaces
     "v236", "v237", "v238", "v239", "v240", "v241", "v242", "v243", "v244", \
     "v245", "v246", "v247", "v248", "v249", "v250", "v251", "v252", "v253", \
     "v254", "v255",                                                        \
+    "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10",     \
+    "a11", "a12", "a13", "a14", "a15", "a16", "a17", "a18", "a19", "a20",   \
+    "a21", "a22", "a23", "a24", "a25", "a26", "a27", "a28", "a29", "a30",   \
+    "a31", "a32", "a33", "a34", "a35", "a36", "a37", "a38", "a39", "a40",   \
+    "a41", "a42", "a43", "a44", "a45", "a46", "a47", "a48", "a49", "a50",   \
+    "a51", "a52", "a53", "a54", "a55", "a56", "a57", "a58", "a59", "a60",   \
+    "a61", "a62", "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70",   \
+    "a71", "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",   \
+    "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", "a90",   \
+    "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", "a99", "a100",  \
+    "a101", "a102", "a103", "a104", "a105", "a106", "a107", "a108", "a109", \
+    "a110", "a111", "a112", "a113", "a114", "a115", "a116", "a117", "a118", \
+    "a119", "a120", "a121", "a122", "a123", "a124", "a125", "a126", "a127", \
+    "a128", "a129", "a130", "a131", "a132", "a133", "a134", "a135", "a136", \
+    "a137", "a138", "a139", "a140", "a141", "a142", "a143", "a144", "a145", \
+    "a146", "a147", "a148", "a149", "a150", "a151", "a152", "a153", "a154", \
+    "a155", "a156", "a157", "a158", "a159", "a160", "a161", "a162", "a163", \
+    "a164", "a165", "a166", "a167", "a168", "a169", "a170", "a171", "a172", \
+    "a173", "a174", "a175", "a176", "a177", "a178", "a179", "a180", "a181", \
+    "a182", "a183", "a184", "a185", "a186", "a187", "a188", "a189", "a190", \
+    "a191", "a192", "a193", "a194", "a195", "a196", "a197", "a198", "a199", \
+    "a200", "a201", "a202", "a203", "a204", "a205", "a206", "a207", "a208", \
+    "a209", "a210", "a211", "a212", "a213", "a214", "a215", "a216", "a217", \
+    "a218", "a219", "a220", "a221", "a222", "a223", "a224", "a225", "a226", \
+    "a227", "a228", "a229", "a230", "a231", "a232", "a233", "a234", "a235", \
+    "a236", "a237", "a238", "a239", "a240", "a241", "a242", "a243", "a244", \
+    "a245", "a246", "a247", "a248", "a249", "a250", "a251", "a252", "a253", \
+    "a254", "a255",                                                        \
     "?ap0", "?ap1", "?fp0", "?fp1", "?dwlr" }
 
 #define PRINT_OPERAND(FILE, X, CODE)  print_operand(FILE, X, CODE)
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 10d2b874cce..24b9f521275 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -51,13 +51,15 @@
    (EXECZ_REG                   128)
    (SCC_REG                     129)
    (FIRST_VGPR_REG              160)
-   (LAST_VGPR_REG               415)])
+   (LAST_VGPR_REG               415)
+   (FIRST_AVGPR_REG             416)
+   (LAST_AVGPR_REG              671)])
 
 (define_constants
   [(SP_REGNUM 16)
    (LR_REGNUM 18)
-   (AP_REGNUM 416)
-   (FP_REGNUM 418)])
+   (AP_REGNUM 672)
+   (FP_REGNUM 674)])
 
 (define_c_enum "unspecv" [
   UNSPECV_PROLOGUE_USE
@@ -163,6 +165,11 @@
 ;       vdst: vgpr0-255
 ;       sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
 ;
+; vop3p_mai - vector, three inputs, one vector output
+;        vsrc0,vsrc1,vsrc2: inline constant -16 to -64, fp inline immediate,
+;        (acc or arch) vgpr0-255
+;        vdst: (acc or arch) vgpr0-255
+;
 ; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address
 ;       src0: vgpr0-255
 ;       dst_sel: BYTE_0-3, WORD_0-1, DWORD
@@ -221,7 +228,8 @@
 
 (define_attr "type"
             "unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc,
-             vop3a,vop3b,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,vmult"
+             vop3a,vop3b,vop3p_mai,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,
+             vmult"
             (const_string "unknown"))
 
 ; Set if instruction is executed in scalar or vector unit
@@ -530,9 +538,9 @@
 
 (define_insn "*mov<mode>_insn"
   [(set (match_operand:SISF 0 "nonimmediate_operand"
-                 "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG,   v,SD, v,RM")
+                 "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v,vb,RF,v,RLRG,   v,SD,vb,RM, v, a, b")
        (match_operand:SISF 1 "gcn_load_operand"
-                 "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B,   v,RLRG, Y,RM, v"))]
+                 "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF,vb,B,   v,RLRG, Y,RM,vb,^a, v, b"))]
   ""
   "@
   s_mov_b32\t%0, %1
@@ -552,20 +560,23 @@
   ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   s_mov_b32\t%0, %1
   global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  global_store_dword\t%A0, %1%O0%g0"
+  global_store_dword\t%A0, %1%O0%g0
+  v_accvgpr_read_b32\t%0, %1
+  v_accvgpr_write_b32\t%0, %1
+  v_accvgpr_mov_b32\t%0, %1"
   [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
-                    flat,vop1,ds,ds,sop1,flat,flat")
-   (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
-   (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
+                    flat,vop1,ds,ds,sop1,flat,flat,vop3p_mai,vop3p_mai,vop1")
+   (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12,8,8,4")])
 
 ; 8/16bit move pattern
 ; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
 
 (define_insn "*mov<mode>_insn"
   [(set (match_operand:QIHI 0 "nonimmediate_operand"
-                                "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG,   v, v,RM")
+                                "=SD,SD,SD,v,Sg, v,vb,RF,v,RLRG,   v,vb,RM, v, a, b")
        (match_operand:QIHI 1 "gcn_load_operand"
-                                "SSA, J, B,v, v,Sv,RF, v,B,   v,RLRG,RM, v"))]
+                                "SSA, J, B,v, v,Sv,RF,vb,B,   v,RLRG,RM,vb,^a, v, b"))]
   "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
   "@
   s_mov_b32\t%0, %1
@@ -580,19 +591,22 @@
   ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  global_store%s0\t%A0, %1%O0%g0"
+  global_store%s0\t%A0, %1%O0%g0
+  v_accvgpr_read_b32\t%0, %1
+  v_accvgpr_write_b32\t%0, %1
+  v_accvgpr_mov_b32\t%0, %1"
   [(set_attr "type"
-            "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
-   (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
-   (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
+            "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat, vop3p_mai,vop3p_mai,vop1")
+   (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12,8,8,4")])
 
 ; 64bit move pattern
 
 (define_insn_and_split "*mov<mode>_insn"
   [(set (match_operand:DIDF 0 "nonimmediate_operand"
-                         "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG,   v, v,RM")
+                         "=SD,SD,SD,RS,Sm,v, v,Sg, v,vb,RF,RLRG,   v,vb,RM, v, a, b")
        (match_operand:DIDF 1 "general_operand"
-                         "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v,   v,RLRG,RM, v"))]
+                         "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF,vb,   v,RLRG,RM,vb,^a, v, b"))]
   "GET_CODE(operands[1]) != SYMBOL_REF"
   "@
   s_mov_b64\t%0, %1
@@ -609,7 +623,10 @@
   ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  global_store_dwordx2\t%A0, %1%O0%g0"
+  global_store_dwordx2\t%A0, %1%O0%g0
+  #
+  #
+  #"
   "reload_completed
    && ((!MEM_P (operands[0]) && !MEM_P (operands[1])
         && !gcn_sgpr_move_p (operands[0], operands[1]))
@@ -640,16 +657,16 @@
       }
   }
   [(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
-                    flat,ds,ds,flat,flat")
-   (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
+                    flat,ds,ds,flat,flat,vmult,vmult,vmult")
+   (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12,*,*,*")])
 
 ; 128-bit move.
 
 (define_insn_and_split "*movti_insn"
   [(set (match_operand:TI 0 "nonimmediate_operand"
-                                     "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
+                                     "=SD,RS,Sm,RF,vb,v, v,SD,RM,vb,RL, v, v, a, b")
        (match_operand:TI 1 "general_operand"  
-                                     "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
+                                     "SSB,Sm,RS,vb,RF,v,Sv, v,vb,RM, v,RL,^a, v, b"))]
   ""
   "@
   #
@@ -663,7 +680,10 @@
   global_store_dwordx4\t%A0, %1%O0%g0
   global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
-  ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
+  ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+  #
+  #
+  #"
   "reload_completed
    && REG_P (operands[0])
    && (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)"
@@ -684,9 +704,9 @@
     operands[1] = gcn_operand_part (TImode, operands[1], 0);
   }
   [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
-                    ds,ds")
-   (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
-   (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
+                    ds,ds,vmult,vmult,vmult")
+   (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*,*,*,*")
+   (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12,*,*,*")])
 
 ;; }}}
 ;; {{{ Prologue/Epilogue
diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md
index a20acf7e9ef..4dcb2589a77 100644
--- a/gcc/config/gcn/predicates.md
+++ b/gcc/config/gcn/predicates.md
@@ -70,6 +70,36 @@
   return VGPR_REGNO_P (REGNO (op)) || REGNO (op) >= FIRST_PSEUDO_REGISTER;
 })
 
+;; Accept a register_operand that, after looking through any SUBREG, is
+;; either a hard register satisfying AVGPR_REGNO_P or a pseudo register.
+
+(define_predicate "gcn_avgpr_register_operand"
+  (match_operand 0 "register_operand")
+{
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+
+  if (!REG_P (op))
+    return false;
+
+  return AVGPR_REGNO_P (REGNO (op)) || REGNO (op) >= FIRST_PSEUDO_REGISTER;
+})
+
+;; As gcn_avgpr_register_operand, but reject pseudo registers: only a hard
+;; register satisfying AVGPR_REGNO_P is accepted.
+
+(define_predicate "gcn_avgpr_hard_register_operand"
+  (match_operand 0 "register_operand")
+{
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+
+  if (!REG_P (op))
+    return false;
+
+  return AVGPR_REGNO_P (REGNO (op));
+})
+
 (define_predicate "gcn_inline_immediate_operand"
   (match_code "const_int,const_double,const_vector")
 {

Reply via email to