https://gcc.gnu.org/g:02213d07210c22fb4408b1b35b6b4261ce7d466d

commit r16-2503-g02213d07210c22fb4408b1b35b6b4261ce7d466d
Author: Tobias Burnus <tbur...@baylibre.com>
Date:   Fri Jul 25 11:24:22 2025 +0200

    gcn: Add "s_nop"s for MI300
    
    MI300 requires some additional s_nop to be added between some instructions.
    * As 'v_readlane' and 'v_writelane' have to be distinguished, the
      'laneselect' attribute was changed from no/yes to no/read/write.
    * Add some missing 'laneselect' attributes for v_(read,write)lane.
    * Replace 'delayeduse' by 'flatmemaccess' which is more explicit,
      especially as some uses have to be distinguished in more detail.
      (Alongside, one off-by-two delayeduse has been fixed.)
    
    On the other hand, RDNA 2, 3, and 3.5 do not require any added s_nop;
    thus, there is no need to walk the instructions for them to insert
    pointless S_NOP. (RDNA4 (not yet in GCC) requires it in a few cases.)
    
    gcc/ChangeLog:
    
            * config/gcn/gcn-opts.h (TARGET_NO_MANUAL_NOPS,
            TARGET_CDNA3_NOPS): Define.
            * config/gcn/gcn.md (define_attr "laneselect"): Change 'yes' to
            'read' and 'write'.
            (define_attr "flatmemaccess"): Add with values store, storex34,
            load, atomic, atomicwait, cmpswapx2, and no. Replacing ...
            (define_attr "delayeduse"): Remove.
            (define_attr "transop"): Add with values yes and no.
            (various insns): Update 'laneselect', add flatmemaccess and transop,
            remove delayeduse; fixing an issue for s_load_dwordx4 vs.
            flat_store_dwordx4 related to delayeduse (now: flatmemaccess).
            * config/gcn/gcn-valu.md: Update laneselect attribute and add
            flatmemaccess.
            * config/gcn/gcn.cc (gcn_cmpx_insn_p): New.
            (gcn_md_reorg): Update for MI300 to add additional s_nop.
            Skip s_nop-insertion part for RDNA{2,3}; add "VALU writes EXEC
            followed by VALU DPP" unconditionally for CDNA2/CDNA3/GCN5.

Diff:
---
 gcc/config/gcn/gcn-opts.h  |   5 +
 gcc/config/gcn/gcn-valu.md |  27 +++--
 gcc/config/gcn/gcn.cc      | 168 ++++++++++++++++++++++++++++--
 gcc/config/gcn/gcn.md      | 249 ++++++++++++++++++++++++---------------------
 4 files changed, 312 insertions(+), 137 deletions(-)

diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index 0bfc7869eefe..fe68678bd024 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -82,8 +82,13 @@ enum hsaco_attr_type
 #define TARGET_DPP_FULL !TARGET_RDNA2_PLUS
 #define TARGET_DPP16 TARGET_RDNA2_PLUS
 #define TARGET_DPP8 TARGET_RDNA2_PLUS
+/* Device requires no manually inserted wait states; that's the
+   case for RDNA 2, 3 and 3.5 (but not for RDNA 4).  */
+#define TARGET_NO_MANUAL_NOPS TARGET_RDNA2_PLUS
 /* Device requires CDNA1-style manually inserted wait states for AVGPRs.  */
 #define TARGET_AVGPR_CDNA1_NOPS TARGET_CDNA1
+/* Device requires CDNA3-style manually inserted wait states.  */
+#define TARGET_CDNA3_NOPS TARGET_CDNA3
 /* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0) flag
    for non-scalar memory operations. The string starts on purpose with a space.
    Note: for scalar memory operations (i.e. 's_...'), 'glc' is still used.
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 3899117f2719..099432932938 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -811,7 +811,7 @@
   [(set_attr "type" "vop3a")
    (set_attr "length" "8")
    (set_attr "exec" "none")
-   (set_attr "laneselect" "yes")])
+   (set_attr "laneselect" "write")])
 
 ; FIXME: 64bit operations really should be splitters, but I am not sure how
 ; to represent vertical subregs.
@@ -828,7 +828,7 @@
   [(set_attr "type" "vmult")
    (set_attr "length" "16")
    (set_attr "exec" "none")
-   (set_attr "laneselect" "yes")])
+   (set_attr "laneselect" "write")])
 
 (define_expand "vec_set<mode>"
   [(set (match_operand:V_MOV 0 "register_operand")
@@ -854,7 +854,7 @@
   [(set_attr "type" "vop3a")
    (set_attr "length" "8")
    (set_attr "exec" "none")
-   (set_attr "laneselect" "yes")])
+   (set_attr "laneselect" "write")])
 
 (define_insn "*vec_set<mode>_1"
   [(set (match_operand:V_2REG 0 "register_operand"                "=v")
@@ -871,7 +871,7 @@
   [(set_attr "type" "vmult")
    (set_attr "length" "16")
    (set_attr "exec" "none")
-   (set_attr "laneselect" "yes")])
+   (set_attr "laneselect" "write")])
 
 (define_insn "vec_duplicate<mode><exec>"
   [(set (match_operand:V_1REG 0 "register_operand"        "=v")
@@ -910,7 +910,7 @@
   [(set_attr "type" "vop3a")
    (set_attr "length" "8")
    (set_attr "exec" "none")
-   (set_attr "laneselect" "yes")])
+   (set_attr "laneselect" "read")])
 
 (define_insn "vec_extract<mode><scalar_mode>"
   [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=&Sg")
@@ -922,7 +922,7 @@
   [(set_attr "type" "vmult")
    (set_attr "length" "16")
    (set_attr "exec" "none")
-   (set_attr "laneselect" "yes")])
+   (set_attr "laneselect" "read")])
 
 (define_insn "vec_extract<mode><scalar_mode>"
   [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=&Sg")
@@ -934,7 +934,7 @@
   [(set_attr "type" "vmult")
    (set_attr "length" "32")
    (set_attr "exec" "none")
-   (set_attr "laneselect" "yes")])
+   (set_attr "laneselect" "read")])
 
 (define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
   [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
@@ -1192,6 +1192,7 @@
     return buf;
   }
   [(set_attr "type" "flat")
+   (set_attr "flatmemaccess" "load")
    (set_attr "length" "12")
    (set_attr "cdna" "*,cdna2,*,cdna2")
    (set_attr "xnack" "off,off,on,on")])
@@ -1250,6 +1251,7 @@
     return buf;
   }
   [(set_attr "type" "flat")
+   (set_attr "flatmemaccess" "load")
    (set_attr "length" "12")
    (set_attr "cdna" "*,cdna2,*,cdna2")
    (set_attr "xnack" "off,off,on,on")])
@@ -1335,6 +1337,7 @@
     return buf;
   }
   [(set_attr "type" "flat")
+   (set_attr "flatmemaccess" "store")
    (set_attr "length" "12")
    (set_attr "cdna" "*,cdna2")])
 
@@ -1390,6 +1393,7 @@
     return buf;
   }
   [(set_attr "type" "flat")
+   (set_attr "flatmemaccess" "store")
    (set_attr "length" "12")
    (set_attr "cdna" "*,cdna2")])
 
@@ -3260,7 +3264,8 @@
   "flag_unsafe_math_optimizations"
   "v_sqrt%i0\t%0, %1"
   [(set_attr "type" "vop1")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "transop" "yes")])
 
 (define_insn "sqrt<mode>2"
   [(set (match_operand:FP 0 "register_operand"  "=  v")
@@ -3269,7 +3274,8 @@
   "flag_unsafe_math_optimizations"
   "v_sqrt%i0\t%0, %1"
   [(set_attr "type" "vop1")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "transop" "yes")])
 
 ; These FP unops have f64, f32 and f16 versions.
 (define_int_iterator MATH_UNOP_1OR2REG
@@ -3559,7 +3565,8 @@
   ""
   "v_rcp%i0\t%0, %1"
   [(set_attr "type" "vop1")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "transop" "yes")])
 
 ;; v_div_scale takes a numerator (op2) and denominator (op1) and returns the
 ;; one that matches op3 adjusted for best results in reciprocal division.
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 6cd17d91f8a7..8959118b869e 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -5792,6 +5792,42 @@ gcn_libc_has_function (enum function_class fn_class,
 /* }}}  */
 /* {{{ md_reorg pass.  */
 
+/* Return true if TYPE identifies a V_CMPX insn, i.e. if it is TYPE_VOPC;
+   note: this will also match 'v_cmp %E1 vcc'.  */
+
+static bool
+gcn_cmpx_insn_p (attr_type type)
+{
+  switch (type)
+    {
+    case TYPE_VOPC:
+      return true;
+    case TYPE_MUBUF:
+    case TYPE_MTBUF:
+    case TYPE_FLAT:
+    case TYPE_VOP3P_MAI:
+    case TYPE_UNKNOWN:
+    case TYPE_SOP1:
+    case TYPE_SOP2:
+    case TYPE_SOPK:
+    case TYPE_SOPC:
+    case TYPE_SOPP:
+    case TYPE_SMEM:
+    case TYPE_DS:
+    case TYPE_VOP2:
+    case TYPE_VOP1:
+    case TYPE_VOP3A:
+    case TYPE_VOP3B:
+    case TYPE_VOP_SDWA:
+    case TYPE_VOP_DPP:
+    case TYPE_MULT:
+    case TYPE_VMULT:
+      return false;
+    }
+  gcc_unreachable ();
+  return false;
+}
+
 /* Identify VMEM instructions from their "type" attribute.  */
 
 static bool
@@ -6152,12 +6188,22 @@ gcn_md_reorg (void)
      detects the missed cases, and inserts the documented number of NOPs
      required for correct execution.  */
 
+  /* RDNA4 (not yet implemented) differs from RDNA 2/3/3.5 and requires some
+     s_nop, see 5.7 and esp. 5.7.2. in its ISA manual.
+     The assert here is a reminder to add those.  */
+  STATIC_ASSERT (ISA_CDNA1 - ISA_RDNA3 == 1);
+
+  if (TARGET_NO_MANUAL_NOPS)
+    return;
+
   const int max_waits = 5;
   struct ilist
   {
     rtx_insn *insn;
     attr_unit unit;
-    attr_delayeduse delayeduse;
+    attr_type type;
+    attr_flatmemaccess flatmemaccess;
+    bool delayeduse;
     HARD_REG_SET writes;
     HARD_REG_SET reads;
     int age;
@@ -6178,7 +6224,29 @@ gcn_md_reorg (void)
 
       attr_type itype = get_attr_type (insn);
       attr_unit iunit = get_attr_unit (insn);
-      attr_delayeduse idelayeduse = get_attr_delayeduse (insn);
+      attr_flatmemaccess iflatmemaccess = get_attr_flatmemaccess (insn);
+      bool delayeduse;
+      if (TARGET_CDNA3_NOPS)
+       switch (iflatmemaccess)
+         {
+         case FLATMEMACCESS_STORE:
+         case FLATMEMACCESS_STOREX34:
+         case FLATMEMACCESS_ATOMIC:
+         case FLATMEMACCESS_CMPSWAPX2:
+           delayeduse = true;
+           break;
+         case FLATMEMACCESS_LOAD:
+         case FLATMEMACCESS_ATOMICWAIT:
+         case FLATMEMACCESS_NO:
+           delayeduse = false;
+           break;
+         default:
+           gcc_unreachable ();
+         }
+      else
+       delayeduse = (iflatmemaccess == FLATMEMACCESS_CMPSWAPX2
+                     || iflatmemaccess == FLATMEMACCESS_STOREX34);
+
       int ivccwait = get_attr_vccwait (insn);
       HARD_REG_SET ireads, iwrites;
       CLEAR_HARD_REG_SET (ireads);
@@ -6223,16 +6291,26 @@ gcn_md_reorg (void)
                   && TEST_HARD_REG_BIT (ireads, VCCZ_REG))))
            nops_rqd = 5 - prev_insn->age;
 
-         /* VALU writes SGPR/VCC followed by v_{read,write}lane using
-            SGPR/VCC as lane select requires 4 wait states.  */
+         /* VALU writes SGPR/VCC followed by
+            - v_{read,write}lane using SGPR/VCC as lane select requires
+              4 wait states
+            - [CDNA3] VALU reads SGPR as constant requires 1 wait state
+            - [CDNA3] VALU reads SGPR as carry-in requires no wait states  */
          if ((prev_insn->age + nops_rqd) < 4
              && prev_insn->unit == UNIT_VECTOR
-             && get_attr_laneselect (insn) == LANESELECT_YES
+             && get_attr_laneselect (insn) != LANESELECT_NO
              && (hard_reg_set_intersect_p
                    (depregs, reg_class_contents[(int) SGPR_REGS])
                  || hard_reg_set_intersect_p
                       (depregs, reg_class_contents[(int) 
VCC_CONDITIONAL_REG])))
            nops_rqd = 4 - prev_insn->age;
+         else if (TARGET_CDNA3_NOPS
+                  && (prev_insn->age + nops_rqd) < 1
+                  && prev_insn->unit == UNIT_VECTOR
+                  && iunit == UNIT_VECTOR
+                  && hard_reg_set_intersect_p
+                       (depregs, reg_class_contents[(int) SGPR_REGS]))
+           nops_rqd = 1 - prev_insn->age;
 
          /* VALU writes VGPR followed by VALU_DPP reading that VGPR
             requires 2 wait states.  */
@@ -6245,22 +6323,88 @@ gcn_md_reorg (void)
                nops_rqd = 2 - prev_insn->age;
            }
 
+         /* VALU writes EXEC followed by VALU DPP op requires 5 nop.  */
+         if ((prev_insn->age + nops_rqd) < 5
+             && itype == TYPE_VOP_DPP
+             && prev_insn->unit == UNIT_VECTOR
+             && TEST_HARD_REG_BIT (prev_insn->writes, EXECZ_REG))
+           nops_rqd = 5 - prev_insn->age;
+
          /* Store that requires input registers are not overwritten by
-            following instruction.  */
-         if ((prev_insn->age + nops_rqd) < 1
-             && prev_insn->delayeduse == DELAYEDUSE_YES
+            following instruction.
+            For CDNA3, only, VALU writes require 2 not 1 nop.
+            CDNA3 additionally requires these 1 or 2 nops for global & scratch
+            store/atomic.  */
+         if (TARGET_CDNA3_NOPS
+             && (prev_insn->age + nops_rqd) < 2
+             && prev_insn->delayeduse
+             && iunit == UNIT_VECTOR
+             && ((hard_reg_set_intersect_p
+                  (prev_insn->reads, iwrites))))
+           nops_rqd = 2 - prev_insn->age;
+         else if ((prev_insn->age + nops_rqd) < 1
+             && prev_insn->delayeduse
              && ((hard_reg_set_intersect_p
                   (prev_insn->reads, iwrites))))
            nops_rqd = 1 - prev_insn->age;
 
-         /* Instruction that requires VCC is not written too close before
-            using it.  */
+         /* Instruction (such as v_div_fmas) that requires VCC is not written
+            too close before using it  */
          if (prev_insn->age < ivccwait
              && (hard_reg_set_intersect_p
                  (prev_insn->writes,
                   reg_class_contents[(int)VCC_CONDITIONAL_REG])))
            nops_rqd = ivccwait - prev_insn->age;
 
+         /* CDNA3: v_cmpx followed by
+            - v_readlane, v_readfirstlane, v_writelane requires 4 wait states
+            - VALU reads EXEC as constant requires 2 wait states
+            - other VALU requires no wait state  */
+         if (TARGET_CDNA3_NOPS
+             && (prev_insn->age + nops_rqd) < 4
+             && gcn_cmpx_insn_p (prev_insn->type)
+             && get_attr_laneselect (insn) != LANESELECT_NO)
+           nops_rqd = 4 - prev_insn->age;
+         else if (TARGET_CDNA3_NOPS
+                  && (prev_insn->age + nops_rqd) < 2
+                  && iunit == UNIT_VECTOR
+                  && gcn_cmpx_insn_p (prev_insn->type)
+                  && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
+           nops_rqd = 2 - prev_insn->age;
+
+         /* CDNA3: VALU writes VGPR followed by v_readlane vsrc0 reads VGPRn
+            requires 1 wait state. */
+         if (TARGET_CDNA3_NOPS
+             && (prev_insn->age + nops_rqd) < 1
+             && prev_insn->unit == UNIT_VECTOR
+             && prev_insn->flatmemaccess != FLATMEMACCESS_LOAD
+             && get_attr_laneselect (insn) == LANESELECT_READ
+             && hard_reg_set_intersect_p
+                   (depregs, reg_class_contents[(int) VGPR_REGS]))
+           nops_rqd = 1 - prev_insn->age;
+
+         /* CDNA3: VALU op which uses OPSEL or SDWA with changes the result's
+            bit position followed by VALU op consumes result of that op
+            requires 1 wait state.
+            FIXME: Handle OPSEL, once used.  */
+         if (TARGET_CDNA3_NOPS
+             && (prev_insn->age + nops_rqd) < 1
+             && prev_insn->unit == UNIT_VECTOR
+             && prev_insn->type == TYPE_VOP_SDWA
+             && !hard_reg_set_empty_p (depregs))
+           nops_rqd = 1 - prev_insn->age;
+
+         /* CDNA3: VALU Trans Op (such as v_rcp_f64) followed by non-trans VALU
+            op consumes result of that op requires 1 wait state.  */
+         if (TARGET_CDNA3_NOPS
+             && (prev_insn->age + nops_rqd) < 1
+             && prev_insn->unit == UNIT_VECTOR
+             && iunit == UNIT_VECTOR
+             && get_attr_transop (prev_insn->insn) == TRANSOP_YES
+             && get_attr_transop (insn) == TRANSOP_NO
+             && !hard_reg_set_empty_p (depregs))
+           nops_rqd = 1 - prev_insn->age;
+
          /* CDNA1: write VGPR before v_accvgpr_write reads it.  */
          if (TARGET_AVGPR_CDNA1_NOPS
              && (prev_insn->age + nops_rqd) < 2
@@ -6316,7 +6460,9 @@ gcn_md_reorg (void)
       /* Track the current instruction as a previous instruction.  */
       back[oldest].insn = insn;
       back[oldest].unit = iunit;
-      back[oldest].delayeduse = idelayeduse;
+      back[oldest].type = itype;
+      back[oldest].flatmemaccess = iflatmemaccess;
+      back[oldest].delayeduse = delayeduse;
       back[oldest].writes = iwrites;
       back[oldest].reads = ireads;
       back[oldest].age = 0;
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 9193461ed49a..fad42e6a6bf8 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -312,18 +312,28 @@
 ; We need to be able to identify v_readlane and v_writelane with
 ; SGPR lane selection in order to handle "Manually Inserted Wait States".
 
-(define_attr "laneselect" "yes,no" (const_string "no"))
+(define_attr "laneselect" "write,read,no" (const_string "no"))
 
-; Identify instructions that require a "Manually Inserted Wait State" if
-; their inputs are overwritten by subsequent instructions.
+; Global or flat memory access using store or load followed by waitcnt
+; and using flat/global atomic access, possibly followed by a waitcnt.
+; 'storex34' denotes FLAT_STORE_X{3,4}.
+; 'cmpswapx2' denotes FLAT_ATOMIC_{F}CMPSWAP_X2
+; Used to handle "Manually Inserted Wait State".
 
-(define_attr "delayeduse" "yes,no" (const_string "no"))
+(define_attr "flatmemaccess"
+             "store,storex34,load,atomic,atomicwait,cmpswapx2,no"
+             (const_string "no"))
 
 ; Identify instructions that require "Manually Inserted Wait State" if
 ; a previous instruction writes to VCC.  The number gives the number of NOPs.
 
 (define_attr "vccwait" "" (const_int 0))
 
+; Mark trans ops such as v_{exp,rsq,sqrt,sin,cos,log,...}_F{16,32,64}
+; for later conditional s_nop insertion.
+
+(define_attr "transop" "yes,no" (const_string "no"))
+
 ;; }}}
 ;; {{{ Iterators useful across the wole machine description
 
@@ -555,9 +565,11 @@
   }
   [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
                     flat,flat,flat,flat")
+   (set_attr "flatmemaccess" 
"*,*,*,*,*,*,*,*,*,load,load,store,load,load,store")
    (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
-   (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")])
+   (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")
+   (set_attr "laneselect" "*,*,read,*,*,*,*,*,*,*,*,*,*,*,*")])
 
 ; 32bit move pattern
 
@@ -565,38 +577,38 @@
   [(set (match_operand:SISF 0 "nonimmediate_operand")
        (match_operand:SISF 1 "gcn_load_operand"))]
   ""
-  {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
-   [SD  ,SSA ;sop1 ,*   ,4 ,*    ,*  ] s_mov_b32\t%0, %1
-   [SD  ,J   ;sopk ,*   ,4 ,*    ,*  ] s_movk_i32\t%0, %1
-   [SD  ,B   ;sop1 ,*   ,8 ,*    ,*  ] s_mov_b32\t%0, %1
-   [SD  ,RB  ;smem ,*   ,12,*    ,off] s_buffer_load%s0\t%0, s[0:3], 
%1\;s_waitcnt\tlgkmcnt(0)
-   [&SD ,RB  ;smem ,*   ,12,*    ,on ] ^
-   [RB  ,Sm  ;smem ,*   ,12,*    ,*  ] s_buffer_store%s1\t%1, s[0:3], %0
-   [Sm  ,RS  ;smem ,*   ,12,*    ,off] s_load_dword\t%0, 
%A1\;s_waitcnt\tlgkmcnt(0)
-   [&Sm ,RS  ;smem ,*   ,12,*    ,on ] ^
-   [RS  ,Sm  ;smem ,*   ,12,*    ,*  ] s_store_dword\t%1, %A0
-   [v   ,v   ;vop1 ,*   ,4 ,*    ,*  ] v_mov_b32\t%0, %1
-   [Sg  ,v   ;vop3a,none,8 ,*    ,*  ] v_readlane_b32\t%0, %1, 0
-   [v   ,Sv  ;vop3a,none,8 ,*    ,*  ] v_writelane_b32\t%0, %1, 0
-   [v   ,^a  ;vop3p_mai,*,8,*    ,*  ] v_accvgpr_read_b32\t%0, %1
-   [a   ,v   ;vop3p_mai,*,8,*    ,*  ] v_accvgpr_write_b32\t%0, %1
-   [a   ,a   ;vop1 ,*    ,4,cdna2,*  ] v_accvgpr_mov_b32\t%0, %1
-   [v   ,RF  ;flat ,*   ,12,*    ,off] flat_load_dword\t%0, 
%A1%O1%g1\;s_waitcnt\t0
-   [&v  ,RF  ;flat ,*   ,12,*    ,on ] ^
-   [^a  ,RF  ;flat ,*   ,12,cdna2,off] ^
-   [&^a ,RF  ;flat ,*   ,12,cdna2,on ] ^
-   [RF  ,v   ;flat ,*   ,12,*    ,*  ] flat_store_dword\t%A0, %1%O0%g0
-   [RF  ,a   ;flat ,*   ,12,cdna2,*  ] ^
-   [v   ,B   ;vop1 ,*   ,8 ,*    ,*  ] v_mov_b32\t%0, %1
-   [RLRG,v   ;ds   ,*   ,12,*    ,*  ] ds_write_b32\t%A0, 
%1%O0\;s_waitcnt\tlgkmcnt(0)
-   [v   ,RLRG;ds   ,*   ,12,*    ,*  ] ds_read_b32\t%0, 
%A1%O1\;s_waitcnt\tlgkmcnt(0)
-   [SD  ,Y   ;sop1 ,*   ,8 ,*    ,*  ] s_mov_b32\t%0, %1
-   [v   ,RM  ;flat ,*   ,12,*    ,off] global_load_dword\t%0, 
%A1%O1%g1\;s_waitcnt\tvmcnt(0)
-   [&v  ,RM  ;flat ,*   ,12,*    ,on ] ^
-   [^a  ,RM  ;flat ,*   ,12,cdna2,off] ^
-   [&^a ,RM  ;flat ,*   ,12,cdna2,on ] ^
-   [RM  ,v   ;flat ,*   ,12,*    ,*  ] global_store_dword\t%A0, %1%O0%g0
-   [RM  ,a   ;flat ,*   ,12,cdna2,*  ] ^
+  {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack, laneselect, 
flatmemaccess]
+   [SD  ,SSA ;sop1 ,*   ,4 ,*    ,*  ,*    ,*    ] s_mov_b32\t%0, %1
+   [SD  ,J   ;sopk ,*   ,4 ,*    ,*  ,*    ,*    ] s_movk_i32\t%0, %1
+   [SD  ,B   ;sop1 ,*   ,8 ,*    ,*  ,*    ,*    ] s_mov_b32\t%0, %1
+   [SD  ,RB  ;smem ,*   ,12,*    ,off,*    ,*    ] s_buffer_load%s0\t%0, 
s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
+   [&SD ,RB  ;smem ,*   ,12,*    ,on ,*    ,*    ] ^
+   [RB  ,Sm  ;smem ,*   ,12,*    ,*  ,*    ,*    ] s_buffer_store%s1\t%1, 
s[0:3], %0
+   [Sm  ,RS  ;smem ,*   ,12,*    ,off,*    ,*    ] s_load_dword\t%0, 
%A1\;s_waitcnt\tlgkmcnt(0)
+   [&Sm ,RS  ;smem ,*   ,12,*    ,on ,*    ,*    ] ^
+   [RS  ,Sm  ;smem ,*   ,12,*    ,*  ,*    ,*    ] s_store_dword\t%1, %A0
+   [v   ,v   ;vop1 ,*   ,4 ,*    ,*  ,*    ,*    ] v_mov_b32\t%0, %1
+   [Sg  ,v   ;vop3a,none,8 ,*    ,*  ,read ,*    ] v_readlane_b32\t%0, %1, 0
+   [v   ,Sv  ;vop3a,none,8 ,*    ,*  ,write,*    ] v_writelane_b32\t%0, %1, 0
+   [v   ,^a  ;vop3p_mai,*,8,*    ,*  ,*    ,*    ] v_accvgpr_read_b32\t%0, %1
+   [a   ,v   ;vop3p_mai,*,8,*    ,*  ,*    ,*    ] v_accvgpr_write_b32\t%0, %1
+   [a   ,a   ;vop1 ,*    ,4,cdna2,*  ,*    ,*    ] v_accvgpr_mov_b32\t%0, %1
+   [v   ,RF  ;flat ,*   ,12,*    ,off,*    ,load ] flat_load_dword\t%0, 
%A1%O1%g1\;s_waitcnt\t0
+   [&v  ,RF  ;flat ,*   ,12,*    ,on ,*    ,load ] ^
+   [^a  ,RF  ;flat ,*   ,12,cdna2,off,*    ,load ] ^
+   [&^a ,RF  ;flat ,*   ,12,cdna2,on ,*    ,load ] ^
+   [RF  ,v   ;flat ,*   ,12,*    ,*  ,*    ,store] flat_store_dword\t%A0, 
%1%O0%g0
+   [RF  ,a   ;flat ,*   ,12,cdna2,*  ,*    ,store] ^
+   [v   ,B   ;vop1 ,*   ,8 ,*    ,*  ,*    ,*    ] v_mov_b32\t%0, %1
+   [RLRG,v   ;ds   ,*   ,12,*    ,*  ,*    ,*    ] ds_write_b32\t%A0, 
%1%O0\;s_waitcnt\tlgkmcnt(0)
+   [v   ,RLRG;ds   ,*   ,12,*    ,*  ,*    ,*    ] ds_read_b32\t%0, 
%A1%O1\;s_waitcnt\tlgkmcnt(0)
+   [SD  ,Y   ;sop1 ,*   ,8 ,*    ,*  ,*    ,*    ] s_mov_b32\t%0, %1
+   [v   ,RM  ;flat ,*   ,12,*    ,off,*    ,load ] global_load_dword\t%0, 
%A1%O1%g1\;s_waitcnt\tvmcnt(0)
+   [&v  ,RM  ;flat ,*   ,12,*    ,on ,*    ,load ] ^
+   [^a  ,RM  ;flat ,*   ,12,cdna2,off,*    ,load ] ^
+   [&^a ,RM  ;flat ,*   ,12,cdna2,on ,*    ,load ] ^
+   [RM  ,v   ;flat ,*   ,12,*    ,*  ,*    ,store] global_store_dword\t%A0, 
%1%O0%g0
+   [RM  ,a   ;flat ,*   ,12,cdna2,*  ,*    ,store] ^
   })
 
 ; 8/16bit move pattern
@@ -606,31 +618,31 @@
   [(set (match_operand:QIHI 0 "nonimmediate_operand")
        (match_operand:QIHI 1 "gcn_load_operand"))]
   "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
-  {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
-  [SD  ,SSA ;sop1 ,*   ,4 ,*    ,*  ] s_mov_b32\t%0, %1
-  [SD  ,J   ;sopk ,*   ,4 ,*    ,*  ] s_movk_i32\t%0, %1
-  [SD  ,B   ;sop1 ,*   ,8 ,*    ,*  ] s_mov_b32\t%0, %1
-  [v   ,v   ;vop1 ,*   ,4 ,*    ,*  ] v_mov_b32\t%0, %1
-  [Sg  ,v   ;vop3a,none,4 ,*    ,*  ] v_readlane_b32\t%0, %1, 0
-  [v   ,Sv  ;vop3a,none,4 ,*    ,*  ] v_writelane_b32\t%0, %1, 0
-  [v   ,^a  ;vop3p_mai,*,8,*    ,*  ] v_accvgpr_read_b32\t%0, %1
-  [a   ,v   ;vop3p_mai,*,8,*    ,*  ] v_accvgpr_write_b32\t%0, %1
-  [a   ,a   ;vop1 ,*    ,8,cdna2,*  ] v_accvgpr_mov_b32\t%0, %1
-  [v   ,RF  ;flat ,*   ,12,*    ,off] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
-  [&v  ,RF  ;flat ,*   ,12,*    ,on ] ^
-  [^a  ,RF  ;flat ,*   ,12,cdna2,off] ^
-  [&^a ,RF  ;flat ,*   ,12,cdna2,on ] ^
-  [RF  ,v   ;flat ,*   ,12,*    ,*  ] flat_store%s0\t%A0, %1%O0%g0
-  [RF  ,a   ;flat ,*   ,12,cdna2,*  ] ^
-  [v   ,B   ;vop1 ,*   ,8 ,*    ,*  ] v_mov_b32\t%0, %1
-  [RLRG,v   ;ds   ,*   ,12,*    ,*  ] ds_write%b0\t%A0, 
%1%O0\;s_waitcnt\tlgkmcnt(0)
-  [v   ,RLRG;ds   ,*   ,12,*    ,*  ] ds_read%u1\t%0, 
%A1%O1\;s_waitcnt\tlgkmcnt(0)
-  [v   ,RM  ;flat ,*   ,12,*    ,off] global_load%o1\t%0, 
%A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  [&v  ,RM  ;flat ,*   ,12,*    ,on ] ^
-  [^a  ,RM  ;flat ,*   ,12,cdna2,off] ^
-  [&^a ,RM  ;flat ,*   ,12,cdna2,on ] ^
-  [RM  ,v   ;flat ,*   ,12,*    ,*  ] global_store%s0\t%A0, %1%O0%g0
-  [RM  ,a   ;flat ,*   ,12,cdna2,*  ] ^
+  {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack, laneselect, 
flatmemaccess]
+  [SD  ,SSA ;sop1 ,*   ,4 ,*    ,*  ,*    ,*    ] s_mov_b32\t%0, %1
+  [SD  ,J   ;sopk ,*   ,4 ,*    ,*  ,*    ,*    ] s_movk_i32\t%0, %1
+  [SD  ,B   ;sop1 ,*   ,8 ,*    ,*  ,*    ,*    ] s_mov_b32\t%0, %1
+  [v   ,v   ;vop1 ,*   ,4 ,*    ,*  ,*    ,*    ] v_mov_b32\t%0, %1
+  [Sg  ,v   ;vop3a,none,4 ,*    ,*  ,read ,*    ] v_readlane_b32\t%0, %1, 0
+  [v   ,Sv  ;vop3a,none,4 ,*    ,*  ,write,*    ] v_writelane_b32\t%0, %1, 0
+  [v   ,^a  ;vop3p_mai,*,8,*    ,*  ,*    ,*    ] v_accvgpr_read_b32\t%0, %1
+  [a   ,v   ;vop3p_mai,*,8,*    ,*  ,*    ,*    ] v_accvgpr_write_b32\t%0, %1
+  [a   ,a   ;vop1 ,*    ,8,cdna2,*  ,*    ,*    ] v_accvgpr_mov_b32\t%0, %1
+  [v   ,RF  ;flat ,*   ,12,*    ,off,*    ,load ] flat_load%o1\t%0, 
%A1%O1%g1\;s_waitcnt\t0
+  [&v  ,RF  ;flat ,*   ,12,*    ,on ,*    ,load ] ^
+  [^a  ,RF  ;flat ,*   ,12,cdna2,off,*    ,load ] ^
+  [&^a ,RF  ;flat ,*   ,12,cdna2,on ,*    ,load ] ^
+  [RF  ,v   ;flat ,*   ,12,*    ,*  ,*    ,store] flat_store%s0\t%A0, %1%O0%g0
+  [RF  ,a   ;flat ,*   ,12,cdna2,*  ,*    ,store] ^
+  [v   ,B   ;vop1 ,*   ,8 ,*    ,*  ,*    ,*    ] v_mov_b32\t%0, %1
+  [RLRG,v   ;ds   ,*   ,12,*    ,*  ,*    ,*    ] ds_write%b0\t%A0, 
%1%O0\;s_waitcnt\tlgkmcnt(0)
+  [v   ,RLRG;ds   ,*   ,12,*    ,*  ,*    ,*    ] ds_read%u1\t%0, 
%A1%O1\;s_waitcnt\tlgkmcnt(0)
+  [v   ,RM  ;flat ,*   ,12,*    ,off,*    ,load ] global_load%o1\t%0, 
%A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  [&v  ,RM  ;flat ,*   ,12,*    ,on ,*    ,load ] ^
+  [^a  ,RM  ;flat ,*   ,12,cdna2,off,*    ,load ] ^
+  [&^a ,RM  ;flat ,*   ,12,cdna2,on ,*    ,load ] ^
+  [RM  ,v   ;flat ,*   ,12,*    ,*  ,*    ,store] global_store%s0\t%A0, 
%1%O0%g0
+  [RM  ,a   ;flat ,*   ,12,cdna2,*  ,*    ,store] ^
   })
 
 ; 64bit move pattern
@@ -639,34 +651,34 @@
   [(set (match_operand:DIDF 0 "nonimmediate_operand")
        (match_operand:DIDF 1 "general_operand"))]
   "GET_CODE(operands[1]) != SYMBOL_REF"
-  {@ [cons: =0, 1; attrs: type, length, cdna, xnack]
-  [SD  ,SSA ;sop1 ,4 ,*    ,*  ] s_mov_b64\t%0, %1
-  [SD  ,C   ;sop1 ,8 ,*    ,*  ] ^
-  [SD  ,DB  ;mult ,* ,*    ,*  ] #
-  [RS  ,Sm  ;smem ,12,*    ,*  ] s_store_dwordx2\t%1, %A0
-  [Sm  ,RS  ;smem ,12,*    ,off] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
-  [&Sm ,RS  ;smem ,12,*    ,on ] ^
-  [v   ,v   ;vmult,* ,*    ,*  ] #
-  [v   ,DB  ;vmult,* ,*    ,*  ] #
-  [Sg  ,v   ;vmult,* ,*    ,*  ] #
-  [v   ,Sv  ;vmult,* ,*    ,*  ] #
-  [v   ,^a  ;vmult,* ,*    ,*  ] #
-  [a   ,v   ;vmult,* ,*    ,*  ] #
-  [a   ,a   ;vmult,* ,cdna2,*  ] #
-  [v   ,RF  ;flat ,12,*    ,off] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
-  [&v  ,RF  ;flat ,12,*    ,on ] ^
-  [^a  ,RF  ;flat ,12,cdna2,off] ^
-  [&^a ,RF  ;flat ,12,cdna2,on ] ^
-  [RF  ,v   ;flat ,12,*    ,*  ] flat_store_dwordx2\t%A0, %1%O0%g0
-  [RF  ,a   ;flat ,12,cdna2,*  ] ^
-  [RLRG,v   ;ds   ,12,*    ,*  ] ds_write_b64\t%A0, 
%1%O0\;s_waitcnt\tlgkmcnt(0)
-  [v   ,RLRG;ds   ,12,*    ,*  ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
-  [v   ,RM  ;flat ,12,*    ,off] global_load_dwordx2\t%0, 
%A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  [&v  ,RM  ;flat ,12,*    ,on ] ^
-  [^a  ,RM  ;flat ,12,cdna2,off] ^
-  [&^a ,RM  ;flat ,12,cdna2,on ] ^
-  [RM  ,v   ;flat ,12,*    ,*  ] global_store_dwordx2\t%A0, %1%O0%g0
-  [RM  ,a   ;flat ,12,cdna2,*  ] ^
+  {@ [cons: =0, 1; attrs: type, length, cdna, xnack, flatmemaccess]
+  [SD  ,SSA ;sop1 ,4 ,*    ,*  ,*    ] s_mov_b64\t%0, %1
+  [SD  ,C   ;sop1 ,8 ,*    ,*  ,*    ] ^
+  [SD  ,DB  ;mult ,* ,*    ,*  ,*    ] #
+  [RS  ,Sm  ;smem ,12,*    ,*  ,*    ] s_store_dwordx2\t%1, %A0
+  [Sm  ,RS  ;smem ,12,*    ,off,*    ] s_load_dwordx2\t%0, 
%A1\;s_waitcnt\tlgkmcnt(0)
+  [&Sm ,RS  ;smem ,12,*    ,on ,*    ] ^
+  [v   ,v   ;vmult,* ,*    ,*  ,*    ] #
+  [v   ,DB  ;vmult,* ,*    ,*  ,*    ] #
+  [Sg  ,v   ;vmult,* ,*    ,*  ,*    ] #
+  [v   ,Sv  ;vmult,* ,*    ,*  ,*    ] #
+  [v   ,^a  ;vmult,* ,*    ,*  ,*    ] #
+  [a   ,v   ;vmult,* ,*    ,*  ,*    ] #
+  [a   ,a   ;vmult,* ,cdna2,*  ,*    ] #
+  [v   ,RF  ;flat ,12,*    ,off,load ] flat_load_dwordx2\t%0, 
%A1%O1%g1\;s_waitcnt\t0
+  [&v  ,RF  ;flat ,12,*    ,on ,load ] ^
+  [^a  ,RF  ;flat ,12,cdna2,off,load ] ^
+  [&^a ,RF  ;flat ,12,cdna2,on ,load ] ^
+  [RF  ,v   ;flat ,12,*    ,*  ,store] flat_store_dwordx2\t%A0, %1%O0%g0
+  [RF  ,a   ;flat ,12,cdna2,*  ,store] ^
+  [RLRG,v   ;ds   ,12,*    ,*  ,*    ] ds_write_b64\t%A0, 
%1%O0\;s_waitcnt\tlgkmcnt(0)
+  [v   ,RLRG;ds   ,12,*    ,*  ,*    ] ds_read_b64\t%0, 
%A1%O1\;s_waitcnt\tlgkmcnt(0)
+  [v   ,RM  ;flat ,12,*    ,off,load ] global_load_dwordx2\t%0, 
%A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  [&v  ,RM  ;flat ,12,*    ,on ,load ] ^
+  [^a  ,RM  ;flat ,12,cdna2,off,load ] ^
+  [&^a ,RM  ;flat ,12,cdna2,on ,load ] ^
+  [RM  ,v   ;flat ,12,*    ,*  ,store] global_store_dwordx2\t%A0, %1%O0%g0
+  [RM  ,a   ;flat ,12,cdna2,*  ,store] ^
   }
   "reload_completed
    && ((!MEM_P (operands[0]) && !MEM_P (operands[1])
@@ -704,31 +716,31 @@
   [(set (match_operand:TI 0 "nonimmediate_operand")
        (match_operand:TI 1 "general_operand"  ))]
   ""
-  {@ [cons: =0, 1; attrs: type, delayeduse, length, cdna, xnack]
-  [SD ,SSB;mult ,*  ,* ,*    ,*  ] #
-  [RS ,Sm ;smem ,*  ,12,*    ,*  ] s_store_dwordx4\t%1, %A0
-  [Sm ,RS ;smem ,yes,12,*    ,off] s_load_dwordx4\t%0, 
%A1\;s_waitcnt\tlgkmcnt(0)
-  [&Sm,RS ;smem ,yes,12,*    ,on ] ^
-  [RF ,v  ;flat ,*  ,12,*    ,*  ] flat_store_dwordx4\t%A0, %1%O0%g0
-  [RF ,a  ;flat ,*  ,12,cdna2,*  ] ^
-  [v  ,RF ;flat ,*  ,12,*    ,off] flat_load_dwordx4\t%0, 
%A1%O1%g1\;s_waitcnt\t0
-  [&v ,RF ;flat ,*  ,12,*    ,on ] ^
-  [^a ,RF ;flat ,*  ,12,cdna2,off] ^
-  [&^a,RF ;flat ,*  ,12,cdna2,on ] ^
-  [v  ,v  ;vmult,*  ,* ,*    ,*  ] #
-  [v  ,Sv ;vmult,*  ,* ,*    ,*  ] #
-  [SD ,v  ;vmult,*  ,* ,*    ,*  ] #
-  [RM ,v  ;flat ,yes,12,*    ,*  ] global_store_dwordx4\t%A0, %1%O0%g0
-  [RM ,a  ;flat ,yes,12,cdna2,*  ] ^
-  [v  ,RM ;flat ,*  ,12,*    ,off] global_load_dwordx4\t%0, 
%A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  [&v ,RM ;flat ,*  ,12,*    ,on ] ^
-  [^a ,RM ;flat ,*  ,12,cdna2,off] ^
-  [&^a,RM ;flat ,*  ,12,cdna2,on ] ^
-  [RL ,v  ;ds   ,*  ,12,*    ,*  ] ds_write_b128\t%A0, 
%1%O0\;s_waitcnt\tlgkmcnt(0)
-  [v  ,RL ;ds   ,*  ,12,*    ,*  ] ds_read_b128\t%0, 
%A1%O1\;s_waitcnt\tlgkmcnt(0)
-  [v  ,^a ;vmult,*  ,* ,*    ,*  ] #
-  [a  ,v  ;vmult,*  ,* ,*    ,*  ] #
-  [a  ,a  ;vmult,*  ,* ,cdna2,*  ] #
+  {@ [cons: =0, 1; attrs: type, length, cdna, xnack, flatmemaccess]
+  [SD ,SSB;mult ,* ,*    ,*  ,*       ] #
+  [RS ,Sm ;smem ,12,*    ,*  ,*       ] s_store_dwordx4\t%1, %A0
+  [Sm ,RS ;smem ,12,*    ,off,*       ] s_load_dwordx4\t%0, 
%A1\;s_waitcnt\tlgkmcnt(0)
+  [&Sm,RS ;smem ,12,*    ,on ,*       ] ^
+  [RF ,v  ;flat ,12,*    ,*  ,storex34] flat_store_dwordx4\t%A0, %1%O0%g0
+  [RF ,a  ;flat ,12,cdna2,*  ,storex34] ^
+  [v  ,RF ;flat ,12,*    ,off,load    ] flat_load_dwordx4\t%0, 
%A1%O1%g1\;s_waitcnt\t0
+  [&v ,RF ;flat ,12,*    ,on ,load    ] ^
+  [^a ,RF ;flat ,12,cdna2,off,load    ] ^
+  [&^a,RF ;flat ,12,cdna2,on ,load    ] ^
+  [v  ,v  ;vmult,* ,*    ,*  ,*       ] #
+  [v  ,Sv ;vmult,* ,*    ,*  ,*       ] #
+  [SD ,v  ;vmult,* ,*    ,*  ,*       ] #
+  [RM ,v  ;flat ,12,*    ,*  ,storex34] global_store_dwordx4\t%A0, %1%O0%g0
+  [RM ,a  ;flat ,12,cdna2,*  ,storex34] ^
+  [v  ,RM ;flat ,12,*    ,off,load    ] global_load_dwordx4\t%0, 
%A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  [&v ,RM ;flat ,12,*    ,on ,load    ] ^
+  [^a ,RM ;flat ,12,cdna2,off,load    ] ^
+  [&^a,RM ;flat ,12,cdna2,on ,load    ] ^
+  [RL ,v  ;ds   ,12,*    ,*  ,*       ] ds_write_b128\t%A0, 
%1%O0\;s_waitcnt\tlgkmcnt(0)
+  [v  ,RL ;ds   ,12,*    ,*  ,*       ] ds_read_b128\t%0, 
%A1%O1\;s_waitcnt\tlgkmcnt(0)
+  [v  ,^a ;vmult,* ,*    ,*  ,*       ] #
+  [a  ,v  ;vmult,* ,*    ,*  ,*       ] #
+  [a  ,a  ;vmult,* ,cdna2,*  ,*       ] #
   }
   "reload_completed
    && REG_P (operands[0])
@@ -1985,6 +1997,7 @@
    flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 %G2\;s_waitcnt\t0
    global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
   [(set_attr "type" "smem,flat,flat")
+   (set_attr "flatmemaccess" "*,atomicwait,atomicwait")
    (set_attr "length" "12")])
 
 ; FIXME: These patterns are disabled because the instructions don't
@@ -2006,6 +2019,7 @@
    flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
    global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
   [(set_attr "type" "smem,flat,flat")
+   (set_attr "flatmemaccess" "*,atomicwait,atomicwait")
    (set_attr "length" "12")])
 
 (define_mode_attr x2 [(SI "DI") (DI "TI")])
@@ -2053,7 +2067,7 @@
    global_atomic_cmpswap<X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
   [(set_attr "type" "smem,flat,flat")
    (set_attr "length" "12")
-   (set_attr "delayeduse" "*,yes,yes")])
+   (set_attr "flatmemaccess" "*,cmpswapx2,cmpswapx2")])
 
 (define_insn "sync_compare_and_swap<mode>_lds_insn"
   [(set (match_operand:SIDI 0 "register_operand"    "= v")
@@ -2173,6 +2187,7 @@
     gcc_unreachable ();
   }
   [(set_attr "type" "smem,flat,flat")
+   (set_attr "flatmemaccess" "*,load,load")
    (set_attr "length" "28")
    (set_attr "rdna" "no,*,*")])
 
@@ -2257,6 +2272,7 @@
     gcc_unreachable ();
   }
   [(set_attr "type" "smem,flat,flat")
+   (set_attr "flatmemaccess" "*,store,store")
    (set_attr "length" "28")
    (set_attr "rdna" "no,*,*")])
 
@@ -2389,6 +2405,7 @@
     gcc_unreachable ();
   }
   [(set_attr "type" "smem,flat,flat")
+   (set_attr "flatmemaccess" "*,atomicwait,atomicwait")
    (set_attr "length" "28")
    (set_attr "rdna" "no,*,*")])

Reply via email to