Andrew Stubbs:
You still seem to have the unrelated preload bits in this patch, but other than that, this looks fine.
Now committed with that one removed: r16-1661-g750bc2899844d6

In principle, we could use %Gn everywhere and use the address space from the MEM to determine which cache to use, but that's probably overkill until we need it.

I think it would good to properly use the right sc0/sc1/nt for memory access, but I concur that's something for a bit later.

Tobias
commit 750bc2899844d662aee93476f2da63fce68535d9
Author: Tobias Burnus <tbur...@baylibre.com>
Date:   Tue Jun 24 23:55:27 2025 +0200

    gcn: Fix glc vs. sc0 handling for scalar memory access
    
    gfx942 still uses glc for scalar access ('s_...') and only uses
    sc0/nt/sc1 for vector access.
    
    gcc/ChangeLog:
    
            * config/gcn/gcn-opts.h (TARGET_GLC_NAME): Fix and extend the
            description in the comment.
            * config/gcn/gcn.cc (print_operand): Extend the comment about
            'G' and 'g'.
            * config/gcn/gcn.md: Use 'glc' instead of %G where appropriate.
---
 gcc/config/gcn/gcn-opts.h |  7 +++++--
 gcc/config/gcn/gcn.cc     |  2 ++
 gcc/config/gcn/gcn.md     | 30 +++++++++++++++---------------
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index bcea14f3fe7..0bfc7869eef 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -84,8 +84,11 @@ enum hsaco_attr_type
 #define TARGET_DPP8 TARGET_RDNA2_PLUS
 /* Device requires CDNA1-style manually inserted wait states for AVGPRs.  */
 #define TARGET_AVGPR_CDNA1_NOPS TARGET_CDNA1
-/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0, sc1) flag
-   for scalar memory operations. The string starts on purpose with a space.  */
+/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0) flag
+   for non-scalar memory operations. The string starts on purpose with a space.
+   Note: for scalar memory operations (i.e. 's_...'), 'glc' is still used.
+   CDNA3 also uses 'nt' instead of 'slc' and 'sc1' instead of 'scc'; however,
+   there is no non-scalar user so far.  */
 #define TARGET_GLC_NAME (TARGET_CDNA3 ? " sc0" : " glc")
 /* The metadata on different devices need different granularity.  */
 #define TARGET_VGPR_GRANULARITY \
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 2d8dfa3232e..0ce5a29fbb5 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -7103,6 +7103,8 @@ print_operand_address (FILE *file, rtx mem)
    O - print offset:n for data share operations.
    G - print "glc" (or for gfx94x: sc0) unconditionally [+ indep. of regnum]
    g - print "glc" (or for gfx94x: sc0), if appropriate for given MEM
+       NOTE: Do not use 'G' or 'g with scalar memory access ('s_...') as those
+       require "glc" also with gfx94x.
    L - print low-part of a multi-reg value
    H - print second part of a multi-reg value (high-part of 2-reg value)
    J - print third part of a multi-reg value
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 1998931e052..2ce2e054fbf 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -206,7 +206,7 @@ (define_c_enum "unspec" [
 ;	 vdata: vgpr0-255
 ;	 srsrc: sgpr0-102
 ;	 soffset: sgpr0-102
-;	 flags: offen, idxen, %G, lds, slc, tfe
+;	 flags: offen, idxen, glc, lds, slc, tfe
 ;
 ; mtbuf - Typed memory buffer operation. Two words
 ;	 offset: 12-bit constant
@@ -216,10 +216,10 @@ (define_c_enum "unspec" [
 ;	 vdata: vgpr0-255
 ;	 srsrc: sgpr0-102
 ;	 soffset: sgpr0-102
-;	 flags: offen, idxen, %G, lds, slc, tfe
+;	 flags: offen, idxen, glc, lds, slc, tfe
 ;
 ; flat - flat or global memory operations
-;	 flags: %G, slc
+;	 flags: {CDNA3: sc0, nt, sc1 | otherwise: glc, slc, scc}
 ;	 addr: vgpr0-255
 ;	 data: vgpr0-255
 ;	 vdst: vgpr0-255
@@ -1987,7 +1987,7 @@ (define_insn "atomic_fetch_<bare_mnemonic><mode>"
    (use (match_operand 3 "const_int_operand"))]
   "0 /* Disabled.  */"
   "@
-   s_atomic_<bare_mnemonic><X>\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0)
+   s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
    flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 %G2\;s_waitcnt\t0
    global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
   [(set_attr "type" "smem,flat,flat")
@@ -2054,7 +2054,7 @@ (define_insn "sync_compare_and_swap<mode>_insn"
 	  UNSPECV_ATOMIC))]
   ""
   "@
-   s_atomic_cmpswap<X>\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0)
+   s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
    flat_atomic_cmpswap<X>\t%0, %1, %2 %G2\;s_waitcnt\t0
    global_atomic_cmpswap<X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
   [(set_attr "type" "smem,flat,flat")
@@ -2096,7 +2096,7 @@ (define_insn "atomic_load<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)";
+	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
 	  case 1:
 	    return (TARGET_RDNA2 /* Not GFX11.  */
 		    ? "flat_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\t0"
@@ -2113,7 +2113,7 @@ (define_insn "atomic_load<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)\;"
+	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
 		   "s_dcache_wb_vol";
 	  case 1:
 	    return (TARGET_RDNA2
@@ -2147,7 +2147,7 @@ (define_insn "atomic_load<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 %G1\;"
+	    return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
 		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
 	  case 1:
 	    return (TARGET_RDNA2
@@ -2196,7 +2196,7 @@ (define_insn "atomic_store<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_store%o1\t%1, %A0 %G1\;s_waitcnt\tlgkmcnt(0)";
+	    return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
 	  case 1:
 	    return "flat_store%o1\t%A0, %1%O0 %G1\;s_waitcnt\t0";
 	  case 2:
@@ -2208,7 +2208,7 @@ (define_insn "atomic_store<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 %G1";
+	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
 	  case 1:
 	    return (TARGET_GLn_CACHE
 		    ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 %G1"
@@ -2233,7 +2233,7 @@ (define_insn "atomic_store<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 %G1\;"
+	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
 		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
 	  case 1:
 	    return (TARGET_GLn_CACHE
@@ -2282,7 +2282,7 @@ (define_insn "atomic_exchange<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\tlgkmcnt(0)";
+	    return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
 	  case 1:
 	    return "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0";
 	  case 2:
@@ -2296,7 +2296,7 @@ (define_insn "atomic_exchange<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\tlgkmcnt(0)\;"
+	    return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
 		   "s_dcache_wb_vol\;s_dcache_inv_vol";
 	  case 1:
 	    return (TARGET_GLn_CACHE
@@ -2327,7 +2327,7 @@ (define_insn "atomic_exchange<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 %G1\;"
+	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
 		   "s_waitcnt\tlgkmcnt(0)";
 	  case 1:
 	    return (TARGET_GLn_CACHE
@@ -2362,7 +2362,7 @@ (define_insn "atomic_exchange<mode>"
 	switch (which_alternative)
 	  {
 	  case 0:
-	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 %G1\;"
+	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
 		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
 	  case 1:
 	    return (TARGET_GLn_CACHE

Reply via email to