Andrew Stubbs:
You still seem to have the unrelated preload bits in this patch, but other than that, this looks fine.
Now committed with that one removed: r16-1661-g750bc2899844d6
In principle, we could use %Gn everywhere and use the address space from the MEM to determine which cache to use, but that's probably overkill until we need it.
I think it would good to properly use the right sc0/sc1/nt for memory access, but I concur that's something for a bit later.
Tobias
commit 750bc2899844d662aee93476f2da63fce68535d9 Author: Tobias Burnus <tbur...@baylibre.com> Date: Tue Jun 24 23:55:27 2025 +0200 gcn: Fix glc vs. sc0 handling for scalar memory access gfx942 still uses glc for scalar access ('s_...') and only uses sc0/nt/sc1 for vector access. gcc/ChangeLog: * config/gcn/gcn-opts.h (TARGET_GLC_NAME): Fix and extend the description in the comment. * config/gcn/gcn.cc (print_operand): Extend the comment about 'G' and 'g'. * config/gcn/gcn.md: Use 'glc' instead of %G where appropriate. --- gcc/config/gcn/gcn-opts.h | 7 +++++-- gcc/config/gcn/gcn.cc | 2 ++ gcc/config/gcn/gcn.md | 30 +++++++++++++++--------------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index bcea14f3fe7..0bfc7869eef 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -84,8 +84,11 @@ enum hsaco_attr_type #define TARGET_DPP8 TARGET_RDNA2_PLUS /* Device requires CDNA1-style manually inserted wait states for AVGPRs. */ #define TARGET_AVGPR_CDNA1_NOPS TARGET_CDNA1 -/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0, sc1) flag - for scalar memory operations. The string starts on purpose with a space. */ +/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0) flag + for non-scalar memory operations. The string starts on purpose with a space. + Note: for scalar memory operations (i.e. 's_...'), 'glc' is still used. + CDNA3 also uses 'nt' instead of 'slc' and 'sc1' instead of 'scc'; however, + there is no non-scalar user so far. */ #define TARGET_GLC_NAME (TARGET_CDNA3 ? " sc0" : " glc") /* The metadata on different devices need different granularity. */ #define TARGET_VGPR_GRANULARITY \ diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 2d8dfa3232e..0ce5a29fbb5 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -7103,6 +7103,8 @@ print_operand_address (FILE *file, rtx mem) O - print offset:n for data share operations. G - print "glc" (or for gfx94x: sc0) unconditionally [+ indep. of regnum] g - print "glc" (or for gfx94x: sc0), if appropriate for given MEM + NOTE: Do not use 'G' or 'g with scalar memory access ('s_...') as those + require "glc" also with gfx94x. L - print low-part of a multi-reg value H - print second part of a multi-reg value (high-part of 2-reg value) J - print third part of a multi-reg value diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 1998931e052..2ce2e054fbf 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -206,7 +206,7 @@ (define_c_enum "unspec" [ ; vdata: vgpr0-255 ; srsrc: sgpr0-102 ; soffset: sgpr0-102 -; flags: offen, idxen, %G, lds, slc, tfe +; flags: offen, idxen, glc, lds, slc, tfe ; ; mtbuf - Typed memory buffer operation. Two words ; offset: 12-bit constant @@ -216,10 +216,10 @@ (define_c_enum "unspec" [ ; vdata: vgpr0-255 ; srsrc: sgpr0-102 ; soffset: sgpr0-102 -; flags: offen, idxen, %G, lds, slc, tfe +; flags: offen, idxen, glc, lds, slc, tfe ; ; flat - flat or global memory operations -; flags: %G, slc +; flags: {CDNA3: sc0, nt, sc1 | otherwise: glc, slc, scc} ; addr: vgpr0-255 ; data: vgpr0-255 ; vdst: vgpr0-255 @@ -1987,7 +1987,7 @@ (define_insn "atomic_fetch_<bare_mnemonic><mode>" (use (match_operand 3 "const_int_operand"))] "0 /* Disabled. */" "@ - s_atomic_<bare_mnemonic><X>\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0) + s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0) flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 %G2\;s_waitcnt\t0 global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)" [(set_attr "type" "smem,flat,flat") @@ -2054,7 +2054,7 @@ (define_insn "sync_compare_and_swap<mode>_insn" UNSPECV_ATOMIC))] "" "@ - s_atomic_cmpswap<X>\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0) + s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0) flat_atomic_cmpswap<X>\t%0, %1, %2 %G2\;s_waitcnt\t0 global_atomic_cmpswap<X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)" [(set_attr "type" "smem,flat,flat") @@ -2096,7 +2096,7 @@ (define_insn "atomic_load<mode>" switch (which_alternative) { case 0: - return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)"; + return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)"; case 1: return (TARGET_RDNA2 /* Not GFX11. */ ? "flat_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\t0" @@ -2113,7 +2113,7 @@ (define_insn "atomic_load<mode>" switch (which_alternative) { case 0: - return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)\;" + return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;" "s_dcache_wb_vol"; case 1: return (TARGET_RDNA2 @@ -2147,7 +2147,7 @@ (define_insn "atomic_load<mode>" switch (which_alternative) { case 0: - return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 %G1\;" + return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;" "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol"; case 1: return (TARGET_RDNA2 @@ -2196,7 +2196,7 @@ (define_insn "atomic_store<mode>" switch (which_alternative) { case 0: - return "s_store%o1\t%1, %A0 %G1\;s_waitcnt\tlgkmcnt(0)"; + return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)"; case 1: return "flat_store%o1\t%A0, %1%O0 %G1\;s_waitcnt\t0"; case 2: @@ -2208,7 +2208,7 @@ (define_insn "atomic_store<mode>" switch (which_alternative) { case 0: - return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 %G1"; + return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc"; case 1: return (TARGET_GLn_CACHE ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 %G1" @@ -2233,7 +2233,7 @@ (define_insn "atomic_store<mode>" switch (which_alternative) { case 0: - return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 %G1\;" + return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;" "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol"; case 1: return (TARGET_GLn_CACHE @@ -2282,7 +2282,7 @@ (define_insn "atomic_exchange<mode>" switch (which_alternative) { case 0: - return "s_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\tlgkmcnt(0)"; + return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)"; case 1: return "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0"; case 2: @@ -2296,7 +2296,7 @@ (define_insn "atomic_exchange<mode>" switch (which_alternative) { case 0: - return "s_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\tlgkmcnt(0)\;" + return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;" "s_dcache_wb_vol\;s_dcache_inv_vol"; case 1: return (TARGET_GLn_CACHE @@ -2327,7 +2327,7 @@ (define_insn "atomic_exchange<mode>" switch (which_alternative) { case 0: - return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 %G1\;" + return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;" "s_waitcnt\tlgkmcnt(0)"; case 1: return (TARGET_GLn_CACHE @@ -2362,7 +2362,7 @@ (define_insn "atomic_exchange<mode>" switch (which_alternative) { case 0: - return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 %G1\;" + return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;" "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol"; case 1: return (TARGET_GLn_CACHE