[gem5-dev] Change in gem5/gem5[develop]: mem-ruby: update memory interfaces to support GPU ISA
Anthony Gutierrez has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/28408 ) Change subject: mem-ruby: update memory interfaces to support GPU ISA .. mem-ruby: update memory interfaces to support GPU ISA This patch deprecates HSA-based memory request types and adds new types that can be used by real ISA instructions. Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28408 Reviewed-by: Anthony Gutierrez Maintainer: Anthony Gutierrez Tested-by: kokoro --- M src/mem/packet.cc M src/mem/packet.hh M src/mem/request.hh M src/mem/ruby/slicc_interface/RubyRequest.hh 4 files changed, 45 insertions(+), 131 deletions(-) Approvals: Anthony Gutierrez: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 2d69ba2..1c1da21 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -181,6 +181,10 @@ { 0, InvalidCmd, "Deprecated_MessageResp" }, /* MemFenceReq -- for synchronization requests */ {SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"}, +/* MemSyncReq */ +{SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"}, +/* MemSyncResp */ +{SET1(IsResponse), InvalidCmd, "MemSyncResp"}, /* MemFenceResp -- for synchronization responses */ {SET1(IsResponse), InvalidCmd, "MemFenceResp"}, /* Cache Clean Request -- Update with the latest data all existing diff --git a/src/mem/packet.hh b/src/mem/packet.hh index d390c00..42d286a 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -110,6 +110,8 @@ SwapResp, // MessageReq and MessageResp are deprecated. 
MemFenceReq = SwapResp + 3, +MemSyncReq, // memory synchronization request (e.g., cache invalidate) +MemSyncResp, // memory synchronization response MemFenceResp, CleanSharedReq, CleanSharedResp, diff --git a/src/mem/request.hh b/src/mem/request.hh index 01252bf..4e0ba97 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -110,7 +110,7 @@ * STRICT_ORDER flag should be set if such reordering is * undesirable. */ -UNCACHEABLE= 0x0400, +UNCACHEABLE = 0x0400, /** * The request is required to be strictly ordered by CPU * models and is non-speculative. @@ -216,35 +216,30 @@ }; /** @} */ -typedef uint32_t MemSpaceConfigFlagsType; -typedef ::Flags MemSpaceConfigFlags; +typedef uint64_t CacheCoherenceFlagsType; +typedef ::Flags CacheCoherenceFlags; -enum : MemSpaceConfigFlagsType { -/** Has a synchronization scope been set? */ -SCOPE_VALID= 0x0001, -/** Access has Wavefront scope visibility */ -WAVEFRONT_SCOPE= 0x0002, -/** Access has Workgroup scope visibility */ -WORKGROUP_SCOPE= 0x0004, -/** Access has Device (e.g., GPU) scope visibility */ -DEVICE_SCOPE = 0x0008, -/** Access has System (e.g., CPU + GPU) scope visibility */ -SYSTEM_SCOPE = 0x0010, - -/** Global Segment */ -GLOBAL_SEGMENT = 0x0020, -/** Group Segment */ -GROUP_SEGMENT = 0x0040, -/** Private Segment */ -PRIVATE_SEGMENT= 0x0080, -/** Kergarg Segment */ -KERNARG_SEGMENT= 0x0100, -/** Readonly Segment */ -READONLY_SEGMENT = 0x0200, -/** Spill Segment */ -SPILL_SEGMENT = 0x0400, -/** Arg Segment */ -ARG_SEGMENT= 0x0800, +/** + * These bits are used to set the coherence policy + * for the GPU and are encoded in the GCN3 instructions. + * See the AMD GCN3 ISA Architecture Manual for more + * details. + * + * SLC: System Level Coherent. Accesses are forced to miss in + * the L2 cache and are coherent with system memory. + * + * GLC: Globally Coherent. Controls how reads and writes are + * handled by the L1 cache. 
Global here refers to the + * data being visible globally on the GPU (i.e., visible + * to all WGs). + * + * For atomics, the GLC bit is used to distinguish + * between atomic return/no-return operations. + */ +enum : CacheCoherenceFlagsType { +/** user-policy flags */ +SLC_BIT = 0x0080, +GLC_BIT = 0x0100, }; using LocalAccessor = @@ -305,8 +300,8 @@ /** Flag structure for the request. */ Flags _flags; -/** Memory space configuraiton flag structure for the request. */ -MemSpaceConfigFlags _memSpaceConfigFlags; +/** Flags that
[gem5-dev] Change in gem5/gem5[develop]: mem-ruby: update memory interfaces to support GPU ISA
Hello Tony Gutierrez, Tuan Ta, I'd like you to do a code review. Please visit https://gem5-review.googlesource.com/c/public/gem5/+/28408 to review the following change. Change subject: mem-ruby: update memory interfaces to support GPU ISA .. mem-ruby: update memory interfaces to support GPU ISA This patch deprecates HSA-based memory request types and adds new types that can be used by real ISA instructions. Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930 --- M src/mem/packet.hh M src/mem/request.hh M src/mem/ruby/slicc_interface/RubyRequest.hh 3 files changed, 40 insertions(+), 130 deletions(-) diff --git a/src/mem/packet.hh b/src/mem/packet.hh index d390c00..42d286a 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -110,6 +110,8 @@ SwapResp, // MessageReq and MessageResp are deprecated. MemFenceReq = SwapResp + 3, +MemSyncReq, // memory synchronization request (e.g., cache invalidate) +MemSyncResp, // memory synchronization response MemFenceResp, CleanSharedReq, CleanSharedResp, diff --git a/src/mem/request.hh b/src/mem/request.hh index 01252bf..2b00dce 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -110,7 +110,7 @@ * STRICT_ORDER flag should be set if such reordering is * undesirable. */ -UNCACHEABLE= 0x0400, +UNCACHEABLE = 0x0400, /** * The request is required to be strictly ordered by CPU * models and is non-speculative. @@ -216,35 +216,30 @@ }; /** @} */ -typedef uint32_t MemSpaceConfigFlagsType; -typedef ::Flags MemSpaceConfigFlags; +typedef uint64_t CacheCoherenceFlagsType; +typedef ::Flags CacheCoherenceFlags; -enum : MemSpaceConfigFlagsType { -/** Has a synchronization scope been set? 
*/ -SCOPE_VALID= 0x0001, -/** Access has Wavefront scope visibility */ -WAVEFRONT_SCOPE= 0x0002, -/** Access has Workgroup scope visibility */ -WORKGROUP_SCOPE= 0x0004, -/** Access has Device (e.g., GPU) scope visibility */ -DEVICE_SCOPE = 0x0008, -/** Access has System (e.g., CPU + GPU) scope visibility */ -SYSTEM_SCOPE = 0x0010, - -/** Global Segment */ -GLOBAL_SEGMENT = 0x0020, -/** Group Segment */ -GROUP_SEGMENT = 0x0040, -/** Private Segment */ -PRIVATE_SEGMENT= 0x0080, -/** Kergarg Segment */ -KERNARG_SEGMENT= 0x0100, -/** Readonly Segment */ -READONLY_SEGMENT = 0x0200, -/** Spill Segment */ -SPILL_SEGMENT = 0x0400, -/** Arg Segment */ -ARG_SEGMENT= 0x0800, +/** + * These bits are used to set the coherence policy + * for the GPU and are encoded in the GCN3 instructions. + * See the AMD GCN3 ISA Architecture Manual for more + * details. + * + * SLC: System Level Coherent. Accesses are forced to miss in + * the L2 cache and are coherent with system memory. + * + * GLC: Globally Coherent. Controls how reads and writes are + * handled by the L1 cache. Global here refers to the + * data being visible globally on the GPU (i.e., visible + * to all WGs). + * + * For atomics, the GLC bit is used to distinguish + * between atomic return/no-return operations. + */ +enum : CacheCoherenceFlagsType { +/** user-policy flags */ +SLC_BIT = 0x0080, +GLC_BIT = 0x0100, }; using LocalAccessor = @@ -305,8 +300,8 @@ /** Flag structure for the request. */ Flags _flags; -/** Memory space configuraiton flag structure for the request. */ -MemSpaceConfigFlags _memSpaceConfigFlags; +/** Flags that control how the downstream cache system maintains coherence. */ +CacheCoherenceFlags _cacheCoherenceFlags; /** Private flags for field validity checking. */ PrivateFlags privateFlags; @@ -629,10 +624,11 @@ } void -setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags) +setCacheCoherenceFlags(CacheCoherenceFlags extraFlags) { +// TODO: do mem_sync_op requests have valid paddr/vaddr?
assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR)); -_memSpaceConfigFlags.set(extraFlags); +_cacheCoherenceFlags.set(extraFlags); } /** Accessor function for vaddr.*/ @@ -840,82 +836,10 @@ * Accessor functions for the memory space configuration flags and used by * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that * these are for testing only; setting extraFlags should be done via