[gem5-dev] Change in gem5/gem5[develop]: mem-ruby: update memory interfaces to support GPU ISA

2020-06-09 Thread Anthony Gutierrez (Gerrit) via gem5-dev
Anthony Gutierrez has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/28408 )


Change subject: mem-ruby: update memory interfaces to support GPU ISA
..

mem-ruby: update memory interfaces to support GPU ISA

This patch deprecates HSA-based memory request types and adds new
types that can be used by real ISA instructions.

Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28408
Reviewed-by: Anthony Gutierrez 
Maintainer: Anthony Gutierrez 
Tested-by: kokoro 
---
M src/mem/packet.cc
M src/mem/packet.hh
M src/mem/request.hh
M src/mem/ruby/slicc_interface/RubyRequest.hh
4 files changed, 45 insertions(+), 131 deletions(-)

Approvals:
  Anthony Gutierrez: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 2d69ba2..1c1da21 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -181,6 +181,10 @@
 { 0, InvalidCmd, "Deprecated_MessageResp" },
 /* MemFenceReq -- for synchronization requests */
 {SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"},
+/* MemSyncReq */
+{SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"},
+/* MemSyncResp */
+{SET1(IsResponse), InvalidCmd, "MemSyncResp"},
 /* MemFenceResp -- for synchronization responses */
 {SET1(IsResponse), InvalidCmd, "MemFenceResp"},
 /* Cache Clean Request -- Update with the latest data all existing
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index d390c00..42d286a 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -110,6 +110,8 @@
 SwapResp,
 // MessageReq and MessageResp are deprecated.
 MemFenceReq = SwapResp + 3,
+MemSyncReq,  // memory synchronization request (e.g., cache invalidate)
+MemSyncResp, // memory synchronization response
 MemFenceResp,
 CleanSharedReq,
 CleanSharedResp,
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 01252bf..4e0ba97 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -110,7 +110,7 @@
  * STRICT_ORDER flag should be set if such reordering is
  * undesirable.
  */
-UNCACHEABLE= 0x0400,
+UNCACHEABLE = 0x0400,
 /**
  * The request is required to be strictly ordered by CPU
  * models and is non-speculative.
@@ -216,35 +216,30 @@
 };
 /** @} */

-typedef uint32_t MemSpaceConfigFlagsType;
-typedef ::Flags MemSpaceConfigFlags;
+typedef uint64_t CacheCoherenceFlagsType;
+typedef ::Flags CacheCoherenceFlags;

-enum : MemSpaceConfigFlagsType {
-/** Has a synchronization scope been set? */
-SCOPE_VALID= 0x0001,
-/** Access has Wavefront scope visibility */
-WAVEFRONT_SCOPE= 0x0002,
-/** Access has Workgroup scope visibility */
-WORKGROUP_SCOPE= 0x0004,
-/** Access has Device (e.g., GPU) scope visibility */
-DEVICE_SCOPE   = 0x0008,
-/** Access has System (e.g., CPU + GPU) scope visibility */
-SYSTEM_SCOPE   = 0x0010,
-
-/** Global Segment */
-GLOBAL_SEGMENT = 0x0020,
-/** Group Segment */
-GROUP_SEGMENT  = 0x0040,
-/** Private Segment */
-PRIVATE_SEGMENT= 0x0080,
-/** Kergarg Segment */
-KERNARG_SEGMENT= 0x0100,
-/** Readonly Segment */
-READONLY_SEGMENT   = 0x0200,
-/** Spill Segment */
-SPILL_SEGMENT  = 0x0400,
-/** Arg Segment */
-ARG_SEGMENT= 0x0800,
+/**
+ * These bits are used to set the coherence policy
+ * for the GPU and are encoded in the GCN3 instructions.
+ * See the AMD GCN3 ISA Architecture Manual for more
+ * details.
+ *
+ * SLC: System Level Coherent. Accesses are forced to miss in
+ *  the L2 cache and are coherent with system memory.
+ *
+ * GLC: Globally Coherent. Controls how reads and writes are
+ *  handled by the L1 cache. Global here refers to the
+ *  data being visible globally on the GPU (i.e., visible
+ *  to all WGs).
+ *
+ * For atomics, the GLC bit is used to distinguish between
+ * atomic return/no-return operations.
+ */
+enum : CacheCoherenceFlagsType {
+/** user-policy flags */
+SLC_BIT = 0x0080,
+GLC_BIT = 0x0100,
 };

 using LocalAccessor =
@@ -305,8 +300,8 @@
 /** Flag structure for the request. */
 Flags _flags;

-/** Memory space configuraiton flag structure for the request. */
-MemSpaceConfigFlags _memSpaceConfigFlags;
+/** Flags that 

[gem5-dev] Change in gem5/gem5[develop]: mem-ruby: update memory interfaces to support GPU ISA

2020-04-30 Thread Anthony Gutierrez (Gerrit) via gem5-dev

Hello Tony Gutierrez, Tuan Ta,

I'd like you to do a code review. Please visit

https://gem5-review.googlesource.com/c/public/gem5/+/28408

to review the following change.


Change subject: mem-ruby: update memory interfaces to support GPU ISA
..

mem-ruby: update memory interfaces to support GPU ISA

This patch deprecates HSA-based memory request types and adds new
types that can be used by real ISA instructions.

Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930
---
M src/mem/packet.hh
M src/mem/request.hh
M src/mem/ruby/slicc_interface/RubyRequest.hh
3 files changed, 40 insertions(+), 130 deletions(-)



diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index d390c00..42d286a 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -110,6 +110,8 @@
 SwapResp,
 // MessageReq and MessageResp are deprecated.
 MemFenceReq = SwapResp + 3,
+MemSyncReq,  // memory synchronization request (e.g., cache invalidate)
+MemSyncResp, // memory synchronization response
 MemFenceResp,
 CleanSharedReq,
 CleanSharedResp,
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 01252bf..2b00dce 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -110,7 +110,7 @@
  * STRICT_ORDER flag should be set if such reordering is
  * undesirable.
  */
-UNCACHEABLE= 0x0400,
+UNCACHEABLE = 0x0400,
 /**
  * The request is required to be strictly ordered by CPU
  * models and is non-speculative.
@@ -216,35 +216,30 @@
 };
 /** @} */

-typedef uint32_t MemSpaceConfigFlagsType;
-typedef ::Flags MemSpaceConfigFlags;
+typedef uint64_t CacheCoherenceFlagsType;
+typedef ::Flags CacheCoherenceFlags;

-enum : MemSpaceConfigFlagsType {
-/** Has a synchronization scope been set? */
-SCOPE_VALID= 0x0001,
-/** Access has Wavefront scope visibility */
-WAVEFRONT_SCOPE= 0x0002,
-/** Access has Workgroup scope visibility */
-WORKGROUP_SCOPE= 0x0004,
-/** Access has Device (e.g., GPU) scope visibility */
-DEVICE_SCOPE   = 0x0008,
-/** Access has System (e.g., CPU + GPU) scope visibility */
-SYSTEM_SCOPE   = 0x0010,
-
-/** Global Segment */
-GLOBAL_SEGMENT = 0x0020,
-/** Group Segment */
-GROUP_SEGMENT  = 0x0040,
-/** Private Segment */
-PRIVATE_SEGMENT= 0x0080,
-/** Kergarg Segment */
-KERNARG_SEGMENT= 0x0100,
-/** Readonly Segment */
-READONLY_SEGMENT   = 0x0200,
-/** Spill Segment */
-SPILL_SEGMENT  = 0x0400,
-/** Arg Segment */
-ARG_SEGMENT= 0x0800,
+/**
+ * These bits are used to set the coherence policy
+ * for the GPU and are encoded in the GCN3 instructions.
+ * See the AMD GCN3 ISA Architecture Manual for more
+ * details.
+ *
+ * SLC: System Level Coherent. Accesses are forced to miss in
+ *  the L2 cache and are coherent with system memory.
+ *
+ * GLC: Globally Coherent. Controls how reads and writes are
+ *  handled by the L1 cache. Global here refers to the
+ *  data being visible globally on the GPU (i.e., visible
+ *  to all WGs).
+ *
+ * For atomics, the GLC bit is used to distinguish between
+ * atomic return/no-return operations.
+ */
+enum : CacheCoherenceFlagsType {
+/** user-policy flags */
+SLC_BIT = 0x0080,
+GLC_BIT = 0x0100,
 };

 using LocalAccessor =
@@ -305,8 +300,8 @@
 /** Flag structure for the request. */
 Flags _flags;

-/** Memory space configuraiton flag structure for the request. */
-MemSpaceConfigFlags _memSpaceConfigFlags;
+/** Flags that control how downstream cache system maintains coherence */
+CacheCoherenceFlags _cacheCoherenceFlags;

 /** Private flags for field validity checking. */
 PrivateFlags privateFlags;
@@ -629,10 +624,11 @@
 }

 void
-setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+setCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
 {
+// TODO: do mem_sync_op requests have valid paddr/vaddr?
 assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
-_memSpaceConfigFlags.set(extraFlags);
+_cacheCoherenceFlags.set(extraFlags);
 }

 /** Accessor function for vaddr.*/
@@ -840,82 +836,10 @@
  * Accessor functions for the memory space configuration flags and used by
  * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
  * these are for testing only; setting extraFlags should be done via