Matthew Poremba has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/35135 )

Change subject: configs,tests: Add tokens to GPU VIPER tester
......................................................................

configs,tests: Add tokens to GPU VIPER tester

This patch integrates tokens into the VIPER tester by adding a
GMTokenPort to the tester, having the tester acquire tokens for
requests that use tokens, and checking for available tokens
before issuing any requests.

Change-Id: Id317d703e4765dd5fa7de0d16f5eb595aab7096c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/35135
Maintainer: Matthew Poremba <matthew.pore...@amd.com>
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/cpu/testers/gpu_ruby_test/ProtocolTester.py
M src/cpu/testers/gpu_ruby_test/gpu_thread.cc
M src/cpu/testers/gpu_ruby_test/gpu_thread.hh
M src/cpu/testers/gpu_ruby_test/protocol_tester.cc
M src/cpu/testers/gpu_ruby_test/protocol_tester.hh
5 files changed, 54 insertions(+), 3 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, approved; Looks good to me, approved
  Matthew Poremba: Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/cpu/testers/gpu_ruby_test/ProtocolTester.py b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
index e6874ab..ed0e0a8 100644
--- a/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
+++ b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
@@ -41,6 +41,7 @@
     cu_vector_ports = VectorRequestPort("Vector ports for GPUs")
     cu_sqc_ports = VectorRequestPort("SQC ports for GPUs")
     cu_scalar_ports = VectorRequestPort("Scalar ports for GPUs")
+    cu_token_ports = VectorRequestPort("Token ports for GPU")

     cus_per_sqc = Param.Int(4, "Number of CUs per SQC")
     cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache")
@@ -48,6 +49,11 @@
     wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU")
     workitems_per_wavefront = Param.Int(64, "Number of workitems per wf")

+    max_cu_tokens = Param.Int(4, "Maximum number of tokens, i.e., the number"
+                                 " of instructions that can be uncoalesced"
+                                 " before back-pressure occurs from the"
+                                 " coalescer.")
+
     cpu_threads = VectorParam.CpuThread("All cpus")
     wavefronts = VectorParam.GpuWavefront("All wavefronts")

diff --git a/src/cpu/testers/gpu_ruby_test/gpu_thread.cc b/src/cpu/testers/gpu_ruby_test/gpu_thread.cc
index 7bf939b..fbf5d0d 100644
--- a/src/cpu/testers/gpu_ruby_test/gpu_thread.cc
+++ b/src/cpu/testers/gpu_ruby_test/gpu_thread.cc
@@ -125,11 +125,13 @@
 void
 GpuThread::attachGpuThreadToPorts(ProtocolTester *_tester,
                             ProtocolTester::SeqPort *_port,
+                            ProtocolTester::GMTokenPort *_tokenPort,
                             ProtocolTester::SeqPort *_scalarPort,
                             ProtocolTester::SeqPort *_sqcPort)
 {
     tester = _tester;
     port = _port;
+    tokenPort = _tokenPort;
     scalarPort = _scalarPort;
     sqcPort = _sqcPort;

@@ -163,7 +165,8 @@
                 // to complete
                 if (pendingLdStCount == 0 &&
                     pendingFenceCount == 0 &&
-                    pendingAtomicCount == 0) {
+                    pendingAtomicCount == 0 &&
+                    tokenPort->haveTokens(numLanes)) {
                     return true;
                 }

@@ -198,7 +201,8 @@
                 assert(pendingAtomicCount == 0);

                 // can't issue if there is a pending fence
-                if (pendingFenceCount > 0) {
+                if (pendingFenceCount > 0 ||
+                    !tokenPort->haveTokens(numLanes)) {
                     return false;
                 }

@@ -241,6 +245,7 @@
 {
     switch(curAction->getType()) {
         case Episode::Action::Type::ATOMIC:
+            tokenPort->acquireTokens(numLanes);
             issueAtomicOps();
             break;
         case Episode::Action::Type::ACQUIRE:
@@ -250,9 +255,11 @@
             issueReleaseOp();
             break;
         case Episode::Action::Type::LOAD:
+            tokenPort->acquireTokens(numLanes);
             issueLoadOps();
             break;
         case Episode::Action::Type::STORE:
+            tokenPort->acquireTokens(numLanes);
             issueStoreOps();
             break;
         default:
diff --git a/src/cpu/testers/gpu_ruby_test/gpu_thread.hh b/src/cpu/testers/gpu_ruby_test/gpu_thread.hh
index 9e4569b..00a69be 100644
--- a/src/cpu/testers/gpu_ruby_test/gpu_thread.hh
+++ b/src/cpu/testers/gpu_ruby_test/gpu_thread.hh
@@ -42,6 +42,7 @@
 #include "cpu/testers/gpu_ruby_test/episode.hh"
 #include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
 #include "gpu-compute/gpu_dyn_inst.hh"
+#include "mem/token_port.hh"
 #include "sim/clocked_object.hh"

 class GpuThread : public ClockedObject
@@ -61,6 +62,7 @@

     void attachGpuThreadToPorts(ProtocolTester *_tester,
                              ProtocolTester::SeqPort *_port,
+                             ProtocolTester::GMTokenPort *_tokenPort = nullptr,
                              ProtocolTester::SeqPort *_sqcPort = nullptr,
                              ProtocolTester::SeqPort *_scalarPort = nullptr);

@@ -136,6 +138,7 @@
     AddressManager *addrManager;

     ProtocolTester::SeqPort *port; // main data port (GPU-vector data)
+    ProtocolTester::GMTokenPort *tokenPort;
     ProtocolTester::SeqPort *scalarPort; // nullptr for CPU
     ProtocolTester::SeqPort *sqcPort;   // nullptr for CPU

diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.cc b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc
index 98eda49..c4baa20 100644
--- a/src/cpu/testers/gpu_ruby_test/protocol_tester.cc
+++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc
@@ -53,10 +53,12 @@
         numVectorPorts(p.port_cu_vector_ports_connection_count),
         numSqcPorts(p.port_cu_sqc_ports_connection_count),
         numScalarPorts(p.port_cu_scalar_ports_connection_count),
+        numTokenPorts(p.port_cu_token_ports_connection_count),
         numCusPerSqc(p.cus_per_sqc),
         numCusPerScalar(p.cus_per_scalar),
         numWfsPerCu(p.wavefronts_per_cu),
         numWisPerWf(p.workitems_per_wavefront),
+        numCuTokens(p.max_cu_tokens),
         numAtomicLocs(p.num_atomic_locations),
         numNormalLocsPerAtomic(p.num_normal_locs_per_atomic),
         episodeLength(p.episode_length),
@@ -107,6 +109,14 @@
         idx++;
     }

+    for (int i = 0; i < numTokenPorts; ++i) {
+        cuTokenPorts.push_back(new GMTokenPort(csprintf("%s-cuTokenPort%d",
+                                                        name(), i),
+                                               this, i));
+        cuTokenManagers.push_back(new TokenManager(numCuTokens));
+        cuTokenPorts[i]->setTokenManager(cuTokenManagers[i]);
+    }
+
     // create an address manager
     addrManager = new AddressManager(numAtomicLocs,
                                        numNormalLocsPerAtomic);
@@ -194,6 +204,7 @@
             wfId = cu_id * numWfsPerCu + i;
             wfs[wfId]->attachGpuThreadToPorts(this,
                            static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
+                           cuTokenPorts[vectorPortId],
                            static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
                            static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
             wfs[wfId]->scheduleWakeup();
@@ -206,7 +217,8 @@
 ProtocolTester::getPort(const std::string &if_name, PortID idx)
 {
     if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
-        if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") {
+        if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports" &&
+        if_name != "cu_token_ports") {
         // pass along to super class
         return ClockedObject::getPort(if_name, idx);
     } else {
@@ -222,6 +234,10 @@
             if (idx > numSqcPorts)
                 panic("ProtocolTester: unknown cu sqc port %d\n", idx);
             return *cuSqcPorts[idx];
+        } else if (if_name == "cu_token_ports") {
+            if (idx > numTokenPorts)
+                panic("ProtocolTester: unknown cu token port %d\n", idx);
+            return *cuTokenPorts[idx];
         } else {
             assert(if_name == "cu_scalar_ports");
             if (idx > numScalarPorts)
diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.hh b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
index c1f2997..6109e5a 100644
--- a/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
+++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
@@ -58,6 +58,7 @@
 #include "cpu/testers/gpu_ruby_test/address_manager.hh"
 #include "mem/packet.hh"
 #include "mem/ruby/system/RubyPort.hh"
+#include "mem/token_port.hh"
 #include "params/ProtocolTester.hh"

 class GpuThread;
@@ -81,6 +82,20 @@
             { panic("%s does not expect a retry\n", name()); }
     };

+    class GMTokenPort : public TokenRequestPort
+    {
+        public:
+            GMTokenPort(const std::string& name, ProtocolTester *_tester,
+                        PortID id = InvalidPortID)
+                : TokenRequestPort(name, _tester, id)
+            {}
+            ~GMTokenPort() {}
+
+        protected:
+            bool recvTimingResp(PacketPtr) { return false; }
+            void recvReqRetry() {}
+    };
+
     struct SenderState : public Packet::SenderState
     {
         GpuThread* th;
@@ -131,10 +146,12 @@
     int numVectorPorts;
     int numSqcPorts;
     int numScalarPorts;
+    int numTokenPorts;
     int numCusPerSqc;
     int numCusPerScalar;
     int numWfsPerCu;
     int numWisPerWf;
+    int numCuTokens;
     // parameters controlling the address range that the tester can access
     int numAtomicLocs;
     int numNormalLocsPerAtomic;
@@ -150,6 +167,8 @@
     std::vector<RequestPort*> cuVectorPorts; // ports to GPU vector cache
     std::vector<RequestPort*> cuSqcPorts;    // ports to GPU inst cache
     std::vector<RequestPort*> cuScalarPorts; // ports to GPU scalar cache
+    std::vector<TokenManager*> cuTokenManagers;
+    std::vector<GMTokenPort*> cuTokenPorts;
     // all CPU and GPU threads
     std::vector<CpuThread*> cpuThreads;
     std::vector<GpuWavefront*> wfs;

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/35135
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Id317d703e4765dd5fa7de0d16f5eb595aab7096c
Gerrit-Change-Number: 35135
Gerrit-PatchSet: 10
Gerrit-Owner: Kyle Roarty <kyleroarty1...@gmail.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to