Matthew Poremba has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/35135 )
Change subject: configs,tests: Add tokens to GPU VIPER tester
......................................................................
configs,tests: Add tokens to GPU VIPER tester
This patch integrates tokens into the VIPER tester by adding a
GMTokenPort to the tester, having the tester acquire tokens for
requests that use tokens, and checking for available tokens
before issuing any requests.
Change-Id: Id317d703e4765dd5fa7de0d16f5eb595aab7096c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/35135
Maintainer: Matthew Poremba <matthew.pore...@amd.com>
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/cpu/testers/gpu_ruby_test/ProtocolTester.py
M src/cpu/testers/gpu_ruby_test/gpu_thread.cc
M src/cpu/testers/gpu_ruby_test/gpu_thread.hh
M src/cpu/testers/gpu_ruby_test/protocol_tester.cc
M src/cpu/testers/gpu_ruby_test/protocol_tester.hh
5 files changed, 54 insertions(+), 3 deletions(-)
Approvals:
Matt Sinclair: Looks good to me, approved; Looks good to me, approved
Matthew Poremba: Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/cpu/testers/gpu_ruby_test/ProtocolTester.py b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
index e6874ab..ed0e0a8 100644
--- a/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
+++ b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
@@ -41,6 +41,7 @@
cu_vector_ports = VectorRequestPort("Vector ports for GPUs")
cu_sqc_ports = VectorRequestPort("SQC ports for GPUs")
cu_scalar_ports = VectorRequestPort("Scalar ports for GPUs")
+ cu_token_ports = VectorRequestPort("Token ports for GPU")
cus_per_sqc = Param.Int(4, "Number of CUs per SQC")
cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache")
@@ -48,6 +49,11 @@
wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU")
workitems_per_wavefront = Param.Int(64, "Number of workitems per wf")
+ max_cu_tokens = Param.Int(4, "Maximum number of tokens, i.e., the number"
+ " of instructions that can be uncoalesced"
+ " before back-pressure occurs from the"
+ " coalescer.")
+
cpu_threads = VectorParam.CpuThread("All cpus")
wavefronts = VectorParam.GpuWavefront("All wavefronts")
diff --git a/src/cpu/testers/gpu_ruby_test/gpu_thread.cc b/src/cpu/testers/gpu_ruby_test/gpu_thread.cc
index 7bf939b..fbf5d0d 100644
--- a/src/cpu/testers/gpu_ruby_test/gpu_thread.cc
+++ b/src/cpu/testers/gpu_ruby_test/gpu_thread.cc
@@ -125,11 +125,13 @@
void
GpuThread::attachGpuThreadToPorts(ProtocolTester *_tester,
ProtocolTester::SeqPort *_port,
+ ProtocolTester::GMTokenPort *_tokenPort,
ProtocolTester::SeqPort *_scalarPort,
ProtocolTester::SeqPort *_sqcPort)
{
tester = _tester;
port = _port;
+ tokenPort = _tokenPort;
scalarPort = _scalarPort;
sqcPort = _sqcPort;
@@ -163,7 +165,8 @@
// to complete
if (pendingLdStCount == 0 &&
pendingFenceCount == 0 &&
- pendingAtomicCount == 0) {
+ pendingAtomicCount == 0 &&
+ tokenPort->haveTokens(numLanes)) {
return true;
}
@@ -198,7 +201,8 @@
assert(pendingAtomicCount == 0);
// can't issue if there is a pending fence
- if (pendingFenceCount > 0) {
+ if (pendingFenceCount > 0 ||
+ !tokenPort->haveTokens(numLanes)) {
return false;
}
@@ -241,6 +245,7 @@
{
switch(curAction->getType()) {
case Episode::Action::Type::ATOMIC:
+ tokenPort->acquireTokens(numLanes);
issueAtomicOps();
break;
case Episode::Action::Type::ACQUIRE:
@@ -250,9 +255,11 @@
issueReleaseOp();
break;
case Episode::Action::Type::LOAD:
+ tokenPort->acquireTokens(numLanes);
issueLoadOps();
break;
case Episode::Action::Type::STORE:
+ tokenPort->acquireTokens(numLanes);
issueStoreOps();
break;
default:
diff --git a/src/cpu/testers/gpu_ruby_test/gpu_thread.hh b/src/cpu/testers/gpu_ruby_test/gpu_thread.hh
index 9e4569b..00a69be 100644
--- a/src/cpu/testers/gpu_ruby_test/gpu_thread.hh
+++ b/src/cpu/testers/gpu_ruby_test/gpu_thread.hh
@@ -42,6 +42,7 @@
#include "cpu/testers/gpu_ruby_test/episode.hh"
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
+#include "mem/token_port.hh"
#include "sim/clocked_object.hh"
class GpuThread : public ClockedObject
@@ -61,6 +62,7 @@
void attachGpuThreadToPorts(ProtocolTester *_tester,
ProtocolTester::SeqPort *_port,
+ ProtocolTester::GMTokenPort *_tokenPort = nullptr,
ProtocolTester::SeqPort *_sqcPort = nullptr,
ProtocolTester::SeqPort *_scalarPort = nullptr);
@@ -136,6 +138,7 @@
AddressManager *addrManager;
ProtocolTester::SeqPort *port; // main data port (GPU-vector data)
+ ProtocolTester::GMTokenPort *tokenPort;
ProtocolTester::SeqPort *scalarPort; // nullptr for CPU
ProtocolTester::SeqPort *sqcPort; // nullptr for CPU
diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.cc b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc
index 98eda49..c4baa20 100644
--- a/src/cpu/testers/gpu_ruby_test/protocol_tester.cc
+++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc
@@ -53,10 +53,12 @@
numVectorPorts(p.port_cu_vector_ports_connection_count),
numSqcPorts(p.port_cu_sqc_ports_connection_count),
numScalarPorts(p.port_cu_scalar_ports_connection_count),
+ numTokenPorts(p.port_cu_token_ports_connection_count),
numCusPerSqc(p.cus_per_sqc),
numCusPerScalar(p.cus_per_scalar),
numWfsPerCu(p.wavefronts_per_cu),
numWisPerWf(p.workitems_per_wavefront),
+ numCuTokens(p.max_cu_tokens),
numAtomicLocs(p.num_atomic_locations),
numNormalLocsPerAtomic(p.num_normal_locs_per_atomic),
episodeLength(p.episode_length),
@@ -107,6 +109,14 @@
idx++;
}
+ for (int i = 0; i < numTokenPorts; ++i) {
+ cuTokenPorts.push_back(new GMTokenPort(csprintf("%s-cuTokenPort%d",
+ name(), i),
+ this, i));
+ cuTokenManagers.push_back(new TokenManager(numCuTokens));
+ cuTokenPorts[i]->setTokenManager(cuTokenManagers[i]);
+ }
+
// create an address manager
addrManager = new AddressManager(numAtomicLocs,
numNormalLocsPerAtomic);
@@ -194,6 +204,7 @@
wfId = cu_id * numWfsPerCu + i;
wfs[wfId]->attachGpuThreadToPorts(this,
static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
+ cuTokenPorts[vectorPortId],
static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
wfs[wfId]->scheduleWakeup();
@@ -206,7 +217,8 @@
ProtocolTester::getPort(const std::string &if_name, PortID idx)
{
if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
- if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") {
+ if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports" &&
+ if_name != "cu_token_ports") {
// pass along to super class
return ClockedObject::getPort(if_name, idx);
} else {
@@ -222,6 +234,10 @@
if (idx > numSqcPorts)
panic("ProtocolTester: unknown cu sqc port %d\n", idx);
return *cuSqcPorts[idx];
+ } else if (if_name == "cu_token_ports") {
+ if (idx > numTokenPorts)
+ panic("ProtocolTester: unknown cu token port %d\n", idx);
+ return *cuTokenPorts[idx];
} else {
assert(if_name == "cu_scalar_ports");
if (idx > numScalarPorts)
diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.hh b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
index c1f2997..6109e5a 100644
--- a/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
+++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
@@ -58,6 +58,7 @@
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
#include "mem/packet.hh"
#include "mem/ruby/system/RubyPort.hh"
+#include "mem/token_port.hh"
#include "params/ProtocolTester.hh"
class GpuThread;
@@ -81,6 +82,20 @@
{ panic("%s does not expect a retry\n", name()); }
};
+ class GMTokenPort : public TokenRequestPort
+ {
+ public:
+ GMTokenPort(const std::string& name, ProtocolTester *_tester,
+ PortID id = InvalidPortID)
+ : TokenRequestPort(name, _tester, id)
+ {}
+ ~GMTokenPort() {}
+
+ protected:
+ bool recvTimingResp(PacketPtr) { return false; }
+ void recvReqRetry() {}
+ };
+
struct SenderState : public Packet::SenderState
{
GpuThread* th;
@@ -131,10 +146,12 @@
int numVectorPorts;
int numSqcPorts;
int numScalarPorts;
+ int numTokenPorts;
int numCusPerSqc;
int numCusPerScalar;
int numWfsPerCu;
int numWisPerWf;
+ int numCuTokens;
// parameters controlling the address range that the tester can access
int numAtomicLocs;
int numNormalLocsPerAtomic;
@@ -150,6 +167,8 @@
std::vector<RequestPort*> cuVectorPorts; // ports to GPU vector cache
std::vector<RequestPort*> cuSqcPorts; // ports to GPU inst cache
std::vector<RequestPort*> cuScalarPorts; // ports to GPU scalar cache
+ std::vector<TokenManager*> cuTokenManagers;
+ std::vector<GMTokenPort*> cuTokenPorts;
// all CPU and GPU threads
std::vector<CpuThread*> cpuThreads;
std::vector<GpuWavefront*> wfs;
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/35135
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Id317d703e4765dd5fa7de0d16f5eb595aab7096c
Gerrit-Change-Number: 35135
Gerrit-PatchSet: 10
Gerrit-Owner: Kyle Roarty <kyleroarty1...@gmail.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s