Hello Tuan Ta,
I'd like you to do a code review. Please visit
https://gem5-review.googlesource.com/c/public/gem5/+/32858
to review the following change.
Change subject: mem-ruby: fix integration issues between GCN3 and VIPER
......................................................................
mem-ruby: fix integration issues between GCN3 and VIPER
This patch basically changes how the coalescer supports two different cases:
(1) using real CUs and (2) using tester threads.
This patch keeps the implementation of writeCompleteCallback in
GPUCoalescer instead of placing it in VIPER-specific coalescer.
Change-Id: I6d54795ac60d7aa020bc94b853dc3673f59762ba
---
M configs/example/ruby_gpu_random_test.py
M src/cpu/testers/gpu_ruby_test/AddressManager.cc
M src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
M src/cpu/testers/gpu_ruby_test/GpuWavefront.hh
M src/cpu/testers/gpu_ruby_test/ProtocolTester.cc
M src/cpu/testers/gpu_ruby_test/ProtocolTester.hh
M src/mem/ruby/protocol/GPU_VIPER-TCP.sm
M src/mem/ruby/protocol/GPU_VIPER-msg.sm
M src/mem/ruby/system/GPUCoalescer.cc
M src/mem/ruby/system/GPUCoalescer.hh
M src/mem/ruby/system/VIPERCoalescer.cc
M src/mem/ruby/system/VIPERCoalescer.hh
12 files changed, 197 insertions(+), 143 deletions(-)
diff --git a/configs/example/ruby_gpu_random_test.py
b/configs/example/ruby_gpu_random_test.py
index d40a942..7e79d9e 100644
--- a/configs/example/ruby_gpu_random_test.py
+++ b/configs/example/ruby_gpu_random_test.py
@@ -98,7 +98,97 @@
assert(options.num_compute_units >= 1)
n_cu = options.num_compute_units
-options.num_sqc = int((n_cu + options.cu_per_sqc - 1) //
options.cu_per_sqc)
+#
+# Set up system size - 3 options
+#
+if (options.system_size == 0):
+ # 1 CU, 1 CPU, 1 SQC, 1 Scalar
+ options.wf_size = 1
+ options.wavefronts_per_cu = 1
+ options.num_cpus = 1
+ options.cu_per_sqc = 1
+ options.cu_per_scalar_cache = 1
+ options.num_compute_units = 1
+elif (options.system_size == 1):
+    # 4 CUs, 4 CPUs, 1 SQC, 1 Scalar
+ options.wf_size = 16
+ options.wavefronts_per_cu = 4
+ options.num_cpus = 4
+ options.cu_per_sqc = 4
+ options.cu_per_scalar_cache = 4
+ options.num_compute_units = 4
+elif (options.system_size == 2):
+    # 8 CUs, 4 CPUs, 2 SQCs, 2 Scalars
+ options.wf_size = 32
+ options.wavefronts_per_cu = 4
+ options.num_cpus = 4
+ options.cu_per_sqc = 4
+ options.cu_per_scalar_cache = 4
+ options.num_compute_units = 8
+else:
+ print("Error: option system size '%s' not recognized",
options.system_size)
+ sys.exit(1)
+
+#
+# set address range - 2 options
+# level 0: small
+# level 1: large
+# each location corresponds to a 4-byte piece of data
+#
+options.mem_size = '1024MB'
+num_atomic_locs = 10
+num_regular_locs_per_atomic_loc = 10000
+if (options.address_range == 1):
+ num_atomic_locs = 100
+ num_regular_locs_per_atomic_loc = 100000
+elif (options.address_range != 0):
+ print("Error: option address_range '%s' not recognized", \
+ options.address_range)
+ sys.exit(1)
+
+#
+# set episode length (# of actions per episode) - 3 options
+# 0: 10 actions
+# 1: 100 actions
+# 2: 500 actions
+#
+eps_length = 10
+if (options.episode_length == 1):
+ eps_length = 100
+elif (options.episode_length == 2):
+ eps_length = 500
+elif (options.episode_length != 0):
+ print("Error: option episode_length '%s' not recognized",
+ options.episode_length)
+ sys.exit(1)
+
+# set Ruby's and the tester's deadlock thresholds
+# Ruby's deadlock detection is the primary check for deadlock.
+# the tester's deadlock threshold detection is a secondary check for
deadlock
+# if there is a bug in RubyPort that causes a packet not to return to the
+# tester properly, the tester will throw a deadlock exception.
+# we set cache_deadlock_threshold < tester_deadlock_threshold to detect
+# deadlock caused by Ruby protocol first before one caused by the coalescer
+options.cache_deadlock_threshold = 100000000
+tester_deadlock_threshold = 1000000000
+
+# for now, we're testing only GPU protocol, so we set num_cpus to 0
+options.num_cpus = 0
+# number of CPUs and CUs
+n_CPUs = options.num_cpus
+n_CUs = options.num_compute_units
+# set test length, i.e., number of episodes per wavefront * #WFs
+# test length can be 1x#WFs, 10x#WFs, 100x#WFs, ...
+n_WFs = n_CUs * options.wavefronts_per_cu
+max_episodes = options.test_length * n_WFs
+# number of SQC and Scalar caches
+assert(n_CUs % options.cu_per_sqc == 0)
+n_SQCs = int(n_CUs/options.cu_per_sqc)
+options.num_sqc = n_SQCs
+assert(n_CUs % options.cu_per_scalar_cache == 0)
+assert(options.cu_per_scalar_cache != 0)
+n_Scalars = int(n_CUs/options.cu_per_scalar_cache)
+options.num_scalar_cache = n_Scalars
if args:
print("Error: script doesn't take any positional arguments")
diff --git a/src/cpu/testers/gpu_ruby_test/AddressManager.cc
b/src/cpu/testers/gpu_ruby_test/AddressManager.cc
index 6ef52fc..3d9b161 100644
--- a/src/cpu/testers/gpu_ruby_test/AddressManager.cc
+++ b/src/cpu/testers/gpu_ruby_test/AddressManager.cc
@@ -38,6 +38,7 @@
#include <algorithm>
#include "base/intmath.hh"
+#include "base/logging.hh"
#include "base/random.hh"
#include "base/trace.hh"
@@ -314,8 +315,8 @@
// increment the location's number of owners
loc_prop.second++;
} else {
- panic("Location in loadStoreMap but wasn't picked "
- "in any action\n");
+ panic("Location in loadStoreMap but wasn't picked in any"
+ " action\n");
}
}
@@ -376,7 +377,14 @@
ExpectedValueSet::iterator it = expectedValues.find(val);
if (it == expectedValues.end()) {
- return false;
+ std::stringstream exp_val_ss;
+ for (auto& val : expectedValues) {
+ exp_val_ss << " " << val;
+ }
+
+ warn("Expected return values are:\n\t%s\n", exp_val_ss.str());
+
+ return false;
}
// erase this value b/c it's done
diff --git a/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
b/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
index 699776c..822fb26 100644
--- a/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
+++ b/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
@@ -81,14 +81,9 @@
// for now, assert address is 4-byte aligned
assert(address % load_size == 0);
- Request *req = new Request(0, // asid
- address, // virtual
addr
- load_size, // size in
bytes
- 0, // flags
- tester->masterId(), // port id
- 0, // pc
- threadId, // thread_id
- 0);
+ auto req = std::make_shared<Request>(address, load_size,
+ 0, tester->masterId(),
+ 0, threadId, nullptr);
req->setPaddr(address);
req->setReqInstSeqNum(tester->getActionSeqNum());
// set protocol-specific flags
@@ -103,7 +98,7 @@
pendingLdStCount++;
if (!port->sendTimingReq(pkt)) {
- retryPkts.push_back(pkt);
+ panic("Not expected failed sendTimingReq\n");
}
// insert an outstanding load
@@ -138,14 +133,9 @@
curEpisode->getEpisodeId(), printAddress(address),
new_value);
- Request *req = new Request(0, // asid
- address, // virtual
addr
- sizeof(Value), // size in
bytes
- 0, // flags
- tester->masterId(), // port id
- 0, // pc
- threadId, // thread_id
- 0);
+ auto req = std::make_shared<Request>(address, sizeof(Value),
+ 0, tester->masterId(), 0,
+ threadId, nullptr);
req->setPaddr(address);
req->setReqInstSeqNum(tester->getActionSeqNum());
// set protocol-specific flags
@@ -163,7 +153,7 @@
pendingLdStCount++;
if (!port->sendTimingReq(pkt)) {
- retryPkts.push_back(pkt);
+ panic("Not expecting a failed sendTimingReq\n");
}
// add an outstanding store
@@ -198,14 +188,11 @@
// must be aligned with store size
assert(address % sizeof(Value) == 0);
- Request *req = new Request(0, // asid
- address, // virtual addr
- sizeof(Value), // size in bytes
- flags, // flags
- tester->masterId(), // port id
- 0, // pc
- threadId, // thread_id
- new AtomicOpInc<Value>());
+ AtomicOpFunctor *amo_op = new AtomicOpInc<Value>();
+ auto req = std::make_shared<Request>(address, sizeof(Value),
+ flags, tester->masterId(),
+ 0, threadId,
+ AtomicOpFunctorPtr(amo_op));
req->setPaddr(address);
req->setReqInstSeqNum(tester->getActionSeqNum());
// set protocol-specific flags
@@ -217,7 +204,7 @@
pkt->senderState = new ProtocolTester::SenderState(this);
if (!port->sendTimingReq(pkt)) {
- retryPkts.push_back(pkt);
+ panic("Not expecting failed sendTimingReq\n");
}
// increment the number of outstanding atomic ops
@@ -241,14 +228,9 @@
assert(pendingLdStCount == 0);
assert(pendingAtomicCount == 0);
- Request *acq_req = new Request(0,
- 0, // vaddr
- 0, // request size
- 0, // flags
- tester->masterId(),
- 0,
- threadId,
- 0);
+ auto acq_req = std::make_shared<Request>(0, 0, 0,
+ tester->masterId(), 0,
+ threadId, nullptr);
acq_req->setPaddr(0);
acq_req->setReqInstSeqNum(tester->getActionSeqNum());
acq_req->setFlags(Request::ACQUIRE);
@@ -262,7 +244,7 @@
pendingFenceCount++;
if (!port->sendTimingReq(pkt)) {
- retryPkts.push_back(pkt);
+ panic("Not expecting failed sendTimingReq\n");
}
}
@@ -287,7 +269,7 @@
{
assert(pkt);
MemCmd resp_cmd = pkt->cmd;
- Addr addr = pkt->getAddr();
+ Addr addr = (resp_cmd == MemCmd::MemSyncResp) ? 0 : pkt->getAddr();
DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
"Addr %s\n", this->getName(),
@@ -362,7 +344,7 @@
// this Atomic is done
pendingAtomicCount--;
- } else if (resp_cmd == MemCmd::MessageResp) {
+ } else if (resp_cmd == MemCmd::WriteCompleteResp) {
// write completion ACK
assert(pendingLdStCount > 0);
assert(pendingAtomicCount == 0);
@@ -376,7 +358,6 @@
if (isTransactionDone) {
// no need to keep senderState and request around
delete pkt->senderState;
- delete pkt->req;
}
delete pkt;
@@ -391,17 +372,7 @@
}
void
-GpuWavefront::setExtraRequestFlags(Request* req)
+GpuWavefront::setExtraRequestFlags(RequestPtr req)
{
// No extra request flag is set
}
-
-void
-GpuWavefront::retry()
-{
- while (retryPkts.size() > 0) {
- if (!port->sendTimingReq(retryPkts.front()))
- break;
- retryPkts.pop_front();
- }
-}
diff --git a/src/cpu/testers/gpu_ruby_test/GpuWavefront.hh
b/src/cpu/testers/gpu_ruby_test/GpuWavefront.hh
index e17da0e..fa9528b 100644
--- a/src/cpu/testers/gpu_ruby_test/GpuWavefront.hh
+++ b/src/cpu/testers/gpu_ruby_test/GpuWavefront.hh
@@ -52,9 +52,6 @@
virtual void hitCallback(PacketPtr pkt);
- // retry failed packets
- void retry();
-
protected:
void issueLoadOps();
void issueStoreOps();
@@ -64,11 +61,10 @@
virtual void issueAcquireOp();
virtual void issueReleaseOp();
// set extra request flags that is specific to a target protocol
- virtual void setExtraRequestFlags(Request* req);
+ virtual void setExtraRequestFlags(RequestPtr req);
protected:
int cuId; // compute unit associated with this wavefront
- std::deque<PacketPtr> retryPkts;
};
#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPUWAVEFRONT_HH_ */
diff --git a/src/cpu/testers/gpu_ruby_test/ProtocolTester.cc
b/src/cpu/testers/gpu_ruby_test/ProtocolTester.cc
index 1a9389b..8235b44 100644
--- a/src/cpu/testers/gpu_ruby_test/ProtocolTester.cc
+++ b/src/cpu/testers/gpu_ruby_test/ProtocolTester.cc
@@ -50,7 +50,7 @@
ProtocolTester::ProtocolTester(const Params *p)
: MemObject(p),
- _masterId(p->system->getMasterId(this, name())),
+ _masterId(p->system->getMasterId(this)),
numCpuPorts(p->port_cpu_ports_connection_count),
numVectorPorts(p->port_cu_vector_ports_connection_count),
numSqcPorts(p->port_cu_sqc_ports_connection_count),
@@ -190,16 +190,13 @@
for (int cu_id = 0; cu_id < numCus; ++cu_id) {
vectorPortId = cu_id;
sqcPortId = cu_id/numCusPerSqc;
- // no scalar port if 'numCusPerScalar' is '0'
- if (numCusPerScalar != 0)
- scalarPortId = cu_id/numCusPerScalar;
+ scalarPortId = cu_id/numCusPerScalar;
for (int i = 0; i < numWfsPerCu; ++i) {
wfId = cu_id * numWfsPerCu + i;
wfs[wfId]->attachThreadToPorts(this,
static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
- !numCusPerScalar ? nullptr :
static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
wfs[wfId]->scheduleWakeup();
wfs[wfId]->scheduleDeadlockCheckEvent();
@@ -207,13 +204,13 @@
}
}
-BaseMasterPort &
-ProtocolTester::getMasterPort(const std::string & if_name, PortID idx)
+Port&
+ProtocolTester::getPort(const std::string &if_name, PortID idx)
{
if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") {
// pass along to super class
- return MemObject::getMasterPort(if_name, idx);
+ return MemObject::getPort(if_name, idx);
} else {
if (if_name == "cpu_ports") {
if (idx > numCpuPorts)
@@ -245,8 +242,7 @@
if (!sentExitSignal) {
// all done
inform("Total completed episodes: %d\n", nextEpisodeId - 1);
- inform("Protocol Test: Passed!\n");
- exitSimLoop("ProtocolTester completed!");
+ exitSimLoop("GPU Ruby Tester: Passed!");
sentExitSignal = true;
}
return true;
@@ -292,17 +288,9 @@
ccprintf(*(logFile->stream()), "%s", ss.str());
logFile->stream()->flush();
- // exit the sim loop
- exitSimLoop("GPU Ruby Tester: Failed!", -1);
sentExitSignal = true;
- }
-}
-
-void ProtocolTester::retry()
-{
- // FIXME for now, just let all wavefronts retry
- for (auto wf : wfs) {
- wf->retry();
+ // terminate the simulation
+ panic("GPU Ruby Tester: Failed!\n");
}
}
@@ -319,12 +307,6 @@
return true;
}
-void
-ProtocolTester::SeqPort::recvReqRetry()
-{
- tester->retry();
-}
-
ProtocolTester*
ProtocolTesterParams::create()
{
diff --git a/src/cpu/testers/gpu_ruby_test/ProtocolTester.hh
b/src/cpu/testers/gpu_ruby_test/ProtocolTester.hh
index ba3fdac..2264d1c 100644
--- a/src/cpu/testers/gpu_ruby_test/ProtocolTester.hh
+++ b/src/cpu/testers/gpu_ruby_test/ProtocolTester.hh
@@ -75,16 +75,13 @@
public:
SeqPort(const std::string &_name, ProtocolTester *_tester, PortID
_id,
PortID _index)
- : MasterPort(_name, _tester, _id),
- tester(_tester)
+ : MasterPort(_name, _tester, _id)
{}
protected:
virtual bool recvTimingResp(PacketPtr pkt);
- virtual void recvReqRetry();
-
- private:
- ProtocolTester* tester;
+ virtual void recvReqRetry()
+ { panic("%s does not expect a retry\n", name()); }
};
struct SenderState : public Packet::SenderState
@@ -110,8 +107,8 @@
void init();
MasterID masterId() { return _masterId; };
- virtual BaseMasterPort &getMasterPort(const std::string &if_name,
- PortID idx = InvalidPortID);
+ Port& getPort(const std::string &if_name,
+ PortID idx=InvalidPortID) override;
int getEpisodeLength() const { return episodeLength; }
// return pointer to the address manager
@@ -125,8 +122,6 @@
int getNextEpisodeID() { return nextEpisodeId++; }
// get action sequence number
int getActionSeqNum() { return actionCount++; }
- // retry all failed request pkts
- void retry();
// dump error log into a file and exit the simulation
void dumpErrorLog(std::stringstream& ss);
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
index aafe5a4..511d584 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
@@ -495,7 +495,7 @@
assert(false);
} else {
peek(responseToTCP_in, ResponseMsg) {
- coalescer.writeCompleteCallback(address, in_msg.instSeqNum);
+ coalescer.writeCompleteCallback(address, in_msg.instSeqNum,
MachineType:L1Cache);
}
}
}
diff --git a/src/mem/ruby/protocol/GPU_VIPER-msg.sm
b/src/mem/ruby/protocol/GPU_VIPER-msg.sm
index 6011748..04172ed 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-msg.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-msg.sm
@@ -44,6 +44,7 @@
Cycles, Cycles, Cycles);
void writeCallback(Addr, MachineType, DataBlock,
Cycles, Cycles, Cycles, bool);
+ void writeCompleteCallback(Addr, uint64_t, MachineType);
void evictionCallback(Addr);
void recordCPReadCallBack(MachineID, MachineID);
void recordCPWriteCallBack(MachineID, MachineID);
@@ -64,6 +65,6 @@
Cycles, Cycles, Cycles, bool);
void atomicCallback(Addr, MachineType, DataBlock);
void invTCPCallback(Addr);
- void writeCompleteCallback(Addr, uint64_t);
+ void writeCompleteCallback(Addr, uint64_t, MachineType);
void evictionCallback(Addr);
}
diff --git a/src/mem/ruby/system/GPUCoalescer.cc
b/src/mem/ruby/system/GPUCoalescer.cc
index 80bc19a..1a44e55 100644
--- a/src/mem/ruby/system/GPUCoalescer.cc
+++ b/src/mem/ruby/system/GPUCoalescer.cc
@@ -671,22 +671,8 @@
// instruction.
RubyPort::MemSlavePort* mem_slave_port = ss->port;
- GPUDynInstPtr gpuDynInst = nullptr;
-
- if (!m_usingRubyTester) {
- // If this coalescer is connected to a real CU, we need
- // to save the corresponding gpu dynamic instruction.
- // CU will use that instruction to decrement wait counters
- // in the issuing wavefront.
- // For Ruby tester, gpuDynInst == nullptr
- ComputeUnit::DataPort::SenderState* cu_state =
- safe_cast<ComputeUnit::DataPort::SenderState*>
- (ss->predecessor);
- gpuDynInst = cu_state->_gpuDynInst;
- }
-
PendingWriteInst& inst = pendingWriteInsts[seqNum];
- inst.addPendingReq(mem_slave_port, gpuDynInst,
m_usingRubyTester);
+ inst.addPendingReq(mem_slave_port, pkt, m_usingRubyTester);
}
return true;
diff --git a/src/mem/ruby/system/GPUCoalescer.hh
b/src/mem/ruby/system/GPUCoalescer.hh
index 401f70b..ca53c7f 100644
--- a/src/mem/ruby/system/GPUCoalescer.hh
+++ b/src/mem/ruby/system/GPUCoalescer.hh
@@ -38,6 +38,7 @@
#include <unordered_map>
#include "base/statistics.hh"
+#include "cpu/testers/gpu_ruby_test/ProtocolTester.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh"
#include "mem/request.hh"
@@ -127,6 +128,7 @@
public:
PendingWriteInst()
: numPendingStores(0),
+ numRequiredWriteCompleteAcks(0),
originalPort(nullptr),
gpuDynInstPtr(nullptr)
{}
@@ -135,14 +137,35 @@
{}
void
- addPendingReq(RubyPort::MemSlavePort* port, GPUDynInstPtr inst,
+ addPendingReq(RubyPort::MemSlavePort* port, PacketPtr pkt,
bool usingRubyTester)
{
assert(port);
originalPort = port;
- if (!usingRubyTester) {
- gpuDynInstPtr = inst;
+ RubyPort::SenderState* ss =
+ safe_cast<RubyPort::SenderState*>(pkt->senderState);
+
+ if (usingRubyTester) {
+ // If this coalescer is connected to a tester thread, we need
+ // to save the corresponding requesting thread.
+ // get the requesting thread from the original sender state
+ ProtocolTester::SenderState* senderState =
+ safe_cast<ProtocolTester::SenderState*>
+ (ss->predecessor);
+ testerThreadPtr = senderState->th;
+ numRequiredWriteCompleteAcks++;
+ } else {
+ // If this coalescer is connected to a real CU, we need
+ // to save the corresponding gpu dynamic instruction.
+ // CU will use that instruction to decrement wait counters
+ // in the issuing wavefront.
+ // For Ruby tester, gpuDynInst == nullptr
+ ComputeUnit::DataPort::SenderState* cu_state =
+ safe_cast<ComputeUnit::DataPort::SenderState*>
+ (ss->predecessor);
+ gpuDynInstPtr = cu_state->_gpuDynInst;
+ numRequiredWriteCompleteAcks = 1;
}
numPendingStores++;
@@ -162,21 +185,29 @@
ackWriteCompletion(bool usingRubyTester)
{
assert(numPendingStores == 0);
+ assert(numRequiredWriteCompleteAcks > 0);
- // make a response packet
- PacketPtr pkt = new Packet(std::make_shared<Request>(),
- MemCmd::WriteCompleteResp);
+ for (int i = 0; i < numRequiredWriteCompleteAcks; ++i) {
+ // make a response packet
+ PacketPtr pkt = new Packet(std::make_shared<Request>(),
+ MemCmd::WriteCompleteResp);
- if (!usingRubyTester) {
- assert(gpuDynInstPtr);
- ComputeUnit::DataPort::SenderState* ss =
- new ComputeUnit::DataPort::SenderState
- (gpuDynInstPtr, 0, nullptr);
- pkt->senderState = ss;
+ if (usingRubyTester) {
+ assert(testerThreadPtr);
+ ProtocolTester::SenderState* ss =
+ new ProtocolTester::SenderState(testerThreadPtr);
+ pkt->senderState = ss;
+ } else {
+ assert(gpuDynInstPtr);
+ ComputeUnit::DataPort::SenderState* ss =
+ new ComputeUnit::DataPort::SenderState
+ (gpuDynInstPtr, 0,
nullptr);
+ pkt->senderState = ss;
+ }
+
+ // send the ack response to the requester
+ originalPort->sendTimingResp(pkt);
}
-
- // send the ack response to the requester
- originalPort->sendTimingResp(pkt);
}
int
@@ -187,6 +218,11 @@
private:
// the number of stores waiting for writeCompleteCallback
int numPendingStores;
+ // number of write-complete acks required by the requestor
+ // if the requestor is a CU, this number is always 1
+ // if the requestor is a tester thread, this number is equal to the
number
+ // of store requests sent to the coalescer
+ int numRequiredWriteCompleteAcks;
// The original port that sent one of packets associated with this
// write instruction. We may have more than one packet per instruction,
// which implies multiple ports per instruction. However, we need
@@ -196,6 +232,8 @@
// similar to the originalPort, this gpuDynInstPtr is set only for
// the first packet of this instruction.
GPUDynInstPtr gpuDynInstPtr;
+ // if protocol tester is used, this points to the requesting tester
thread
+ Thread* testerThreadPtr;
};
class GPUCoalescer : public RubyPort
diff --git a/src/mem/ruby/system/VIPERCoalescer.cc
b/src/mem/ruby/system/VIPERCoalescer.cc
index eafce6d..a21cf33 100644
--- a/src/mem/ruby/system/VIPERCoalescer.cc
+++ b/src/mem/ruby/system/VIPERCoalescer.cc
@@ -180,9 +180,9 @@
dataBlock, crequest->getSeqNum());
}
- if (pkt->cmd == MemCmd::WriteReq) {
- makeWriteCompletePkts(crequest);
- }
+// if (pkt->cmd == MemCmd::WriteReq) {
+// makeWriteCompletePkts(crequest);
+// }
DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
curTick(), m_version, "Coal", "Begin", "", "",
diff --git a/src/mem/ruby/system/VIPERCoalescer.hh
b/src/mem/ruby/system/VIPERCoalescer.hh
index 2f68c10..c83bda0 100644
--- a/src/mem/ruby/system/VIPERCoalescer.hh
+++ b/src/mem/ruby/system/VIPERCoalescer.hh
@@ -57,7 +57,6 @@
typedef VIPERCoalescerParams Params;
VIPERCoalescer(const Params *);
~VIPERCoalescer();
- void writeCompleteCallback(Addr address, uint64_t instSeqNum);
void invTCPCallback(Addr address);
RequestStatus makeRequest(PacketPtr pkt) override;
void issueRequest(CoalescedRequest* crequest) override;
@@ -65,23 +64,11 @@
private:
void invTCP();
- // make write-complete response packets from original write request
packets
- void makeWriteCompletePkts(CoalescedRequest* crequest);
-
// current cache invalidation packet
// nullptr if there is no active cache invalidation request
PacketPtr m_cache_inv_pkt;
// number of remaining cache lines to be invalidated in TCP
int m_num_pending_invs;
-
- // a map of instruction sequence number and corresponding pending
- // write-complete response packets. Each write-complete response
- // corresponds to a pending store request that is waiting for
- // writeCompleteCallback. We may have multiple pending store requests
per
- // wavefront at a time. Each time writeCompleteCallback is called, an
entry
- // with a corresponding seqNum is popped off from map and returned to
- // compute unit.
- std::unordered_map<uint64_t, std::vector<PacketPtr>>
m_writeCompletePktMap;
};
#endif //__MEM_RUBY_SYSTEM_VIPERCOALESCER_HH__
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/32858
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I6d54795ac60d7aa020bc94b853dc3673f59762ba
Gerrit-Change-Number: 32858
Gerrit-PatchSet: 1
Gerrit-Owner: Bradford Beckmann <[email protected]>
Gerrit-Reviewer: Tuan Ta <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s