Austin Harris has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/52303 )
Change subject: mem: implement x86 locked accesses in timing-mode classic
cache
......................................................................
mem: implement x86 locked accesses in timing-mode classic cache
Add LockedRMW(Read|Write)(Req|Resp) commands. In timing mode,
use a combination of clearing permission bits and leaving
an MSHR in place to prevent accesses & snoops from touching
a locked block between the read and write parts of a locked
RMW sequence.
Based on an old patch by Steve Reinhardt:
http://reviews.gem5.org/r/2691/index.html
Change-Id: Ieadda4deb17667ca4a6282f87f6da2af3b011f66
---
M src/mem/cache/noncoherent_cache.cc
M src/mem/cache/noncoherent_cache.hh
M src/mem/cache/mshr.cc
M src/mem/cache/cache.cc
M src/mem/cache/cache.hh
M src/mem/cache/base.cc
M src/mem/cache/base.hh
M src/mem/packet.cc
M src/mem/packet.hh
M src/mem/cache/queue_entry.hh
10 files changed, 248 insertions(+), 66 deletions(-)
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index fa9257c..3e1b53d 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -223,6 +223,59 @@
void
BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick
request_time)
{
+
+ // handle special cases for LockedRMW transactions
+ if (pkt->isLockedRMW()) {
+ Addr blk_addr = pkt->getBlockAddr(blkSize);
+
+ if (pkt->isRead()) {
+ // Read hit for LockedRMW. Since it requires exclusive
+ // permissions, there should be no outstanding access.
+ assert(!mshrQueue.findMatch(blk_addr, pkt->isSecure()));
+ // The keys to LockedRMW are that (1) we always have an MSHR
+ // allocated during the RMW interval to catch snoops and
+ // defer them until after the RMW completes, and (2) we
+ // clear permissions on the block to turn any upstream
+ // access other than the matching write into a miss, causing
+ // it to append to the MSHR as well.
+
+ // Because we hit in the cache, we have to fake an MSHR to
+ // achieve part (1). If the read had missed, this MSHR
+ // would get allocated as part of normal miss processing.
+ // Basically we need to get the MSHR in the same state as if
+ // we had missed and just received the response.
+ // Request *req2 = new Request(*(pkt->req));
+ RequestPtr req2 = std::make_shared<Request>(*(pkt->req));
+ PacketPtr pkt2 = new Packet(req2, pkt->cmd);
+ MSHR *mshr = allocateMissBuffer(pkt2, curTick(), true);
+ // Mark the MSHR "in service" (even though it's not) to prevent
+ // the cache from sending out a request.
+ mshrQueue.markInService(mshr, false);
+ // Part (2): mark block inaccessible
+ assert(blk);
+ blk->clearCoherenceBits(CacheBlk::ReadableBit);
+ blk->clearCoherenceBits(CacheBlk::WritableBit);
+ } else {
+ assert(pkt->isWrite());
+ // All LockedRMW writes come here, as they cannot miss.
+ // Need to undo the two things described above. Block
+ // permissions were already restored earlier in this
+ // function, prior to the access() call. Now we just need
+ // to clear out the MSHR.
+
+ // Read should have already allocated MSHR.
+ MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure());
+ assert(mshr);
+ // Fake up a packet and "respond" to the still-pending
+ // LockedRMWRead, to process any pending targets and clear
+ // out the MSHR
+ PacketPtr resp_pkt =
+ new Packet(pkt->req, MemCmd::LockedRMWWriteResp);
+ resp_pkt->senderState = mshr;
+ recvTimingResp(resp_pkt);
+ }
+ }
+
if (pkt->needsResponse()) {
// These delays should have been consumed by now
assert(pkt->headerDelay == 0);
@@ -353,6 +406,20 @@
// the delay provided by the crossbar
Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
+ if (pkt->cmd == MemCmd::LockedRMWWriteReq) {
+ // For LockedRMW accesses, we mark the block inaccessible after the
+ // read (see below), to make sure no one gets in before the write.
+ // Now that the write is here, mark it accessible again, so the
+ // write will succeed. LockedRMWReadReq brings the block in in
+ // exclusive mode, so we know it was previously writable.
+ CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
+ assert(blk && blk->isValid());
+ assert(!blk->isSet(CacheBlk::WritableBit) &&
+ !blk->isSet(CacheBlk::ReadableBit));
+ blk->setCoherenceBits(CacheBlk::ReadableBit);
+ blk->setCoherenceBits(CacheBlk::WritableBit);
+ }
+
Cycles lat;
CacheBlk *blk = nullptr;
bool satisfied = false;
@@ -438,7 +505,7 @@
// if this is a write, we should be looking at an uncacheable
// write
- if (pkt->isWrite()) {
+ if (pkt->isWrite() && pkt->cmd != MemCmd::LockedRMWWriteResp) {
assert(pkt->req->isUncacheable());
handleUncacheableWriteResp(pkt);
return;
@@ -491,59 +558,67 @@
ppFill->notify(pkt);
}
- if (blk && blk->isValid() && pkt->isClean() && !pkt->isInvalidate()) {
- // The block was marked not readable while there was a pending
- // cache maintenance operation, restore its flag.
- blk->setCoherenceBits(CacheBlk::ReadableBit);
+ MemCmd cmd = mshr->getTarget()->pkt->cmd;
+ // Don't want to promote the Locked RMW Read until
+ // the locked write comes in
+ if (cmd != MemCmd::LockedRMWReadReq) {
+ if (blk && blk->isValid() && pkt->isClean()
&& !pkt->isInvalidate()) {
+ // The block was marked not readable while there was a pending
+ // cache maintenance operation, restore its flag.
+ blk->setCoherenceBits(CacheBlk::ReadableBit);
- // This was a cache clean operation (without invalidate)
- // and we have a copy of the block already. Since there
- // is no invalidation, we can promote targets that don't
- // require a writable copy
- mshr->promoteReadable();
- }
-
- if (blk && blk->isSet(CacheBlk::WritableBit) &&
- !pkt->req->isCacheInvalidate()) {
- // If at this point the referenced block is writable and the
- // response is not a cache invalidate, we promote targets that
- // were deferred as we couldn't guarrantee a writable copy
- mshr->promoteWritable();
- }
-
- serviceMSHRTargets(mshr, pkt, blk);
-
- if (mshr->promoteDeferredTargets()) {
- // avoid later read getting stale data while write miss is
- // outstanding.. see comment in timingAccess()
- if (blk) {
- blk->clearCoherenceBits(CacheBlk::ReadableBit);
- }
- mshrQueue.markPending(mshr);
- schedMemSideSendEvent(clockEdge() + pkt->payloadDelay);
- } else {
- // while we deallocate an mshr from the queue we still have to
- // check the isFull condition before and after as we might
- // have been using the reserved entries already
- const bool was_full = mshrQueue.isFull();
- mshrQueue.deallocate(mshr);
- if (was_full && !mshrQueue.isFull()) {
- clearBlocked(Blocked_NoMSHRs);
+ // This was a cache clean operation (without invalidate)
+ // and we have a copy of the block already. Since there
+ // is no invalidation, we can promote targets that don't
+ // require a writable copy
+ mshr->promoteReadable();
}
- // Request the bus for a prefetch if this deallocation freed enough
- // MSHRs for a prefetch to take place
- if (prefetcher && mshrQueue.canPrefetch() && !isBlocked()) {
- Tick next_pf_time =
std::max(prefetcher->nextPrefetchReadyTime(),
- clockEdge());
- if (next_pf_time != MaxTick)
- schedMemSideSendEvent(next_pf_time);
+ if (blk && blk->isSet(CacheBlk::WritableBit) &&
+ !pkt->req->isCacheInvalidate()) {
+ // If at this point the referenced block is writable and the
+ // response is not a cache invalidate, we promote targets that
+ // were deferred as we couldn't guarantee a writable copy
+ mshr->promoteWritable();
}
}
- // if we used temp block, check to see if its valid and then clear it
out
- if (blk == tempBlock && tempBlock->isValid()) {
- evictBlock(blk, writebacks);
+ bool early_exit = serviceMSHRTargets(mshr, pkt, blk);
+ // We are stopping servicing targets early for the Locked RMW Read
until
+ // the write comes.
+ if (!early_exit) {
+ if (mshr->promoteDeferredTargets()) {
+ // avoid later read getting stale data while write miss is
+ // outstanding.. see comment in timingAccess()
+ if (blk) {
+ blk->clearCoherenceBits(CacheBlk::ReadableBit);
+ }
+ mshrQueue.markPending(mshr);
+ schedMemSideSendEvent(clockEdge() + pkt->payloadDelay);
+ } else {
+ // while we deallocate an mshr from the queue we still have to
+ // check the isFull condition before and after as we might
+ // have been using the reserved entries already
+ const bool was_full = mshrQueue.isFull();
+ mshrQueue.deallocate(mshr);
+ if (was_full && !mshrQueue.isFull()) {
+ clearBlocked(Blocked_NoMSHRs);
+ }
+
+ // Request the bus for a prefetch if this deallocation freed
enough
+ // MSHRs for a prefetch to take place
+ if (prefetcher && mshrQueue.canPrefetch() && !isBlocked()) {
+ Tick next_pf_time = std::max(
+ prefetcher->nextPrefetchReadyTime(), clockEdge());
+ if (next_pf_time != MaxTick)
+ schedMemSideSendEvent(next_pf_time);
+ }
+ }
+
+ // if we used temp block, check to see if its valid and then clear
it
+ if (blk == tempBlock && tempBlock->isValid()) {
+ evictBlock(blk, writebacks);
+ }
}
const Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 988a678..5625472 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -552,7 +552,7 @@
* @param pkt The response packet
* @param blk The reference block
*/
- virtual void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
+ virtual bool serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
CacheBlk *blk) = 0;
/**
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 3c24343..7618ba5 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -324,6 +324,9 @@
Cache::handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk, Tick forward_time,
Tick request_time)
{
+
+ // These should always hit due to the earlier Locked Read
+ assert(pkt->cmd != MemCmd::LockedRMWWriteReq);
if (pkt->req->isUncacheable()) {
// ignore any existing MSHR if we are dealing with an
// uncacheable request
@@ -680,7 +683,7 @@
/////////////////////////////////////////////////////
-void
+bool
Cache::serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt, CacheBlk *blk)
{
QueueEntry::Target *initial_tgt = mshr->getTarget();
@@ -693,8 +696,29 @@
bool is_invalidate = pkt->isInvalidate() &&
!mshr->wasWholeLineWrite;
+ if (pkt->cmd == MemCmd::LockedRMWWriteResp) {
+ // This is the fake response generated by the write half of the
RMW;
+ // see comments in recvTimingReq(). The first target on the list
+ // should be the LockedRMWReadReq which has already been satisfied,
+ // either because it was a hit (and the MSHR was allocated in
+ // recvTimingReq()) or because it was left there after the initial
+ // response (using the 'early_exit' flag below). In either case,
we
+ // don't need to respond now, so pop it off to prevent the loop
+ // below from generating another response.
+ assert(initial_tgt->pkt->cmd == MemCmd::LockedRMWReadReq);
+ mshr->popTarget();
+ delete initial_tgt->pkt;
+ initial_tgt = nullptr;
+ }
+
+ // Early exit flag for LockedRMWRead
+ bool early_exit = false;
+
MSHR::TargetList targets = mshr->extractServiceableTargets(pkt);
for (auto &target: targets) {
+ if (early_exit) {
+ break;
+ }
Packet *tgt_pkt = target.pkt;
switch (target.source) {
case MSHR::Target::FromCPU:
@@ -781,6 +805,23 @@
stats.cmdStats(tgt_pkt)
.missLatency[tgt_pkt->req->requestorId()] +=
completion_time - target.recvTime;
+
+ if (tgt_pkt->cmd == MemCmd::LockedRMWReadReq) {
+ // We're going to leave a target in the MSHR until the
+ // write half of the RMW occurs (see comments above in
+ // recvTimingReq()). Since we'll be using the current
+ // request packet (which has the allocated data
pointer)
+ // to form the response, we have to allocate a new
dummy
+ // packet to save in the MSHR target.
+ RequestPtr r =
std::make_shared<Request>(*(tgt_pkt->req));
+ target.pkt = new Packet(r, MemCmd::LockedRMWReadReq);
+ // skip the rest of target processing after we
+ // send the response
+ early_exit = true;
+ // Mark block inaccessible until write arrives
+ blk->clearCoherenceBits(CacheBlk::WritableBit);
+ blk->clearCoherenceBits(CacheBlk::ReadableBit);
+ }
} else if (pkt->cmd == MemCmd::UpgradeFailResp) {
// failed StoreCond upgrade
assert(tgt_pkt->cmd == MemCmd::StoreCondReq ||
@@ -792,6 +833,11 @@
completion_time += clockEdge(responseLatency) +
pkt->payloadDelay;
tgt_pkt->req->setExtraData(0);
+ } else if (pkt->cmd == MemCmd::LockedRMWWriteResp) {
+ // Fake response on LockedRMW completion, see above.
+ // Since the data is already in the cache, we just use
+ // responseLatency with no extra penalties.
+ completion_time = clockEdge(responseLatency);
} else {
if (is_invalidate && blk && blk->isValid()) {
// We are about to send a response to a cache above
@@ -882,18 +928,22 @@
}
}
- maintainClusivity(targets.hasFromCache, blk);
+ if (!early_exit) {
+ maintainClusivity(targets.hasFromCache, blk);
- if (blk && blk->isValid()) {
- // an invalidate response stemming from a write line request
- // should not invalidate the block, so check if the
- // invalidation should be discarded
- if (is_invalidate || mshr->hasPostInvalidate()) {
- invalidateBlock(blk);
- } else if (mshr->hasPostDowngrade()) {
- blk->clearCoherenceBits(CacheBlk::WritableBit);
+ if (blk && blk->isValid()) {
+ // an invalidate response stemming from a write line request
+ // should not invalidate the block, so check if the
+ // invalidation should be discarded
+ if (is_invalidate || mshr->hasPostInvalidate()) {
+ invalidateBlock(blk);
+ } else if (mshr->hasPostDowngrade()) {
+ blk->clearCoherenceBits(CacheBlk::WritableBit);
+ }
}
}
+
+ return early_exit;
}
PacketPtr
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index dfc583b..6c72ce3 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -101,7 +101,7 @@
void doWritebacksAtomic(PacketList& writebacks) override;
- void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
+ bool serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
CacheBlk *blk) override;
void recvTimingSnoopReq(PacketPtr pkt) override;
diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc
index 6aaaf9e..eff74b0 100644
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@@ -137,7 +137,7 @@
const Request::FlagsType no_merge_flags =
Request::UNCACHEABLE | Request::STRICT_ORDER |
Request::PRIVILEGED | Request::LLSC | Request::MEM_SWAP |
- Request::MEM_SWAP_COND | Request::SECURE;
+ Request::MEM_SWAP_COND | Request::SECURE | Request::LOCKED_RMW;
const auto &req_flags = pkt->req->getFlags();
bool compat_write = !req_flags.isSet(no_merge_flags);
@@ -558,7 +558,11 @@
assert((it->source == Target::FromCPU) ||
(it->source == Target::FromPrefetcher));
ready_targets.push_back(*it);
- it = targets.erase(it);
+ // Leave the Locked RMW Read until the corresponding Locked Write
+ // request comes in
+ if (it->pkt->cmd != MemCmd::LockedRMWReadReq) {
+ it = targets.erase(it);
+ }
while (it != targets.end()) {
if (it->source == Target::FromCPU) {
it++;
@@ -570,7 +574,18 @@
}
ready_targets.populateFlags();
} else {
- std::swap(ready_targets, targets);
+ auto it = targets.begin();
+ while (it != targets.end()) {
+ ready_targets.push_back(*it);
+ if (it->pkt->cmd == MemCmd::LockedRMWReadReq){
+ // Leave the Locked RMW Read until the corresponding Locked
+ // Write comes in. Also don't service any later targets as
the
+ // line is now "locked".
+ break;
+ }
+ it = targets.erase(it);
+ }
+ ready_targets.populateFlags();
}
targets.populateFlags();
diff --git a/src/mem/cache/noncoherent_cache.cc
b/src/mem/cache/noncoherent_cache.cc
index 314025f..dc409de 100644
--- a/src/mem/cache/noncoherent_cache.cc
+++ b/src/mem/cache/noncoherent_cache.cc
@@ -238,7 +238,7 @@
BaseCache::functionalAccess(pkt, from_cpu_side);
}
-void
+bool
NoncoherentCache::serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
CacheBlk *blk)
{
@@ -311,6 +311,7 @@
// there should be no deferred targets and all the non-deferred
// targets are now serviced.
assert(mshr->getNumTargets() == 0);
+ return false;
}
void
diff --git a/src/mem/cache/noncoherent_cache.hh
b/src/mem/cache/noncoherent_cache.hh
index f01b08c..82c4fd8 100644
--- a/src/mem/cache/noncoherent_cache.hh
+++ b/src/mem/cache/noncoherent_cache.hh
@@ -82,7 +82,7 @@
void doWritebacksAtomic(PacketList& writebacks) override;
- void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
+ bool serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
CacheBlk *blk) override;
void recvTimingResp(PacketPtr pkt) override;
diff --git a/src/mem/cache/queue_entry.hh b/src/mem/cache/queue_entry.hh
index d891365..55c1928 100644
--- a/src/mem/cache/queue_entry.hh
+++ b/src/mem/cache/queue_entry.hh
@@ -90,7 +90,7 @@
const Tick recvTime; //!< Time when request was received (for
stats)
const Tick readyTime; //!< Time when request is ready to be
serviced
const Counter order; //!< Global order (for memory consistency
mgmt)
- const PacketPtr pkt; //!< Pending request packet.
+ PacketPtr pkt; //!< Pending request packet.
/**
* Default constructor. Assigns the current tick as the arrival
time
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 219bc76..5b23f13 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -164,6 +164,18 @@
/* StoreCondResp */
{ {IsWrite, IsLlsc, IsResponse},
InvalidCmd, "StoreCondResp" },
+ /* LockedRMWReadReq */
+ { {IsRead, IsLockedRMW, NeedsWritable, IsRequest, NeedsResponse},
+ LockedRMWReadResp, "LockedRMWReadReq" },
+ /* LockedRMWReadResp */
+ { {IsRead, IsLockedRMW, NeedsWritable, IsResponse, HasData},
+ InvalidCmd, "LockedRMWReadResp" },
+ /* LockedRMWWriteReq */
+ { {IsWrite, IsLockedRMW, NeedsWritable, IsRequest, NeedsResponse,
+ HasData}, LockedRMWWriteResp, "LockedRMWWriteReq" },
+ /* LockedRMWWriteResp */
+ { {IsWrite, IsLockedRMW, NeedsWritable, IsResponse},
+ InvalidCmd, "LockedRMWWriteResp" },
/* SwapReq -- for Swap ldstub type operations */
{ {IsRead, IsWrite, NeedsWritable, IsRequest, HasData, NeedsResponse},
SwapResp, "SwapReq" },
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 88995f1..d42ea27 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -112,6 +112,10 @@
StoreCondReq,
StoreCondFailReq, // Failed StoreCondReq in MSHR (never sent)
StoreCondResp,
+ LockedRMWReadReq,
+ LockedRMWReadResp,
+ LockedRMWWriteReq,
+ LockedRMWWriteResp,
SwapReq,
SwapResp,
// MessageReq and MessageResp are deprecated.
@@ -162,6 +166,7 @@
IsSWPrefetch,
IsHWPrefetch,
IsLlsc, //!< Alpha/MIPS LL or SC access
+ IsLockedRMW, //!< x86 locked RMW access
HasData, //!< There is an associated payload
IsError, //!< Error response
IsPrint, //!< Print state matching address (for debugging)
@@ -239,6 +244,7 @@
*/
bool hasData() const { return testCmdAttrib(HasData); }
bool isLLSC() const { return testCmdAttrib(IsLlsc); }
+ bool isLockedRMW() const { return testCmdAttrib(IsLockedRMW); }
bool isSWPrefetch() const { return testCmdAttrib(IsSWPrefetch); }
bool isHWPrefetch() const { return testCmdAttrib(IsHWPrefetch); }
bool isPrefetch() const { return testCmdAttrib(IsSWPrefetch) ||
@@ -607,6 +613,7 @@
return resp_cmd.hasData();
}
bool isLLSC() const { return cmd.isLLSC(); }
+ bool isLockedRMW() const { return cmd.isLockedRMW(); }
bool isError() const { return cmd.isError(); }
bool isPrint() const { return cmd.isPrint(); }
bool isFlush() const { return cmd.isFlush(); }
@@ -976,6 +983,8 @@
return MemCmd::SoftPFExReq;
else if (req->isPrefetch())
return MemCmd::SoftPFReq;
+ else if (req->isLockedRMW())
+ return MemCmd::LockedRMWReadReq;
else
return MemCmd::ReadReq;
}
@@ -995,6 +1004,8 @@
MemCmd::InvalidateReq;
} else if (req->isCacheClean()) {
return MemCmd::CleanSharedReq;
+ } else if (req->isLockedRMW()) {
+ return MemCmd::LockedRMWWriteReq;
} else
return MemCmd::WriteReq;
}
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52303
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ieadda4deb17667ca4a6282f87f6da2af3b011f66
Gerrit-Change-Number: 52303
Gerrit-PatchSet: 1
Gerrit-Owner: Austin Harris <m...@austin-harris.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s