changeset 80492ae5148e in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=80492ae5148e
description:
O3: Fix up pipelining icache accesses in fetch stage to function
properly
Fixed up the patch from Yasuko Watanabe that enabled pipelining of
fetch accesses to
icache to work with recent changes to main repository.
Also added the ability for the fetch stage to delay issuing the
fault-carrying
nop when a pipeline fetch causes a fault and no fetch bandwidth is
available
until the next cycle.
diffstat:
src/cpu/o3/fetch.hh | 61 +++++++++++++++
src/cpu/o3/fetch_impl.hh | 180 ++++++++++++++++++++++++++++++++++++++--------
2 files changed, 209 insertions(+), 32 deletions(-)
diffs (truncated from 368 to 300 lines):
diff -r 7d0669201f80 -r 80492ae5148e src/cpu/o3/fetch.hh
--- a/src/cpu/o3/fetch.hh Sun Jul 10 12:56:08 2011 -0500
+++ b/src/cpu/o3/fetch.hh Sun Jul 10 12:56:08 2011 -0500
@@ -150,6 +150,45 @@
}
};
+ private:
+ /* Event to delay delivery of a fetch translation result in case of
+ * a fault and the nop to carry the fault cannot be generated
+ * immediately */
+ class FinishTranslationEvent : public Event
+ {
+ private:
+ DefaultFetch<Impl> *fetch;
+ Fault fault;
+ RequestPtr req;
+
+ public:
+ FinishTranslationEvent(DefaultFetch<Impl> *_fetch)
+ : fetch(_fetch)
+ {}
+
+ void setFault(Fault _fault)
+ {
+ fault = _fault;
+ }
+
+ void setReq(RequestPtr _req)
+ {
+ req = _req;
+ }
+
+ /** Process the delayed finish translation */
+ void process()
+ {
+ assert(fetch->numInst < fetch->fetchWidth);
+ fetch->finishTranslation(fault, req);
+ }
+
+ const char *description() const
+ {
+ return "FullO3CPU FetchFinishTranslation";
+ }
+ };
+
public:
/** Overall fetch status. Used to determine if the CPU can
* deschedule itsef due to a lack of activity.
@@ -363,6 +402,12 @@
* policy. */
ThreadID branchCount();
+ /** Pipeline the next I-cache access to the current one. */
+ void pipelineIcacheAccesses(ThreadID tid);
+
+ /** Profile the reasons of fetch stall. */
+ void profileStall(ThreadID tid);
+
private:
/** Pointer to the O3CPU. */
O3CPU *cpu;
@@ -497,6 +542,12 @@
/** Records if fetch is switched out. */
bool switchedOut;
+ /** Set to true if a pipelined I-cache request should be issued. */
+ bool issuePipelinedIfetch[Impl::MaxThreads];
+
+ /** Event used to delay fault generation of translation faults */
+ FinishTranslationEvent finishTranslationEvent;
+
// @todo: Consider making these vectors and tracking on a per thread basis.
/** Stat for total number of cycles stalled due to an icache miss. */
Stats::Scalar icacheStallCycles;
@@ -520,6 +571,16 @@
Stats::Scalar fetchBlockedCycles;
/** Total number of cycles spent in any other state. */
Stats::Scalar fetchMiscStallCycles;
+ /** Total number of cycles spent in waiting for drains. */
+ Stats::Scalar fetchPendingDrainCycles;
+ /** Total number of stall cycles caused by no active threads to run. */
+ Stats::Scalar fetchNoActiveThreadStallCycles;
+ /** Total number of stall cycles caused by pending traps. */
+ Stats::Scalar fetchPendingTrapStallCycles;
+ /** Total number of stall cycles caused by pending quiesce instructions. */
+ Stats::Scalar fetchPendingQuiesceStallCycles;
+ /** Total number of stall cycles caused by I-cache wait retrys. */
+ Stats::Scalar fetchIcacheWaitRetryStallCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar fetchedCacheLines;
/** Total number of outstanding icache accesses that were dropped
diff -r 7d0669201f80 -r 80492ae5148e src/cpu/o3/fetch_impl.hh
--- a/src/cpu/o3/fetch_impl.hh Sun Jul 10 12:56:08 2011 -0500
+++ b/src/cpu/o3/fetch_impl.hh Sun Jul 10 12:56:08 2011 -0500
@@ -49,6 +49,7 @@
#include "base/types.hh"
#include "config/the_isa.hh"
#include "config/use_checker.hh"
+#include "cpu/base.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/o3/fetch.hh"
#include "cpu/exetrace.hh"
@@ -59,6 +60,7 @@
#include "params/DerivO3CPU.hh"
#include "sim/byteswap.hh"
#include "sim/core.hh"
+#include "sim/eventq.hh"
#if FULL_SYSTEM
#include "arch/tlb.hh"
@@ -135,6 +137,7 @@
: cpu(_cpu),
branchPred(params),
predecoder(NULL),
+ numInst(0),
decodeToFetchDelay(params->decodeToFetchDelay),
renameToFetchDelay(params->renameToFetchDelay),
iewToFetchDelay(params->iewToFetchDelay),
@@ -147,7 +150,8 @@
numFetchingThreads(params->smtNumFetchingThreads),
interruptPending(false),
drainPending(false),
- switchedOut(false)
+ switchedOut(false),
+ finishTranslationEvent(this)
{
if (numThreads > Impl::MaxThreads)
fatal("numThreads (%d) is larger than compiled limit (%d),\n"
@@ -268,6 +272,31 @@
"bad addresses, or out of MSHRs")
.prereq(fetchMiscStallCycles);
+ fetchPendingDrainCycles
+ .name(name() + ".PendingDrainCycles")
+ .desc("Number of cycles fetch has spent waiting on pipes to drain")
+ .prereq(fetchPendingDrainCycles);
+
+ fetchNoActiveThreadStallCycles
+ .name(name() + ".NoActiveThreadStallCycles")
+ .desc("Number of stall cycles due to no active thread to fetch from")
+ .prereq(fetchNoActiveThreadStallCycles);
+
+ fetchPendingTrapStallCycles
+ .name(name() + ".PendingTrapStallCycles")
+ .desc("Number of stall cycles due to pending traps")
+ .prereq(fetchPendingTrapStallCycles);
+
+ fetchPendingQuiesceStallCycles
+ .name(name() + ".PendingQuiesceStallCycles")
+ .desc("Number of stall cycles due to pending quiesce instructions")
+ .prereq(fetchPendingQuiesceStallCycles);
+
+ fetchIcacheWaitRetryStallCycles
+ .name(name() + ".IcacheWaitRetryStallCycles")
+ .desc("Number of stall cycles due to full MSHR")
+ .prereq(fetchIcacheWaitRetryStallCycles);
+
fetchIcacheSquashes
.name(name() + ".IcacheSquashes")
.desc("Number of outstanding Icache misses that were squashed")
@@ -675,8 +704,15 @@
fetchStatus[tid] = IcacheWaitResponse;
}
} else {
+ if (!(numInst < fetchWidth)) {
+ assert(!finishTranslationEvent.scheduled());
+ finishTranslationEvent.setFault(fault);
+ finishTranslationEvent.setReq(mem_req);
+ cpu->schedule(finishTranslationEvent, cpu->nextCycle(curTick() +
cpu->ticks(1)));
+ return;
+ }
DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected
%#x\n",
- mem_req->getVaddr(), memReq[tid]->getVaddr());
+ tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
// Translation faulted, icache request won't be sent.
delete mem_req;
memReq[tid] = NULL;
@@ -851,6 +887,10 @@
wroteToTimeBuffer = false;
+ for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
+ issuePipelinedIfetch[i] = false;
+ }
+
while (threads != end) {
ThreadID tid = *threads++;
@@ -862,10 +902,7 @@
DPRINTF(Fetch, "Running stage.\n");
- // Reset the number of the instruction we're fetching.
- numInst = 0;
-
-#if FULL_SYSTEM
+ #if FULL_SYSTEM
if (fromCommit->commitInfo[0].interruptPending) {
interruptPending = true;
}
@@ -895,6 +932,16 @@
cpu->activityThisCycle();
}
+
+ // Issue the next I-cache request if possible.
+ for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
+ if (issuePipelinedIfetch[i]) {
+ pipelineIcacheAccesses(i);
+ }
+ }
+
+ // Reset the number of the instruction we've fetched.
+ numInst = 0;
}
template <class Impl>
@@ -1099,10 +1146,13 @@
ThreadID tid = getFetchingThread(fetchPolicy);
if (tid == InvalidThreadID || drainPending) {
- DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
-
// Breaks looping condition in tick()
threadFetched = numFetchingThreads;
+
+ if (numThreads == 1) { // @todo Per-thread stats
+ profileStall(0);
+ }
+
return;
}
@@ -1157,32 +1207,9 @@
if (fetchStatus[tid] == Idle) {
++fetchIdleCycles;
DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
- } else if (fetchStatus[tid] == Blocked) {
- ++fetchBlockedCycles;
- DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
- } else if (fetchStatus[tid] == Squashing) {
- ++fetchSquashCycles;
- DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
- } else if (fetchStatus[tid] == IcacheWaitResponse) {
- ++icacheStallCycles;
- DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",
- tid);
- } else if (fetchStatus[tid] == ItlbWait) {
- DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "
- "finish! \n", tid);
- ++fetchTlbCycles;
- } else if (fetchStatus[tid] == TrapPending) {
- DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap\n",
- tid);
- } else if (fetchStatus[tid] == NoGoodAddr) {
- DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable
address\n",
- tid);
}
-
-
- // Status is Idle, Squashing, Blocked, ItlbWait or IcacheWaitResponse
- // so fetch should do nothing.
+ // Status is Idle, so fetch should do nothing.
return;
}
@@ -1329,6 +1356,17 @@
}
pc[tid] = thisPC;
+
+ // pipeline a fetch if we're crossing a cache boundary and not in
+ // a state that would preclude fetching
+ fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
+ Addr block_PC = icacheBlockAlignPC(fetchAddr);
+ issuePipelinedIfetch[tid] = block_PC != cacheDataPC[tid] &&
+ fetchStatus[tid] != IcacheWaitResponse &&
+ fetchStatus[tid] != ItlbWait &&
+ fetchStatus[tid] != IcacheWaitRetry &&
+ fetchStatus[tid] != QuiescePending &&
+ !curMacroop;
}
template<class Impl>
@@ -1511,3 +1549,81 @@
panic("Branch Count Fetch policy unimplemented\n");
return InvalidThreadID;
}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
+{
+ if (!issuePipelinedIfetch[tid]) {
+ return;
+ }
+
+ // The next PC to access.
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev