changeset 308771bd2647 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=308771bd2647
description:
mem, cpu: Add a separate flag for strictly ordered memory
The Request::UNCACHEABLE flag currently serves two different
functions. The first, and obvious, function is to prevent the memory
system from caching the data accessed by the request. The second
function is to prevent reordering and speculation in CPU models.
This changeset gives the order/speculation requirement a separate flag
(Request::STRICT_ORDER). This flag prevents CPU models from doing the
following optimizations:
* Speculation: CPU models are not allowed to issue speculative
loads.
* Write combining: CPU models and caches are not allowed to merge
writes to the same cache line.
Note: The memory system may still reorder accesses unless the
UNCACHEABLE flag is set. It is therefore expected that the
STRICT_ORDER flag be combined with the UNCACHEABLE flag whenever such
reordering must be prevented.
diffstat:
src/arch/alpha/tlb.cc | 2 +-
src/arch/arm/tlb.cc | 14 +++++++-------
src/arch/mips/tlb.cc | 2 +-
src/arch/power/tlb.cc | 2 +-
src/arch/sparc/tlb.cc | 14 +++++++++-----
src/arch/x86/tlb.cc | 8 ++++----
src/cpu/base_dyn_inst.hh | 8 ++++----
src/cpu/minor/lsq.cc | 2 +-
src/cpu/o3/comm.hh | 10 ++++++----
src/cpu/o3/commit_impl.hh | 10 +++++-----
src/cpu/o3/iew_impl.hh | 11 ++++++-----
src/cpu/o3/lsq_unit.hh | 16 ++++++++--------
src/cpu/o3/lsq_unit_impl.hh | 12 ++++++------
src/cpu/translation.hh | 10 +++++-----
src/mem/request.hh | 21 +++++++++++++++++++--
15 files changed, 83 insertions(+), 59 deletions(-)
diffs (truncated from 431 to 300 lines):
diff -r 64cd1dcd61a5 -r 308771bd2647 src/arch/alpha/tlb.cc
--- a/src/arch/alpha/tlb.cc Tue May 05 03:22:31 2015 -0400
+++ b/src/arch/alpha/tlb.cc Tue May 05 03:22:33 2015 -0400
@@ -225,7 +225,7 @@
"IPR memory space not implemented!");
} else {
// mark request as uncacheable
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
// Clear bits 42:35 of the physical address (10-2 in
// Tsunami manual)
diff -r 64cd1dcd61a5 -r 308771bd2647 src/arch/arm/tlb.cc
--- a/src/arch/arm/tlb.cc Tue May 05 03:22:31 2015 -0400
+++ b/src/arch/arm/tlb.cc Tue May 05 03:22:33 2015 -0400
@@ -985,13 +985,13 @@
if (flags & Request::CLEAR_LL){
// @todo: check implications of security extensions
req->setPaddr(0);
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
req->setFlags(Request::CLEAR_LL);
return NoFault;
}
if ((req->isInstFetch() && (!sctlr.i)) ||
((!req->isInstFetch()) && (!sctlr.c))){
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
}
if (!is_fetch) {
assert(flags & MustBeOne);
@@ -1018,10 +1018,10 @@
// @todo: double check this (ARM ARM issue C B3.2.1)
if (long_desc_format || sctlr.tre == 0) {
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
} else {
if (nmrr.ir0 == 0 || nmrr.or0 == 0 || prrr.tr0 != 0x2)
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
}
// Set memory attributes
@@ -1074,9 +1074,9 @@
te->shareable, te->innerAttrs, te->outerAttrs,
static_cast<uint8_t>(te->mtype), isStage2);
setAttr(te->attributes);
- if (te->nonCacheable) {
- req->setFlags(Request::UNCACHEABLE);
- }
+
+ if (te->nonCacheable)
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
Addr pa = te->pAddr(vaddr);
req->setPaddr(pa);
diff -r 64cd1dcd61a5 -r 308771bd2647 src/arch/mips/tlb.cc
--- a/src/arch/mips/tlb.cc Tue May 05 03:22:31 2015 -0400
+++ b/src/arch/mips/tlb.cc Tue May 05 03:22:33 2015 -0400
@@ -148,7 +148,7 @@
// address or by the TLB entry
if ((req->getVaddr() & VAddrUncacheable) == VAddrUncacheable) {
// mark request as uncacheable
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
}
return NoFault;
}
diff -r 64cd1dcd61a5 -r 308771bd2647 src/arch/power/tlb.cc
--- a/src/arch/power/tlb.cc Tue May 05 03:22:31 2015 -0400
+++ b/src/arch/power/tlb.cc Tue May 05 03:22:33 2015 -0400
@@ -150,7 +150,7 @@
if ((req->getVaddr() & VAddrUncacheable) == VAddrUncacheable) {
// mark request as uncacheable
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
}
return NoFault;
}
diff -r 64cd1dcd61a5 -r 308771bd2647 src/arch/sparc/tlb.cc
--- a/src/arch/sparc/tlb.cc Tue May 05 03:22:31 2015 -0400
+++ b/src/arch/sparc/tlb.cc Tue May 05 03:22:33 2015 -0400
@@ -571,8 +571,10 @@
ce_va < vaddr + size && ce_va + ce->range.size > vaddr &&
(!write || ce->pte.writable())) {
req->setPaddr(ce->pte.translate(vaddr));
- if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1)
- req->setFlags(Request::UNCACHEABLE);
+ if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1) {
+ req->setFlags(
+ Request::UNCACHEABLE | Request::STRICT_ORDER);
+ }
DPRINTF(TLB, "TLB: %#X -> %#X\n", vaddr, req->getPaddr());
return NoFault;
} // if matched
@@ -584,8 +586,10 @@
ce_va < vaddr + size && ce_va + ce->range.size > vaddr &&
(!write || ce->pte.writable())) {
req->setPaddr(ce->pte.translate(vaddr));
- if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1)
- req->setFlags(Request::UNCACHEABLE);
+ if (ce->pte.sideffect() || (ce->pte.paddr() >> 39) & 1) {
+ req->setFlags(
+ Request::UNCACHEABLE | Request::STRICT_ORDER);
+ }
DPRINTF(TLB, "TLB: %#X -> %#X\n", vaddr, req->getPaddr());
return NoFault;
} // if matched
@@ -748,7 +752,7 @@
}
if (e->pte.sideffect() || (e->pte.paddr() >> 39) & 1)
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
// cache translation date for next translation
cacheState = tlbdata;
diff -r 64cd1dcd61a5 -r 308771bd2647 src/arch/x86/tlb.cc
--- a/src/arch/x86/tlb.cc Tue May 05 03:22:31 2015 -0400
+++ b/src/arch/x86/tlb.cc Tue May 05 03:22:33 2015 -0400
@@ -206,7 +206,7 @@
req->setFlags(Request::MMAPPED_IPR);
req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
} else if ((IOPort & ~mask(2)) == 0xCFC) {
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
Addr configAddress =
tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
if (bits(configAddress, 31, 31)) {
@@ -217,7 +217,7 @@
req->setPaddr(PhysAddrPrefixIO | IOPort);
}
} else {
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
req->setPaddr(PhysAddrPrefixIO | IOPort);
}
return NoFault;
@@ -261,7 +261,7 @@
return new GeneralProtection(0);
*/
// Force the access to be uncacheable.
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
req->setPaddr(x86LocalAPICAddress(tc->contextId(),
paddr - apicRange.start()));
}
@@ -401,7 +401,7 @@
DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr);
req->setPaddr(paddr);
if (entry->uncacheable)
- req->setFlags(Request::UNCACHEABLE);
+ req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
} else {
//Use the address which already has segmentation applied.
DPRINTF(TLB, "Paging disabled.\n");
diff -r 64cd1dcd61a5 -r 308771bd2647 src/cpu/base_dyn_inst.hh
--- a/src/cpu/base_dyn_inst.hh Tue May 05 03:22:31 2015 -0400
+++ b/src/cpu/base_dyn_inst.hh Tue May 05 03:22:33 2015 -0400
@@ -144,7 +144,7 @@
* @todo: Consider if this is necessary or not.
*/
EACalcDone,
- IsUncacheable,
+ IsStrictlyOrdered,
ReqMade,
MemOpDone,
MaxFlags
@@ -834,8 +834,8 @@
/** Returns whether or not the eff. addr. source registers are ready. */
bool eaSrcsReady();
- /** Is this instruction's memory access uncacheable. */
- bool uncacheable() { return instFlags[IsUncacheable]; }
+ /** Is this instruction's memory access strictly ordered? */
+ bool strictlyOrdered() const { return instFlags[IsStrictlyOrdered]; }
/** Has this instruction generated a memory request. */
bool hasRequest() { return instFlags[ReqMade]; }
@@ -1052,7 +1052,7 @@
{
fault = state->getFault();
- instFlags[IsUncacheable] = state->isUncacheable();
+ instFlags[IsStrictlyOrdered] = state->isStrictlyOrdered();
if (fault == NoFault) {
physEffAddr = state->getPaddr();
diff -r 64cd1dcd61a5 -r 308771bd2647 src/cpu/minor/lsq.cc
--- a/src/cpu/minor/lsq.cc Tue May 05 03:22:31 2015 -0400
+++ b/src/cpu/minor/lsq.cc Tue May 05 03:22:33 2015 -0400
@@ -926,7 +926,7 @@
bool is_load = request->isLoad;
bool is_llsc = request->request.isLLSC();
bool is_swap = request->request.isSwap();
- bool bufferable = !(request->request.isUncacheable() ||
+ bool bufferable = !(request->request.isStrictlyOrdered() ||
is_llsc || is_swap);
if (is_load) {
diff -r 64cd1dcd61a5 -r 308771bd2647 src/cpu/o3/comm.hh
--- a/src/cpu/o3/comm.hh Tue May 05 03:22:31 2015 -0400
+++ b/src/cpu/o3/comm.hh Tue May 05 03:22:33 2015 -0400
@@ -185,8 +185,9 @@
/// Instruction that caused the a non-mispredict squash
DynInstPtr squashInst; // *F
- /// Hack for now to send back an uncached access to the IEW stage.
- DynInstPtr uncachedLoad; // *I
+ /// Hack for now to send back a strictly ordered access to the
+ /// IEW stage.
+ DynInstPtr strictlyOrderedLoad; // *I
/// Communication specifically to the IQ to tell the IQ that it can
/// schedule a non-speculative instruction.
@@ -216,8 +217,9 @@
/// If the interrupt ended up being cleared before being handled
bool clearInterrupt; // *F
- /// Hack for now to send back an uncached access to the IEW stage.
- bool uncached; // *I
+ /// Hack for now to send back an strictly ordered access to
+ /// the IEW stage.
+ bool strictlyOrdered; // *I
};
diff -r 64cd1dcd61a5 -r 308771bd2647 src/cpu/o3/commit_impl.hh
--- a/src/cpu/o3/commit_impl.hh Tue May 05 03:22:31 2015 -0400
+++ b/src/cpu/o3/commit_impl.hh Tue May 05 03:22:33 2015 -0400
@@ -1145,7 +1145,7 @@
// think are possible.
assert(head_inst->isNonSpeculative() || head_inst->isStoreConditional()
|| head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
- (head_inst->isLoad() && head_inst->uncacheable()));
+ (head_inst->isLoad() && head_inst->strictlyOrdered()));
DPRINTF(Commit, "Encountered a barrier or non-speculative "
"instruction [sn:%lli] at the head of the ROB, PC %s.\n",
@@ -1162,11 +1162,11 @@
// it is executed.
head_inst->clearCanCommit();
- if (head_inst->isLoad() && head_inst->uncacheable()) {
- DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %s.\n",
+ if (head_inst->isLoad() && head_inst->strictlyOrdered()) {
+ DPRINTF(Commit, "[sn:%lli]: Strictly ordered load, PC %s.\n",
head_inst->seqNum, head_inst->pcState());
- toIEW->commitInfo[tid].uncached = true;
- toIEW->commitInfo[tid].uncachedLoad = head_inst;
+ toIEW->commitInfo[tid].strictlyOrdered = true;
+ toIEW->commitInfo[tid].strictlyOrderedLoad = head_inst;
} else {
++commitNonSpecStalls;
}
diff -r 64cd1dcd61a5 -r 308771bd2647 src/cpu/o3/iew_impl.hh
--- a/src/cpu/o3/iew_impl.hh Tue May 05 03:22:31 2015 -0400
+++ b/src/cpu/o3/iew_impl.hh Tue May 05 03:22:33 2015 -0400
@@ -1418,9 +1418,9 @@
// Some instructions will be sent to commit without having
// executed because they need commit to handle them.
- // E.g. Uncached loads have not actually executed when they
+ // E.g. Strictly ordered loads have not actually executed when they
// are first sent to commit. Instead commit must tell the LSQ
- // when it's ready to execute the uncached load.
+ // when it's ready to execute the strictly ordered load.
if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() ==
NoFault) {
int dependents = instQueue.wakeDependents(inst);
@@ -1522,9 +1522,10 @@
if (fromCommit->commitInfo[tid].nonSpecSeqNum != 0) {
//DPRINTF(IEW,"NonspecInst from thread %i",tid);
- if (fromCommit->commitInfo[tid].uncached) {
-
instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
- fromCommit->commitInfo[tid].uncachedLoad->setAtCommit();
+ if (fromCommit->commitInfo[tid].strictlyOrdered) {
+ instQueue.replayMemInst(
+ fromCommit->commitInfo[tid].strictlyOrderedLoad);
+ fromCommit->commitInfo[tid].strictlyOrderedLoad->setAtCommit();
} else {
instQueue.scheduleNonSpec(
fromCommit->commitInfo[tid].nonSpecSeqNum);
diff -r 64cd1dcd61a5 -r 308771bd2647 src/cpu/o3/lsq_unit.hh
--- a/src/cpu/o3/lsq_unit.hh Tue May 05 03:22:31 2015 -0400
+++ b/src/cpu/o3/lsq_unit.hh Tue May 05 03:22:33 2015 -0400
@@ -559,15 +559,15 @@
assert(!load_inst->isExecuted());
- // Make sure this isn't an uncacheable access
- // A bit of a hackish way to get uncached accesses to work only if they're
- // at the head of the LSQ and are ready to commit (at the head of the ROB
- // too).
- if (req->isUncacheable() &&
+ // Make sure this isn't a strictly ordered load
+ // A bit of a hackish way to get strictly ordered accesses to work
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev