Hello Gabor Dozsa,
I'd like you to do a code review. Please visit
https://gem5-review.googlesource.com/c/public/gem5/+/19173
to review the following change.
Change subject: cpu: Disable value forwarding for stores with write strobes
......................................................................
cpu: Disable value forwarding for stores with write strobes
Change-Id: I7cb50b80b70fcf43ab23eb9e7333d16328993fe1
Signed-off-by: Gabor Dozsa <[email protected]>
---
M src/cpu/minor/lsq.cc
M src/cpu/o3/lsq_impl.hh
M src/cpu/o3/lsq_unit.hh
M src/cpu/utils.hh
4 files changed, 69 insertions(+), 21 deletions(-)
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index d43e1ef..87df17f 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -142,10 +142,18 @@
LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
{
- return containsAddrRangeOf(request->getPaddr(), request->getSize(),
+ AddrRangeCoverage ret;
+ ret = containsAddrRangeOf(request->getPaddr(), request->getSize(),
other_request->request->getPaddr(),
other_request->request->getSize());
+ /* If there is a strobe mask then store data forwarding might not be
+ * correct. Instead of checking the enablement of every byte we just
+ * fall back to PartialAddrRangeCoverage to prohibit store data
+ * forwarding. */
+ if (ret == FullAddrRangeCoverage && !request->getByteEnable().empty())
+ ret = PartialAddrRangeCoverage;
+ return ret;
}
+
bool
LSQ::LSQRequest::isBarrier()
{
@@ -1632,7 +1640,9 @@
addr, size, flags, cpu.dataMasterId(),
/* I've no idea why we need the PC, but give it */
inst->pc.instAddr(), amo_op);
- request->request->setByteEnable(byteEnable);
+ request->request->setByteEnable(
+ isAllActiveElement(byteEnable.cbegin(), byteEnable.cend()) ?
+ std::vector<bool>() : byteEnable);
requests.push(request);
inst->inLSQ = true;
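
For reference, a minimal standalone sketch of what the normalization above is meant to achieve (illustration only, not part of the patch; the helper name normalizeByteEnable is made up): a byte-enable mask with every element set adds no information beyond the request size, so it is collapsed to an empty vector, which containsAddrRangeOf() treats as "no strobe mask", leaving full-coverage forwarding possible.

    #include <algorithm>
    #include <vector>

    // Illustration of the ternary in pushRequest() above: an all-active
    // mask is dropped so that forwarding is not needlessly disabled.
    std::vector<bool>
    normalizeByteEnable(const std::vector<bool> &byte_enable)
    {
        if (std::all_of(byte_enable.cbegin(), byte_enable.cend(),
                        [](bool b) { return b; }))
            return std::vector<bool>();
        return byte_enable;
    }
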
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index f2945e7..bd66d45 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -718,9 +718,13 @@
size, flags, data, res, amo_op);
}
assert(req);
- if (!byteEnable.empty()) {
+ if (!byteEnable.empty() &&
+ isAllActiveElement(byteEnable.cbegin(), byteEnable.cend())) {
+ req->_byteEnable = std::vector<bool>();
+ } else {
req->_byteEnable = byteEnable;
}
+
inst->setRequest();
req->taskId(cpu->taskId());
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index d3f1d07..56ace54 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -209,6 +209,14 @@
};
using LQEntry = LSQEntry;
+ /** Coverage of one address range with another */
+ enum AddrRangeCoverage
+ {
+ PartialAddrRangeCoverage, /* Two ranges partly overlap */
+ FullAddrRangeCoverage, /* One range fully covers another */
+ NoAddrRangeCoverage /* Two ranges are disjoint */
+ };
+
public:
using LoadQueue = CircularQueue<LQEntry>;
using StoreQueue = CircularQueue<SQEntry>;
@@ -705,6 +713,8 @@
bool lower_load_has_store_part = req_s < st_e;
bool upper_load_has_store_part = req_e > st_s;
+ AddrRangeCoverage coverage = NoAddrRangeCoverage;
+
// If the store entry is not atomic (atomic does not have valid
// data), the store has all of the data needed, and
// the load is not LLSC, then
@@ -712,7 +722,36 @@
if (!store_it->instruction()->isAtomic() &&
store_has_lower_limit && store_has_upper_limit &&
!req->mainRequest()->isLLSC()) {
+ // If the store has all of the data needed and the load
+ // isn't LLSC then we can forward. Except if the store request
+ // has a byte strobe mask; in that case, we fall back
+ // to PartialAddrRangeCoverage to disable forwarding.
+ const auto& store_req = store_it->request()->mainRequest();
+ if (store_req->getByteEnable().empty())
+ coverage = FullAddrRangeCoverage;
+ else
+ coverage = PartialAddrRangeCoverage;
+ } else if (
+ // This is the partial store-load forwarding case where a store
+ // has only part of the load's data and the load isn't LLSC
+ (!req->mainRequest()->isLLSC() &&
+ ((store_has_lower_limit && lower_load_has_store_part) ||
+ (store_has_upper_limit && upper_load_has_store_part) ||
+ (lower_load_has_store_part && upper_load_has_store_part))) ||
+ // The load is LLSC, and the store has all or part of the
+ // load's data
+ (req->mainRequest()->isLLSC() &&
+ ((store_has_lower_limit || upper_load_has_store_part) &&
+ (store_has_upper_limit || lower_load_has_store_part))) ||
+ // The store entry is atomic and has all or part of the load's
+ // data
+ (store_it->instruction()->isAtomic() &&
+ ((store_has_lower_limit || upper_load_has_store_part) &&
+ (store_has_upper_limit || lower_load_has_store_part)))) {
+ coverage = PartialAddrRangeCoverage;
+ }
+ if (coverage == FullAddrRangeCoverage) {
// Get shift amount for offset into the store's data.
int shift_amt = req->mainRequest()->getVaddr() -
store_it->instruction()->effAddr;
@@ -759,24 +798,7 @@
++lsqForwLoads;
return NoFault;
- } else if (
- // This is the partial store-load forwarding case where a store
- // has only part of the load's data and the load isn't LLSC
- (!req->mainRequest()->isLLSC() &&
- ((store_has_lower_limit && lower_load_has_store_part) ||
- (store_has_upper_limit && upper_load_has_store_part) ||
- (lower_load_has_store_part && upper_load_has_store_part))) ||
- // The load is LLSC, and the store has all or part of the
- // load's data
- (req->mainRequest()->isLLSC() &&
- ((store_has_lower_limit || upper_load_has_store_part) &&
- (store_has_upper_limit || lower_load_has_store_part))) ||
- // The store entry is atomic and has all or part of the load's
- // data
- (store_it->instruction()->isAtomic() &&
- ((store_has_lower_limit || upper_load_has_store_part) &&
- (store_has_upper_limit || lower_load_has_store_part)))) {
-
+ } else if (coverage == PartialAddrRangeCoverage) {
// If it's already been written back, then don't worry about
// stalling on it.
if (store_it->completed()) {
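
To summarise the restructured control flow above, here is a condensed standalone sketch of the coverage decision (hand-written for illustration; the LLSC and atomic sub-cases are folded into a single ranges_overlap flag): a store that fully covers the load and has no byte-enable mask permits forwarding, while a strobed or partially overlapping store only yields PartialAddrRangeCoverage and stalls the load.

    // Illustration only: how 'coverage' is chosen before the forwarding
    // and stalling branches above.
    enum AddrRangeCoverage
    {
        PartialAddrRangeCoverage, /* Two ranges partly overlap */
        FullAddrRangeCoverage,    /* One range fully covers another */
        NoAddrRangeCoverage       /* Two ranges are disjoint */
    };

    AddrRangeCoverage
    classifyCoverage(bool store_covers_load, bool ranges_overlap,
                     bool store_has_strobe)
    {
        if (store_covers_load) {
            // A write-strobe mask may leave holes in the store data, so
            // full coverage is downgraded to partial to disable forwarding.
            return store_has_strobe ? PartialAddrRangeCoverage
                                    : FullAddrRangeCoverage;
        }
        return ranges_overlap ? PartialAddrRangeCoverage
                              : NoAddrRangeCoverage;
    }
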
diff --git a/src/cpu/utils.hh b/src/cpu/utils.hh
index 4c13181..dd1b94f 100644
--- a/src/cpu/utils.hh
+++ b/src/cpu/utils.hh
@@ -93,4 +93,16 @@
return (it_tmp != it_end);
}
+/**
+ * Test if all elements are active in an enablement range
+ */
+inline bool
+isAllActiveElement(const std::vector<bool>::const_iterator& it_start,
+ const std::vector<bool>::const_iterator& it_end)
+{
+ auto it_tmp = it_start;
+ for (; it_tmp != it_end && (*it_tmp); ++it_tmp);
+ return (it_tmp == it_end);
+}
+
#endif // __CPU_UTILS_HH__
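
A possible usage of the new helper (made-up masks, assuming "cpu/utils.hh" is included): the loop advances while elements are active, so the function returns true only when the whole range is active; an empty range trivially satisfies the check.

    #include <cassert>
    #include <vector>

    #include "cpu/utils.hh"

    void
    isAllActiveElementExample()
    {
        const std::vector<bool> all_on{true, true, true, true};
        const std::vector<bool> strobed{true, false, true, true};
        const std::vector<bool> empty;

        assert(isAllActiveElement(all_on.cbegin(), all_on.cend()));
        assert(!isAllActiveElement(strobed.cbegin(), strobed.cend()));
        // An empty range is trivially all-active.
        assert(isAllActiveElement(empty.cbegin(), empty.cend()));
    }
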
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/19173
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I7cb50b80b70fcf43ab23eb9e7333d16328993fe1
Gerrit-Change-Number: 19173
Gerrit-PatchSet: 1
Gerrit-Owner: Giacomo Gabrielli <[email protected]>
Gerrit-Reviewer: Gabor Dozsa <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev