Hello Gabor Dozsa,

I'd like you to do a code review. Please visit

    https://gem5-review.googlesource.com/c/public/gem5/+/19173

to review the following change.


Change subject: cpu: Disable value forwarding for stores with write strobes
......................................................................

cpu: Disable value forwarding for stores with write strobes

Change-Id: I7cb50b80b70fcf43ab23eb9e7333d16328993fe1
Signed-off-by: Gabor Dozsa <[email protected]>
---
M src/cpu/minor/lsq.cc
M src/cpu/o3/lsq_impl.hh
M src/cpu/o3/lsq_unit.hh
M src/cpu/utils.hh
4 files changed, 69 insertions(+), 21 deletions(-)



diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index d43e1ef..87df17f 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -142,10 +142,18 @@
 LSQ::AddrRangeCoverage
 LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
 {
-    return containsAddrRangeOf(request->getPaddr(), request->getSize(),
+    AddrRangeCoverage ret;
+    ret = containsAddrRangeOf(request->getPaddr(), request->getSize(),
         other_request->request->getPaddr(), other_request->request->getSize());
+    /* If there is a strobe mask then store data forwarding might not be
+     * correct. Instead of checking the enablement of every byte we just fall
+     * back to PartialAddrRangeCoverage to prohibit store data forwarding. */
+    if (ret == FullAddrRangeCoverage && !request->getByteEnable().empty())
+        ret = PartialAddrRangeCoverage;
+    return ret;
 }

+
 bool
 LSQ::LSQRequest::isBarrier()
 {
@@ -1632,7 +1640,9 @@
             addr, size, flags, cpu.dataMasterId(),
             /* I've no idea why we need the PC, but give it */
             inst->pc.instAddr(), amo_op);
-        request->request->setByteEnable(byteEnable);
+        request->request->setByteEnable(
+            isAllActiveElement(byteEnable.cbegin(), byteEnable.cend()) ?
+            std::vector<bool>() : byteEnable);

         requests.push(request);
         inst->inLSQ = true;
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index f2945e7..bd66d45 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -718,9 +718,13 @@
                     size, flags, data, res, amo_op);
         }
         assert(req);
-        if (!byteEnable.empty()) {
+        if (!byteEnable.empty() &&
+            isAllActiveElement(byteEnable.cbegin(), byteEnable.cend())) {
+            req->_byteEnable = std::vector<bool>();
+        } else {
             req->_byteEnable = byteEnable;
         }
+
         inst->setRequest();
         req->taskId(cpu->taskId());

diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index d3f1d07..56ace54 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -209,6 +209,14 @@
     };
     using LQEntry = LSQEntry;

+    /** Coverage of one address range with another */
+    enum AddrRangeCoverage
+    {
+        PartialAddrRangeCoverage, /* Two ranges partly overlap */
+        FullAddrRangeCoverage, /* One range fully covers another */
+        NoAddrRangeCoverage /* Two ranges are disjoint */
+    };
+
   public:
     using LoadQueue = CircularQueue<LQEntry>;
     using StoreQueue = CircularQueue<SQEntry>;
@@ -705,6 +713,8 @@
             bool lower_load_has_store_part = req_s < st_e;
             bool upper_load_has_store_part = req_e > st_s;

+            AddrRangeCoverage coverage = NoAddrRangeCoverage;
+
             // If the store entry is not atomic (atomic does not have valid
             // data), the store has all of the data needed, and
             // the load is not LLSC, then
@@ -712,7 +722,36 @@
             if (!store_it->instruction()->isAtomic() &&
                 store_has_lower_limit && store_has_upper_limit &&
                 !req->mainRequest()->isLLSC()) {
+                // If the store's data has all of the data needed and the load
+                // isn't LLSC then we can forward. Except if the store request
+                // has a byte strobe mask. In the latter case, we fall back
+                // to PartialAddrRangeCoverage to disable forwarding.
+                const auto& store_req = store_it->request()->mainRequest();
+                if (store_req->getByteEnable().empty())
+                    coverage = FullAddrRangeCoverage;
+                else
+                    coverage = PartialAddrRangeCoverage;
+            } else if (
+                // This is the partial store-load forwarding case where a store
+                // has only part of the load's data and the load isn't LLSC
+                (!req->mainRequest()->isLLSC() &&
+                 ((store_has_lower_limit && lower_load_has_store_part) ||
+                  (store_has_upper_limit && upper_load_has_store_part) ||
+                  (lower_load_has_store_part && upper_load_has_store_part))) ||
+                // The load is LLSC, and the store has all or part of the
+                // load's data
+                (req->mainRequest()->isLLSC() &&
+                 ((store_has_lower_limit || upper_load_has_store_part) &&
+                  (store_has_upper_limit || lower_load_has_store_part))) ||
+                // The store entry is atomic and has all or part of the load's
+                // data
+                (store_it->instruction()->isAtomic() &&
+                 ((store_has_lower_limit || upper_load_has_store_part) &&
+                  (store_has_upper_limit || lower_load_has_store_part)))) {
+                coverage = PartialAddrRangeCoverage;
+            }

+            if (coverage == FullAddrRangeCoverage) {
                 // Get shift amount for offset into the store's data.
                 int shift_amt = req->mainRequest()->getVaddr() -
                     store_it->instruction()->effAddr;
@@ -759,24 +798,7 @@
                 ++lsqForwLoads;

                 return NoFault;
-            } else if (
-                // This is the partial store-load forwarding case where a store
-                // has only part of the load's data and the load isn't LLSC
-                (!req->mainRequest()->isLLSC() &&
-                 ((store_has_lower_limit && lower_load_has_store_part) ||
-                  (store_has_upper_limit && upper_load_has_store_part) ||
-                  (lower_load_has_store_part && upper_load_has_store_part))) ||
-                // The load is LLSC, and the store has all or part of the
-                // load's data
-                (req->mainRequest()->isLLSC() &&
-                 ((store_has_lower_limit || upper_load_has_store_part) &&
-                  (store_has_upper_limit || lower_load_has_store_part))) ||
-                // The store entry is atomic and has all or part of the load's
-                // data
-                (store_it->instruction()->isAtomic() &&
-                 ((store_has_lower_limit || upper_load_has_store_part) &&
-                  (store_has_upper_limit || lower_load_has_store_part)))) {
-
+            } else if (coverage == PartialAddrRangeCoverage) {
                 // If it's already been written back, then don't worry about
                 // stalling on it.
                 if (store_it->completed()) {
diff --git a/src/cpu/utils.hh b/src/cpu/utils.hh
index 4c13181..dd1b94f 100644
--- a/src/cpu/utils.hh
+++ b/src/cpu/utils.hh
@@ -93,4 +93,16 @@
     return (it_tmp != it_end);
 }

+/**
+ * Test if all elements are active in an enablement range
+ */
+inline bool
+isAllActiveElement(const std::vector<bool>::const_iterator& it_start,
+                   const std::vector<bool>::const_iterator& it_end)
+{
+    auto it_tmp = it_start;
+    for (; it_tmp != it_end && (*it_tmp); ++it_tmp);
+    return (it_tmp == it_end);
+}
+
 #endif // __CPU_UTILS_HH__
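
For illustration only (not part of the patch): a minimal, self-contained
sketch of the byte-enable convention this change relies on. An empty mask
means "all bytes enabled"; a fully active strobe is collapsed to an empty
vector so the common case can still forward, while any cleared bit keeps the
mask and thereby downgrades full coverage to partial, which disables
forwarding. The helper below only mirrors isAllActiveElement() from
src/cpu/utils.hh; the remaining names are invented for the example.

    #include <cassert>
    #include <vector>

    // True if every byte of the access is enabled by the strobe mask.
    static bool
    allBytesActive(const std::vector<bool> &mask)
    {
        for (bool b : mask)
            if (!b)
                return false;
        return true;
    }

    int main()
    {
        // Fully active strobe: normalised to an empty mask, so forwarding
        // stays possible (FullAddrRangeCoverage in the patch).
        std::vector<bool> full(8, true);
        std::vector<bool> be_full =
            allBytesActive(full) ? std::vector<bool>() : full;
        assert(be_full.empty());

        // Partially active strobe: the mask is kept, so the LSQ falls back
        // to PartialAddrRangeCoverage and does not forward the store data.
        std::vector<bool> partial{true, false, true, true};
        std::vector<bool> be_partial =
            allBytesActive(partial) ? std::vector<bool>() : partial;
        assert(!be_partial.empty());
        return 0;
    }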

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/19173
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I7cb50b80b70fcf43ab23eb9e7333d16328993fe1
Gerrit-Change-Number: 19173
Gerrit-PatchSet: 1
Gerrit-Owner: Giacomo Gabrielli <[email protected]>
Gerrit-Reviewer: Gabor Dozsa <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev
