Matthew Poremba has uploaded this change for review. ( https://gem5-review.googlesource.com/8302


Change subject: mem: Add unified queue to DRAMCtrl
......................................................................

mem: Add unified queue to DRAMCtrl

Add optional unified queue to the DRAMCtrl to serve as a front-end queue
for flow control. This prevents needing individual flow control for both
read and write queues by exposing only one queue at the slave side. This
makes flow control more generic and simpler.

Change-Id: I1fde4c8da2a51688979d148d9124fb0e22be5c5b
---
M src/mem/DRAMCtrl.py
M src/mem/dram_ctrl.cc
M src/mem/dram_ctrl.hh
3 files changed, 168 insertions(+), 2 deletions(-)



diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py
index 3145751..92d25cc 100644
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -76,6 +76,14 @@
     # bus in front of the controller for multiple ports
     port = SlavePort("Slave port")

+    # the unified queue holds all incoming memory requests to help with
+    # flow control and is searched by the memory controller when its
+    # read or write queues have free space available
+    use_unified_buffer = Param.Bool(False, "Should we use a unified buffer?")
+    unified_buffer_size = Param.Unsigned(100, "Number of total requests")
+    unified_search_depth = Param.Unsigned(16, "Maximum number of entries "
+                                 "to search when looking for new requests")
+
     # the basic configuration of the controller architecture, note
     # that each entry corresponds to a burst for the specific DRAM
     # configuration (e.g. x32 with burst length 8 is 32 bytes) and not
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc
index 62de18d..b1fff15 100644
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -78,6 +78,9 @@
     bankGroupsPerRank(p->bank_groups_per_rank),
     bankGroupArch(p->bank_groups_per_rank > 0),
     banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
+    useUnifiedBuffer(p->use_unified_buffer),
+    unifiedBufferSize(p->unified_buffer_size),
+    unifiedSearchDepth(p->unified_search_depth),
     readBufferSize(p->read_buffer_size),
     writeBufferSize(p->write_buffer_size),
     writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0),
@@ -271,6 +274,67 @@
     return latency;
 }

+void
+DRAMCtrl::processUnifiedQueue()
+{
+    // Fast path if there is nothing to do
+    if (unifiedQueue.empty()) {
+        return;
+    }
+
+    // See how many reads and writes we can issue. Keep these signed:
+    // the response queue can transiently push the read-side occupancy
+    // past readBufferSize, so the difference may be negative.
+    int read_queue_space = readBufferSize
+                         - (readQueue.size() + respQueue.size());
+    int write_queue_space = writeBufferSize - writeQueue.size();
+    uint32_t search_count = 0;
+
+    auto iter = unifiedQueue.begin();
+    while (iter != unifiedQueue.end()) {
+        // Emulate a maximum number of requests we can search per cycle
+        if (++search_count > unifiedSearchDepth) {
+            break;
+        }
+
+        PacketPtr pkt = (*iter);
+
+        // only reads and writes should ever sit in the unified queue
+        assert(pkt->isRead() || pkt->isWrite());
+
+        unsigned size = pkt->getSize();
+        unsigned offset = pkt->getAddr() & (burstSize - 1);
+        // number of DRAM bursts this packet occupies; cast to int so the
+        // space comparisons below are signed (a negative space count must
+        // not be converted to a huge unsigned value and admit the request)
+        int dram_pkt_count = divCeil(offset + size, burstSize);
+
+        // Look at up to the maximum search depth requests. Note that
+        // write merging and read servicing by the write queue are not
+        // handled here and left to the split read and write queues.
+        bool moved = false;
+        if (pkt->isRead() && read_queue_space >= dram_pkt_count) {
+            DPRINTF(DRAM, "Read addr %lld moved from unified queue\n",
+                    pkt->getAddr());
+            read_queue_space -= dram_pkt_count;
+            moved = true;
+        } else if (pkt->isWrite() && write_queue_space >= dram_pkt_count) {
+            DPRINTF(DRAM, "Write addr %lld moved from unified queue\n",
+                    pkt->getAddr());
+            write_queue_space -= dram_pkt_count;
+            moved = true;
+        }
+
+        if (moved) {
+            // track the request as issued-but-not-responded
+            inUnifiedQueue.erase(burstAlign(pkt->getAddr()));
+            unifiedPending.insert(burstAlign(pkt->getAddr()));
+            // erase() already returns the next valid iterator; do NOT
+            // increment again here or we would skip the following entry
+            // (and incrementing a returned end() iterator is UB)
+            iter = unifiedQueue.erase(iter);
+            issueTimingReq(pkt);
+        } else {
+            ++iter;
+        }
+
+        // make sure we don't enqueue too many requests
+        DPRINTF(DRAM, "Unified queue occupancy is %d of %d\n",
+                unifiedQueueOccupancy(), unifiedBufferSize);
+        assert(unifiedQueueOccupancy() <= static_cast<int>(unifiedBufferSize));
+    }
+}
+
+int
+DRAMCtrl::unifiedQueueOccupancy()
+{
+    // Occupancy covers both entries still waiting in the unified queue
+    // and entries already moved to the internal queues but not yet
+    // responded to (the pending set).
+    const auto waiting = unifiedQueue.size();
+    const auto pending = unifiedPending.size();
+    return static_cast<int>(waiting + pending);
+}
+
 bool
 DRAMCtrl::readQueueFull(unsigned int neededEntries) const
 {
@@ -594,7 +658,51 @@
     }
     prevArrival = curTick();

+    // If we are using a unified buffer, place requests in it and
+    // check if space is available in the separate read/write buffers.
+    // Otherwise, send directly to the read/write buffers and use retries.
+    bool rv = true;

+    if (useUnifiedBuffer && (pkt->isRead() || pkt->isWrite())) {
+        // See if we can enqueue directly to internal queues. Otherwise,
+        // place in unified queue to be searched later.
+        unsigned size = pkt->getSize();
+        unsigned offset = pkt->getAddr() & (burstSize - 1);
+        unsigned int dram_pkt_count = divCeil(offset + size, burstSize);
+
+        if ((pkt->isRead() && !readQueueFull(dram_pkt_count)) ||
+            (pkt->isWrite() && !writeQueueFull(dram_pkt_count))) {
+            DPRINTF(DRAM, "Request addr %lld skipping unified queue\n",
+                    pkt->getAddr());
+
+            // separate issued requests to simplify unified queue searching
+            unifiedPending.insert(burstAlign(pkt->getAddr()));
+
+            bool M5_VAR_USED issue_success = issueTimingReq(pkt);
+            assert(issue_success);
+        } else {
+            DPRINTF(DRAM, "Request addr %lld placed in unified queue\n",
+                    pkt->getAddr());
+
+            // place in queue to be searched once a request completes
+            unifiedQueue.push_back(pkt);
+            inUnifiedQueue.insert(burstAlign(pkt->getAddr()));
+        }
+
+        avgUniQLen = unifiedQueueOccupancy();
+
+        // this should not happen if flow control is managed by the port
+        assert(unifiedQueueOccupancy() <= unifiedBufferSize);
+    } else {
+        rv = issueTimingReq(pkt);
+    }
+
+    return rv;
+}
+
+bool
+DRAMCtrl::issueTimingReq(PacketPtr pkt)
+{
     // Find out how many dram packets a pkt translates to
     // If the burst size is equal or larger than the pkt size, then a pkt
     // translates to only one dram packet. Otherwise, a pkt translates to
@@ -715,13 +823,20 @@
     } else {
         // if there is nothing left in any queue, signal a drain
         if (drainState() == DrainState::Draining &&
-            writeQueue.empty() && readQueue.empty() && allRanksDrained()) {
+            writeQueue.empty() && readQueue.empty() && allRanksDrained() &&
+            unifiedQueueOccupancy() == 0) {

             DPRINTF(Drain, "DRAM controller done draining\n");
             signalDrainDone();
         }
     }

+    // If we have space available in the read queue, see if we can
+    // insert requests from the unified queue, if used.
+    if (useUnifiedBuffer) {
+        processUnifiedQueue();
+    }
+
     // We have made a location in the queue available at this point,
     // so if there is a read that was forced to wait, retry now
     if (retryRdReq) {
@@ -873,6 +988,14 @@
 {
     DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr());

+    // if we are using a unified queue, this request can be removed now
+    if (useUnifiedBuffer && (pkt->isRead() || pkt->isWrite())) {
+        DPRINTF(DRAM, "Removing addr %lld from unified queue\n",
+                pkt->getAddr());
+        assert(unifiedPending.count(burstAlign(pkt->getAddr())));
+        unifiedPending.erase(burstAlign(pkt->getAddr()));
+    }
+
     bool needsResponse = pkt->needsResponse();
     // do the actual memory access which also turns the packet into a
     // response
@@ -1514,6 +1637,12 @@
     if (!nextReqEvent.scheduled())
         schedule(nextReqEvent, std::max(nextReqTime, curTick()));

+    // If there are writes available in the unified queue and space
+    // available in the write queue, try to place them there now.
+    if (useUnifiedBuffer && writeQueue.size() < writeBufferSize) {
+        processUnifiedQueue();
+    }
+
     // If there is space available and we have writes waiting then let
     // them retry. This is done here to ensure that the retry does not
     // cause a nextReqEvent to be scheduled before we do so as part of
@@ -2423,6 +2552,11 @@
         .name(name() + ".perBankWrBursts")
         .desc("Per bank write bursts");

+    avgUniQLen
+        .name(name() + ".avgUniQLen")
+        .desc("Average number of requests in unified queue")
+        .precision(3);
+
     avgRdQLen
         .name(name() + ".avgRdQLen")
         .desc("Average read queue length when enqueuing")
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index 467cfe8..dc5be36 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -714,6 +714,12 @@
     EventFunctionWrapper respondEvent;

     /**
+     * Check the unified queue for any requests that may be able to be
+     * copied into the separated read/write queues.
+     */
+    void processUnifiedQueue();
+
+    /**
      * Check if the read queue has room for more entries
      *
      * @param pktCount The number of entries needed in the read queue
@@ -873,7 +879,20 @@
      *
      * @return An address aligned to a DRAM burst
      */
-    Addr burstAlign(Addr addr) const { return (addr & ~(Addr(burstSize - 1))); }
+    Addr burstAlign(Addr addr) const
+    { return (addr & ~(Addr(burstSize - 1))); }
+
+    /**
+     * Unified queue to hold all outstanding memory requests. Issued
+     * requests are moved to a pending set to simplify searching of the
+     * queue. Requests are marked done once DRAM access is complete. After
+     * sending a response on the port, the request is finally removed.
+     */
+    std::list<PacketPtr> unifiedQueue;
+    std::unordered_set<Addr> inUnifiedQueue;
+    std::unordered_set<Addr> unifiedPending;
+
+    int unifiedQueueOccupancy();

     /**
      * The controller's main read and write queues
@@ -926,6 +945,9 @@
     const uint32_t banksPerRank;
     const uint32_t channels;
     uint32_t rowsPerBank;
+    const bool useUnifiedBuffer;
+    const uint32_t unifiedBufferSize;
+    const uint32_t unifiedSearchDepth;
     const uint32_t readBufferSize;
     const uint32_t writeBufferSize;
     const uint32_t writeHighThreshold;
@@ -1049,6 +1071,7 @@
     Stats::Formula busUtilWrite;

     // Average queue lengths
+    Stats::Average avgUniQLen;
     Stats::Average avgRdQLen;
     Stats::Average avgWrQLen;

@@ -1130,6 +1153,7 @@
     Tick recvAtomic(PacketPtr pkt);
     void recvFunctional(PacketPtr pkt);
     bool recvTimingReq(PacketPtr pkt);
+    bool issueTimingReq(PacketPtr pkt);

 };


--
To view, visit https://gem5-review.googlesource.com/8302
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-MessageType: newchange
Gerrit-Change-Id: I1fde4c8da2a51688979d148d9124fb0e22be5c5b
Gerrit-Change-Number: 8302
Gerrit-PatchSet: 1
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
_______________________________________________
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to