Alex Dutu has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/53076 )

Change subject: dev-hsa: Update HSA queue tracking for FS mode
......................................................................

dev-hsa: Update HSA queue tracking for FS mode

In FS mode offsets for HSA queues are determined by the driver and
cannot be linearly assigned as they are in SE mode. Add plumbing to pass
the offset of a queue to the HSA packet processor and then to HW
scheduler.

A mapping between queue ID and doorbell offset, in both directions, is
also needed to be able to unmap queues. ROCm 4.2 is fairly aggressive
about context switching queues, which results in queues being constantly
mapped and unmapped.

Another result of remapping queues is that the read index is not
preserved in gem5. The PM4 packet processor will write the current read
index value to the MQD before the queue is unmapped. The MQD is written
back to memory on unmap and re-read on mapping to obtain the previous
value. Some helper functions are added to be able to restore the read
index from a non-zero value.

Change-Id: I0153ff53765daccc1de23ea3f4b69fd2fa5a275f
---
M src/dev/hsa/hsa_packet_processor.cc
M src/dev/hsa/hsa_packet_processor.hh
M src/dev/hsa/hw_scheduler.cc
M src/dev/hsa/hw_scheduler.hh
4 files changed, 71 insertions(+), 14 deletions(-)



diff --git a/src/dev/hsa/hsa_packet_processor.cc b/src/dev/hsa/hsa_packet_processor.cc
index 44c0e87..b0421d2 100644
--- a/src/dev/hsa/hsa_packet_processor.cc
+++ b/src/dev/hsa/hsa_packet_processor.cc
@@ -102,14 +102,15 @@
                                        uint64_t basePointer,
                                        uint64_t queue_id,
                                        uint32_t size, int doorbellSize,
-                                       GfxVersion gfxVersion)
+                                       GfxVersion gfxVersion,
+                                       Addr offset, uint64_t rd_idx)
 {
     DPRINTF(HSAPacketProcessor,
              "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
              (void *)basePointer, queue_id, size);
     hwSchdlr->registerNewQueue(hostReadIndexPointer,
                                basePointer, queue_id, size, doorbellSize,
-                               gfxVersion);
+                               gfxVersion, offset, rd_idx);
 }

 AddrRangeList
@@ -584,6 +585,20 @@
     std::fill(_aqlComplete.begin(), _aqlComplete.end(), false);
 }

+void
+AQLRingBuffer::setRdIdx(uint64_t value)
+{
+    _rdIdx = value;
+
+    // Mark entries below the previous doorbell value as complete. This will
+    // cause the next call to freeEntry on the queue to increment the read
+    // index to the next value which will be written to the doorbell.
+    for (int i = 0; i <= value; ++i) {
+        _aqlComplete[i] = true;
+        DPRINTF(HSAPacketProcessor, "Marking _aqlComplete[%d] true\n", i);
+    }
+}
+
 bool
 AQLRingBuffer::freeEntry(void *pkt)
 {
diff --git a/src/dev/hsa/hsa_packet_processor.hh b/src/dev/hsa/hsa_packet_processor.hh
index 144fe42..a838069 100644
--- a/src/dev/hsa/hsa_packet_processor.hh
+++ b/src/dev/hsa/hsa_packet_processor.hh
@@ -234,6 +234,7 @@
      void incWrIdx(uint64_t value) { _wrIdx += value; }
      void incDispIdx(uint64_t value) { _dispIdx += value; }
      uint64_t compltnPending() { return (_dispIdx - _rdIdx); }
+     void setRdIdx(uint64_t value);
 };

 struct QCntxt
@@ -353,11 +354,13 @@
                             uint64_t basePointer,
                             uint64_t queue_id,
                             uint32_t size, int doorbellSize,
-                            GfxVersion gfxVersion);
+                            GfxVersion gfxVersion,
+                            Addr offset = 0, uint64_t rd_idx = 0);
     void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize);
     void setDevice(GPUCommandProcessor * dev);
     void updateReadIndex(int, uint32_t);
     void getCommandsFromHost(int pid, uint32_t rl_idx);
+    HWScheduler *hwScheduler() { return hwSchdlr; }

     // PIO interface
     virtual Tick read(Packet*) override;
diff --git a/src/dev/hsa/hw_scheduler.cc b/src/dev/hsa/hw_scheduler.cc
index f42dede..341c9eb 100644
--- a/src/dev/hsa/hw_scheduler.cc
+++ b/src/dev/hsa/hw_scheduler.cc
@@ -88,19 +88,22 @@
                               uint64_t basePointer,
                               uint64_t queue_id,
                               uint32_t size, int doorbellSize,
-                              GfxVersion gfxVersion)
+                              GfxVersion gfxVersion,
+                              Addr offset, uint64_t rd_idx)
 {
     assert(queue_id < MAX_ACTIVE_QUEUES);
     // Map queue ID to doorbell.
     // We are only using offset to pio base address as doorbell
     // We use the same mapping function used by hsa runtime to do this mapping
-    Addr db_offset = queue_id * doorbellSize;
-    if (dbMap.find(db_offset) != dbMap.end()) {
+    if (!offset)
+        offset = queue_id * doorbellSize;
+    if (dbMap.find(offset) != dbMap.end()) {
         panic("Creating an already existing queue (queueID %d)", queue_id);
     }

     // Populate doorbell map
-    dbMap[db_offset] = queue_id;
+    dbMap[offset] = queue_id;
+    qidMap[queue_id] = offset;

     if (queue_id >= MAX_ACTIVE_QUEUES) {
         panic("Attempting to create a queue (queueID %d)" \
@@ -108,12 +111,16 @@
     }

     HSAQueueDescriptor* q_desc =
-       new HSAQueueDescriptor(basePointer, db_offset,
+       new HSAQueueDescriptor(basePointer, offset,
                               hostReadIndexPointer, size, gfxVersion);
     AQLRingBuffer* aql_buf =
         new AQLRingBuffer(NUM_DMA_BUFS, hsaPP->name());
+    aql_buf->setRdIdx(rd_idx);
+    DPRINTF(HSAPacketProcessor, "Setting read index for %#lx to %ld\n",
+                                offset, rd_idx);
+
     QCntxt q_cntxt(q_desc, aql_buf);
-    activeList[dbMap[db_offset]] = q_cntxt;
+    activeList[dbMap[offset]] = q_cntxt;

     // Check if this newly created queue can be directly mapped
     // to registered queue list
@@ -122,7 +129,7 @@
     schedWakeup();
     DPRINTF(HSAPacketProcessor,
              "%s: offset = %p, qID = %d, is_regd = %s, AL size %d\n",
-             __FUNCTION__, db_offset, queue_id,
+             __FUNCTION__, offset, queue_id,
              (register_q) ? "true" : "false", dbMap.size());
 }

@@ -193,7 +200,7 @@
 HWScheduler::contextSwitchQ()
 {
     DPRINTF(HSAPacketProcessor,
-            "Trying to map next queue, @ %s", __FUNCTION__);
+            "Trying to map next queue, @ %s\n", __FUNCTION__);
     // Identify the next queue, if there is nothing to
     // map, return false
     if (!findNextActiveALQ()) {
@@ -327,6 +334,8 @@
     uint32_t al_idx = dbMap[db_addr];
     // Modify the write pointer
     activeList[al_idx].qDesc->writeIndex = doorbell_reg;
+    DPRINTF(HSAPacketProcessor, "queue %d qDesc->writeIndex %d\n",
+            al_idx, activeList[al_idx].qDesc->writeIndex);
     // If this queue is mapped, then start DMA to fetch the
     // AQL packet
     if (regdListMap.find(al_idx) != regdListMap.end()) {
@@ -337,7 +346,8 @@
 void
 HWScheduler::unregisterQueue(uint64_t queue_id, int doorbellSize)
 {
-    Addr db_offset = queue_id * doorbellSize;
+    assert(qidMap.count(queue_id));
+    Addr db_offset = qidMap[queue_id];
     auto dbmap_iter = dbMap.find(db_offset);
     if (dbmap_iter == dbMap.end()) {
         panic("Destroying a non-existing queue (db_offset %x)",
diff --git a/src/dev/hsa/hw_scheduler.hh b/src/dev/hsa/hw_scheduler.hh
index 8c043ec..942eb81 100644
--- a/src/dev/hsa/hw_scheduler.hh
+++ b/src/dev/hsa/hw_scheduler.hh
@@ -61,7 +61,8 @@
                           uint64_t basePointer,
                           uint64_t queue_id,
                           uint32_t size, int doorbellSize,
-                          GfxVersion gfxVersion);
+                          GfxVersion gfxVersion,
+                          Addr offset = 0, uint64_t rd_idx = 0);
     void unregisterQueue(uint64_t queue_id, int doorbellSize);
     void wakeup();
     void schedWakeup();
@@ -92,8 +93,10 @@
     // Active list keeps track of all queues created
     std::map<uint32_t, QCntxt> activeList;
     //TODO: Modify this to support multi-process in the future.
-    // doorbell map, maps doorbells to active list entry
+    // doorbell map, maps doorbell offsets to queue ID
     std::map<Addr, uint32_t> dbMap;
+    // Reverse of doorbell map, maps queue ID to doorbell offset
+    std::map<uint64_t, Addr> qidMap;
     // regdListMap keeps track of the mapping of queues to
     // registered list. regdListMap is indexed with active
     // list index (which is same as queue ID)

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53076
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I0153ff53765daccc1de23ea3f4b69fd2fa5a275f
Gerrit-Change-Number: 53076
Gerrit-PatchSet: 1
Gerrit-Owner: Alex Dutu <alexandru.d...@amd.com>
Gerrit-CC: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to