Matthew Poremba has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/70040?usp=email )

(1 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the submitted one.)

Change subject: dev-amdgpu: Refactor MMIO interface for SDMA engines
......................................................................

dev-amdgpu: Refactor MMIO interface for SDMA engines

Currently the amdgpu simulated device is assumed to be a Vega10. As a
result there are a few things that are hardcoded. One of those is the
number of SDMAs. In order to add a newer device, such as MI100+, we need
to enable a flexible number of SDMAs.

In order to support a variable number of SDMAs and with the MMIO offsets
of each device being potentially different, the MMIO interface for SDMAs
is changed to use an SDMA class method dispatch table which forwards a
32-bit value from the MMIO packet to the MMIO functions in SDMA of the
format `void method(uint32_t)`. Several changes are made to enable this:

 - Allow the SDMA to have a variable MMIO base and size. These are
   configured in python.
 - An SDMA class method dispatch table which contains the MMIO offset
   relative to the SDMA's MMIO base address.
 - An updated writeMMIO method to iterate over the SDMA MMIO address
   ranges and call the appropriate SDMA MMIO method which matches the
   MMIO offset.
 - Moved all SDMA related MMIO data bit twiddling, masking, etc. into
   the MMIO methods themselves instead of in the writeMMIO method in
   SDMAEngine.

Change-Id: Ifce626f84d52f9e27e4438ba4e685e30dbf06dbc
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70040
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
---
M configs/example/gpufs/system/system.py
M src/dev/amdgpu/AMDGPU.py
M src/dev/amdgpu/amdgpu_device.cc
M src/dev/amdgpu/amdgpu_device.hh
M src/dev/amdgpu/interrupt_handler.cc
M src/dev/amdgpu/interrupt_handler.hh
M src/dev/amdgpu/sdma_engine.cc
M src/dev/amdgpu/sdma_engine.hh
8 files changed, 182 insertions(+), 57 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 93f0194..90c5c01 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -129,15 +129,45 @@
     device_ih = AMDGPUInterruptHandler()
     system.pc.south_bridge.gpu.device_ih = device_ih

-    # Setup the SDMA engines
-    sdma0_pt_walker = VegaPagetableWalker()
-    sdma1_pt_walker = VegaPagetableWalker()
+    # Setup the SDMA engines depending on device. The MMIO base addresses
+    # can be found in the driver code under:
+    # include/asic_reg/sdmaX/sdmaX_Y_Z_offset.h
+    num_sdmas = 2
+    sdma_bases = []
+    sdma_sizes = []
+    if args.gpu_device == "Vega10":
+        num_sdmas = 2
+        sdma_bases = [0x4980, 0x5180]
+        sdma_sizes = [0x800] * 2
+    elif args.gpu_device == "MI100":
+        num_sdmas = 8
+        sdma_bases = [
+            0x4980,
+            0x6180,
+            0x78000,
+            0x79000,
+            0x7A000,
+            0x7B000,
+            0x7C000,
+            0x7D000,
+        ]
+        sdma_sizes = [0x1000] * 8
+    else:
+        m5.util.panic(f"Unknown GPU device {args.gpu_device}")

-    sdma0 = SDMAEngine(walker=sdma0_pt_walker)
-    sdma1 = SDMAEngine(walker=sdma1_pt_walker)
+    sdma_pt_walkers = []
+    sdma_engines = []
+    for sdma_idx in range(num_sdmas):
+        sdma_pt_walker = VegaPagetableWalker()
+        sdma_engine = SDMAEngine(
+            walker=sdma_pt_walker,
+            mmio_base=sdma_bases[sdma_idx],
+            mmio_size=sdma_sizes[sdma_idx],
+        )
+        sdma_pt_walkers.append(sdma_pt_walker)
+        sdma_engines.append(sdma_engine)

-    system.pc.south_bridge.gpu.sdma0 = sdma0
-    system.pc.south_bridge.gpu.sdma1 = sdma1
+    system.pc.south_bridge.gpu.sdmas = sdma_engines

     # Setup PM4 packet processor
     pm4_pkt_proc = PM4PacketProcessor()
@@ -155,22 +185,22 @@
     system._dma_ports.append(gpu_hsapp)
     system._dma_ports.append(gpu_cmd_proc)
     system._dma_ports.append(system.pc.south_bridge.gpu)
-    system._dma_ports.append(sdma0)
-    system._dma_ports.append(sdma1)
+    for sdma in sdma_engines:
+        system._dma_ports.append(sdma)
     system._dma_ports.append(device_ih)
     system._dma_ports.append(pm4_pkt_proc)
     system._dma_ports.append(system_hub)
     system._dma_ports.append(gpu_mem_mgr)
     system._dma_ports.append(hsapp_pt_walker)
     system._dma_ports.append(cp_pt_walker)
-    system._dma_ports.append(sdma0_pt_walker)
-    system._dma_ports.append(sdma1_pt_walker)
+    for sdma_pt_walker in sdma_pt_walkers:
+        system._dma_ports.append(sdma_pt_walker)

     gpu_hsapp.pio = system.iobus.mem_side_ports
     gpu_cmd_proc.pio = system.iobus.mem_side_ports
     system.pc.south_bridge.gpu.pio = system.iobus.mem_side_ports
-    sdma0.pio = system.iobus.mem_side_ports
-    sdma1.pio = system.iobus.mem_side_ports
+    for sdma in sdma_engines:
+        sdma.pio = system.iobus.mem_side_ports
     device_ih.pio = system.iobus.mem_side_ports
     pm4_pkt_proc.pio = system.iobus.mem_side_ports
     system_hub.pio = system.iobus.mem_side_ports
diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py
index 1e78672..616c501 100644
--- a/src/dev/amdgpu/AMDGPU.py
+++ b/src/dev/amdgpu/AMDGPU.py
@@ -79,11 +79,9 @@
         False, "Take a checkpoint before the device begins sending MMIOs"
     )

-    # Specific to Vega10: Vega10 has two SDMA engines these do not have any
-    # assigned function and are referenced by ID so they are given the generic
-    # names sdma0, sdma1, ... sdmaN.
-    sdma0 = Param.SDMAEngine("SDMA Engine 0")
-    sdma1 = Param.SDMAEngine("SDMA Engine 1")
+    # SDMA engines. There are a different number depending on device,
+    # therefore an array is used.
+    sdmas = VectorParam.SDMAEngine("All SDMA Engines")

     # The cp is needed here to handle certain packets the device may receive.
     # The config script should not create a new cp here but rather assign the
@@ -100,6 +98,9 @@
     cxx_header = "dev/amdgpu/sdma_engine.hh"
     cxx_class = "gem5::SDMAEngine"

+    mmio_base = Param.Addr(0x0, "Base MMIO Address")
+    mmio_size = Param.Addr(0x800, "Size of MMIO range")
+
     gpu_device = Param.AMDGPUDevice(NULL, "GPU Controller")
     walker = Param.VegaPagetableWalker("Page table walker")

diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 7e6304a..2acf1f4 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -53,7 +53,7 @@

 AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
     : PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),
-      sdma0(p.sdma0), sdma1(p.sdma1), pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
+      pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
       checkpoint_before_mmios(p.checkpoint_before_mmios),
       init_interrupt_count(0), _lastVMID(0),
       deviceMem(name() + ".deviceMem", p.memories, false, "", false)
@@ -84,10 +84,47 @@
         mmioReader.readMMIOTrace(p.trace_file);
     }

-    sdma0->setGPUDevice(this);
-    sdma0->setId(0);
-    sdma1->setGPUDevice(this);
-    sdma1->setId(1);
+    int sdma_id = 0;
+    for (auto& s : p.sdmas) {
+        s->setGPUDevice(this);
+        s->setId(sdma_id);
+        sdmaIds.insert({sdma_id, s});
+        sdmaMmios.insert({sdma_id,
+                          RangeSize(s->getMmioBase(), s->getMmioSize())});
+        DPRINTF(AMDGPUDevice, "SDMA%d has MMIO range %s\n", sdma_id,
+                sdmaMmios[sdma_id].to_string().c_str());
+        sdma_id++;
+    }
+
+    // Map SDMA MMIO addresses to functions
+    sdmaFunc.insert({0x81, &SDMAEngine::setGfxBaseLo});
+    sdmaFunc.insert({0x82, &SDMAEngine::setGfxBaseHi});
+    sdmaFunc.insert({0x88, &SDMAEngine::setGfxRptrHi});
+    sdmaFunc.insert({0x89, &SDMAEngine::setGfxRptrLo});
+    sdmaFunc.insert({0x92, &SDMAEngine::setGfxDoorbellLo});
+    sdmaFunc.insert({0xab, &SDMAEngine::setGfxDoorbellOffsetLo});
+    sdmaFunc.insert({0x80, &SDMAEngine::setGfxSize});
+    sdmaFunc.insert({0xb2, &SDMAEngine::setGfxWptrLo});
+    sdmaFunc.insert({0xb3, &SDMAEngine::setGfxWptrHi});
+    if (p.device_name == "Vega10") {
+        sdmaFunc.insert({0xe1, &SDMAEngine::setPageBaseLo});
+        sdmaFunc.insert({0xe9, &SDMAEngine::setPageRptrLo});
+        sdmaFunc.insert({0xe8, &SDMAEngine::setPageRptrHi});
+        sdmaFunc.insert({0xf2, &SDMAEngine::setPageDoorbellLo});
+        sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
+        sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
+        sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
+    } else if (p.device_name == "MI100") {
+        sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
+        sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
+        sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
+        sdmaFunc.insert({0xea, &SDMAEngine::setPageDoorbellLo});
+        sdmaFunc.insert({0xd8, &SDMAEngine::setPageDoorbellOffsetLo});
+        sdmaFunc.insert({0x10b, &SDMAEngine::setPageWptrLo});
+    } else {
+        panic("Unknown GPU device %s\n", p.device_name);
+    }
+
     deviceIH->setGPUDevice(this);
     pm4PktProc->setGPUDevice(this);
     cp->hsaPacketProc().setGPUDevice(this);
@@ -351,15 +388,25 @@

     DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);

+    // Check SDMA functions first, then fallback to switch statement
+    for (int idx = 0; idx < sdmaIds.size(); ++idx) {
+        if (sdmaMmios[idx].contains(offset)) {
+            Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2;
+            if (sdmaFunc.count(sdma_offset)) {
+                DPRINTF(AMDGPUDevice, "Calling SDMA%d MMIO function %lx\n",
+                        idx, sdma_offset);
+                sdmaFuncPtr mptr = sdmaFunc[sdma_offset];
+                (getSDMAById(idx)->*mptr)(pkt->getLE<uint32_t>());
+            } else {
+                DPRINTF(AMDGPUDevice, "Unknown SDMA%d MMIO: %#lx\n", idx,
+                        sdma_offset);
+            }
+
+            return;
+        }
+    }
+
     switch (aperture) {
-      /* Write a register to the first System DMA. */
-      case SDMA0_BASE:
-        sdma0->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
-        break;
-      /* Write a register to the second System DMA. */
-      case SDMA1_BASE:
-        sdma1->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
-        break;
       /* Write a general register to the graphics register bus manager. */
       case GRBM_BASE:
         gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
@@ -483,19 +530,9 @@
      * PM4 packets selected SDMAs using an integer ID. This method simply maps
      * the integer ID to a pointer to the SDMA and checks for invalid IDs.
      */
-    switch (id) {
-        case 0:
-            return sdma0;
-            break;
-        case 1:
-            return sdma1;
-            break;
-        default:
-            panic("No SDMA with id %d\n", id);
-            break;
-    }
+    assert(sdmaIds.count(id));

-    return nullptr;
+    return sdmaIds[id];
 }

 SDMAEngine*
@@ -549,7 +586,7 @@
     idx = 0;
     for (auto & it : sdmaEngs) {
         sdma_engs_offset[idx] = it.first;
-        sdma_engs[idx] = it.second == sdma0 ? 0 : 1;
+        sdma_engs[idx] = idx;
         ++idx;
     }

@@ -620,7 +657,8 @@
UNSERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));

         for (int idx = 0; idx < sdma_engs_size; ++idx) {
-            SDMAEngine *sdma = sdma_engs[idx] == 0 ? sdma0 : sdma1;
+            assert(sdmaIds.count(idx));
+            SDMAEngine *sdma = sdmaIds[idx];
             sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
         }
     }
@@ -669,8 +707,9 @@
     idMap.erase(idMap.begin(), idMap.end());
     usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());

-    sdma0->deallocateRLCQueues();
-    sdma1->deallocateRLCQueues();
+    for (auto& it : sdmaEngs) {
+        it.second->deallocateRLCQueues();
+    }
 }

 void
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index b64067a..0e58f29 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -109,12 +109,19 @@
     AMDGPUMemoryManager *gpuMemMgr;
     AMDGPUInterruptHandler *deviceIH;
     AMDGPUVM gpuvm;
-    SDMAEngine *sdma0;
-    SDMAEngine *sdma1;
-    std::unordered_map<uint32_t, SDMAEngine *> sdmaEngs;
     PM4PacketProcessor *pm4PktProc;
     GPUCommandProcessor *cp;

+    // SDMAs mapped by doorbell offset
+    std::unordered_map<uint32_t, SDMAEngine *> sdmaEngs;
+    // SDMAs mapped by ID
+    std::unordered_map<uint32_t, SDMAEngine *> sdmaIds;
+    // SDMA ID to MMIO range
+    std::unordered_map<uint32_t, AddrRange> sdmaMmios;
+    // SDMA ID to function
+    typedef void (SDMAEngine::*sdmaFuncPtr)(uint32_t);
+    std::unordered_map<uint32_t, sdmaFuncPtr> sdmaFunc;
+
     /**
      * Initial checkpoint support variables.
      */
diff --git a/src/dev/amdgpu/interrupt_handler.cc b/src/dev/amdgpu/interrupt_handler.cc
index a771976..6f277a1 100644
--- a/src/dev/amdgpu/interrupt_handler.cc
+++ b/src/dev/amdgpu/interrupt_handler.cc
@@ -80,6 +80,12 @@
     assert(client_id == SOC15_IH_CLIENTID_RLC ||
            client_id == SOC15_IH_CLIENTID_SDMA0 ||
            client_id == SOC15_IH_CLIENTID_SDMA1 ||
+           client_id == SOC15_IH_CLIENTID_SDMA2 ||
+           client_id == SOC15_IH_CLIENTID_SDMA3 ||
+           client_id == SOC15_IH_CLIENTID_SDMA4 ||
+           client_id == SOC15_IH_CLIENTID_SDMA5 ||
+           client_id == SOC15_IH_CLIENTID_SDMA6 ||
+           client_id == SOC15_IH_CLIENTID_SDMA7 ||
            client_id == SOC15_IH_CLIENTID_GRBM_CP);
     assert(source_id == CP_EOP || source_id == TRAP_ID);

diff --git a/src/dev/amdgpu/interrupt_handler.hh b/src/dev/amdgpu/interrupt_handler.hh
index ab8a853..9b80e08 100644
--- a/src/dev/amdgpu/interrupt_handler.hh
+++ b/src/dev/amdgpu/interrupt_handler.hh
@@ -58,6 +58,12 @@
     SOC15_IH_CLIENTID_RLC       = 0x07,
     SOC15_IH_CLIENTID_SDMA0     = 0x08,
     SOC15_IH_CLIENTID_SDMA1     = 0x09,
+    SOC15_IH_CLIENTID_SDMA2     = 0x01,
+    SOC15_IH_CLIENTID_SDMA3     = 0x04,
+    SOC15_IH_CLIENTID_SDMA4     = 0x05,
+    SOC15_IH_CLIENTID_SDMA5     = 0x11,
+    SOC15_IH_CLIENTID_SDMA6     = 0x13,
+    SOC15_IH_CLIENTID_SDMA7     = 0x18,
     SOC15_IH_CLIENTID_GRBM_CP   = 0x14
 };

diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index 736df45..e99d694 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -49,7 +49,8 @@
     : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
       gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
       pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
-      pageWptr(0), gpuDevice(nullptr), walker(p.walker)
+      pageWptr(0), gpuDevice(nullptr), walker(p.walker),
+      mmioBase(p.mmio_base), mmioSize(p.mmio_size)
 {
     gfx.ib(&gfxIb);
     gfxIb.parent(&gfx);
@@ -87,6 +88,18 @@
         return SOC15_IH_CLIENTID_SDMA0;
       case 1:
         return SOC15_IH_CLIENTID_SDMA1;
+      case 2:
+        return SOC15_IH_CLIENTID_SDMA2;
+      case 3:
+        return SOC15_IH_CLIENTID_SDMA3;
+      case 4:
+        return SOC15_IH_CLIENTID_SDMA4;
+      case 5:
+        return SOC15_IH_CLIENTID_SDMA5;
+      case 6:
+        return SOC15_IH_CLIENTID_SDMA6;
+      case 7:
+        return SOC15_IH_CLIENTID_SDMA7;
       default:
         panic("Unknown SDMA id");
     }
@@ -1240,6 +1253,10 @@
 {
     gfxDoorbellOffset = insertBits(gfxDoorbellOffset, 31, 0, 0);
     gfxDoorbellOffset |= data;
+    if (bits(gfxDoorbell, 28, 28)) {
+        gpuDevice->setDoorbellType(gfxDoorbellOffset, QueueType::SDMAGfx);
+        gpuDevice->setSDMAEngine(gfxDoorbellOffset, this);
+    }
 }

 void
@@ -1250,9 +1267,11 @@
 }

 void
-SDMAEngine::setGfxSize(uint64_t data)
+SDMAEngine::setGfxSize(uint32_t data)
 {
-    gfx.size(data);
+    uint32_t rb_size = bits(data, 6, 1);
+    assert(rb_size >= 6 && rb_size <= 62);
+    gfx.size(1 << (rb_size + 2));
 }

 void
@@ -1320,6 +1339,10 @@
 {
     pageDoorbellOffset = insertBits(pageDoorbellOffset, 31, 0, 0);
     pageDoorbellOffset |= data;
+    if (bits(pageDoorbell, 28, 28)) {
+ gpuDevice->setDoorbellType(pageDoorbellOffset, QueueType::SDMAPage);
+        gpuDevice->setSDMAEngine(pageDoorbellOffset, this);
+    }
 }

 void
@@ -1330,9 +1353,11 @@
 }

 void
-SDMAEngine::setPageSize(uint64_t data)
+SDMAEngine::setPageSize(uint32_t data)
 {
-    page.size(data);
+    uint32_t rb_size = bits(data, 6, 1);
+    assert(rb_size >= 6 && rb_size <= 62);
+    page.size(1 << (rb_size + 2));
 }

 void
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index 27c1691..1e4f965 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -156,6 +156,9 @@
     void processRLC0(Addr wptrOffset);
     void processRLC1(Addr wptrOffset);

+    Addr mmioBase = 0;
+    Addr mmioSize = 0;
+
   public:
     SDMAEngine(const SDMAEngineParams &p);

@@ -243,6 +246,14 @@
                     uint64_t *dmaBuffer);

     /**
+     * Methods for getting SDMA MMIO base address and size. These are set by
+     * the python configuration depending on device to allow for flexible base
+     * addresses depending on what GPU is being simulated.
+     */
+    Addr getMmioBase() { return mmioBase; }
+    Addr getMmioSize() { return mmioSize; }
+
+    /**
      * Methods for getting the values of SDMA MMIO registers.
      */
     uint64_t getGfxBase() { return gfxBase; }
@@ -269,7 +280,7 @@
     void setGfxDoorbellHi(uint32_t data);
     void setGfxDoorbellOffsetLo(uint32_t data);
     void setGfxDoorbellOffsetHi(uint32_t data);
-    void setGfxSize(uint64_t data);
+    void setGfxSize(uint32_t data);
     void setGfxWptrLo(uint32_t data);
     void setGfxWptrHi(uint32_t data);
     void setPageBaseLo(uint32_t data);
@@ -280,7 +291,7 @@
     void setPageDoorbellHi(uint32_t data);
     void setPageDoorbellOffsetLo(uint32_t data);
     void setPageDoorbellOffsetHi(uint32_t data);
-    void setPageSize(uint64_t data);
+    void setPageSize(uint32_t data);
     void setPageWptrLo(uint32_t data);
     void setPageWptrHi(uint32_t data);


--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70040?usp=email
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ifce626f84d52f9e27e4438ba4e685e30dbf06dbc
Gerrit-Change-Number: 70040
Gerrit-PatchSet: 3
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org

Reply via email to