Anthony Gutierrez has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/32836 )
Change subject: gpu-compute: Create CU's ports in the standard way
......................................................................
gpu-compute: Create CU's ports in the standard way
The CU would initialize its ports in getMasterPort(), which
is not desirable as getMasterPort() may be called several
times for the same port. This can lead to a fatal error if the CU
expects to only create a single port of a given type, and may
lead to other issues where stat names are duplicated.
This change instantiates and initializes the CU's ports in the
CU constructor using the CU params.
The index field is also removed from the CU's ports because the
base class already has an ID field, which will be set to the
default value in the base class's constructor for scalar ports.
It doesn't make sense for scalar ports to take an index because
they are scalar, so we let the base class initialize the ID to
the invalid port ID.
Change-Id: Id18386f5f53800a6447d968380676d8fd9bac9df
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/32836
Reviewed-by: Anthony Gutierrez <[email protected]>
Maintainer: Anthony Gutierrez <[email protected]>
Tested-by: kokoro <[email protected]>
---
M src/gpu-compute/compute_unit.cc
M src/gpu-compute/compute_unit.hh
M src/gpu-compute/fetch_unit.cc
M src/gpu-compute/shader.cc
4 files changed, 99 insertions(+), 126 deletions(-)
Approvals:
Anthony Gutierrez: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/gpu-compute/compute_unit.cc
b/src/gpu-compute/compute_unit.cc
index 9a41233..2d64fa3 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -96,6 +96,11 @@
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(this, "ComputeUnit")),
lds(*p->localDataStore), gmTokenPort(name() + ".gmTokenPort", this),
+ ldsPort(csprintf("%s-port", name()), this),
+ scalarDataPort(csprintf("%s-port", name()), this),
+ scalarDTLBPort(csprintf("%s-port", name()), this),
+ sqcPort(csprintf("%s-port", name()), this),
+ sqcTLBPort(csprintf("%s-port", name()), this),
_cacheLineSize(p->system->cacheLineSize()),
_numBarrierSlots(p->num_barrier_slots),
globalSeqNum(0), wavefrontSize(p->wf_size),
@@ -169,16 +174,18 @@
fatal("Invalid WF execution policy (CU)\n");
}
- memPort.resize(wfSize());
+ for (int i = 0; i < p->port_memory_port_connection_count; ++i) {
+ memPort.emplace_back(csprintf("%s-port%d", name(), i), this, i);
+ }
+
+ for (int i = 0; i < p->port_translation_port_connection_count; ++i) {
+ tlbPort.emplace_back(csprintf("%s-port%d", name(), i), this, i);
+ }
// Setup tokens for slave ports. The number of tokens in memSlaveTokens
// is the total token count for the entire vector port (i.e., this CU).
memPortTokens = new TokenManager(p->max_cu_tokens);
- // resize the tlbPort vectorArray
- int tlbPort_width = perLaneTLB ? wfSize() : 1;
- tlbPort.resize(tlbPort_width);
-
registerExitCallback([this]() { exitCallback(); });
lastExecCycle.resize(numVectorALUs, 0);
@@ -214,7 +221,6 @@
lastVaddrSimd[j].clear();
}
lastVaddrCU.clear();
- delete ldsPort;
}
int
@@ -781,7 +787,7 @@
// appropriate cycle to process the timing memory response
// This delay represents the pipeline delay
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
- int index = sender_state->port_index;
+ PortID index = sender_state->port_index;
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
GPUDispatcher &dispatcher = computeUnit->shader->dispatcher();
@@ -886,7 +892,7 @@
}
EventFunctionWrapper *mem_resp_event =
- computeUnit->memPort[index]->createMemRespEvent(pkt);
+ computeUnit->memPort[index].createMemRespEvent(pkt);
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: gpuDynInst: %d, index %d, addr %#x
received!\n",
@@ -1007,7 +1013,7 @@
}
void
-ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr
pkt)
+ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr
pkt)
{
// There must be a way around this check to do the globalMemStart...
Addr tmp_vaddr = pkt->req->getVaddr();
@@ -1039,7 +1045,7 @@
tlbCycles -= curTick();
++tlbRequests;
- int tlbPort_index = perLaneTLB ? index : 0;
+ PortID tlbPort_index = perLaneTLB ? index : 0;
if (shader->timingSim) {
if (debugSegFault) {
@@ -1074,7 +1080,7 @@
pkt->senderState = translation_state;
if (functionalTLB) {
- tlbPort[tlbPort_index]->sendFunctional(pkt);
+ tlbPort[tlbPort_index].sendFunctional(pkt);
// update the hitLevel distribution
int hit_level = translation_state->hitLevel;
@@ -1117,33 +1123,33 @@
// translation is done. Schedule the mem_req_event at the
// appropriate cycle to send the timing memory request to ruby
EventFunctionWrapper *mem_req_event =
- memPort[index]->createMemReqEvent(pkt);
+ memPort[index].createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data "
"scheduled\n", cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, index, pkt->req->getPaddr());
schedule(mem_req_event, curTick() + req_tick_latency);
- } else if (tlbPort[tlbPort_index]->isStalled()) {
- assert(tlbPort[tlbPort_index]->retries.size() > 0);
+ } else if (tlbPort[tlbPort_index].isStalled()) {
+ assert(tlbPort[tlbPort_index].retries.size() > 0);
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Translation for addr %#x "
"failed!\n", cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, tmp_vaddr);
- tlbPort[tlbPort_index]->retries.push_back(pkt);
- } else if (!tlbPort[tlbPort_index]->sendTimingReq(pkt)) {
+ tlbPort[tlbPort_index].retries.push_back(pkt);
+ } else if (!tlbPort[tlbPort_index].sendTimingReq(pkt)) {
// Stall the data port;
// No more packet will be issued till
// ruby indicates resources are freed by
// a recvReqRetry() call back on this port.
- tlbPort[tlbPort_index]->stallPort();
+ tlbPort[tlbPort_index].stallPort();
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Translation for addr %#x "
"failed!\n", cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, tmp_vaddr);
- tlbPort[tlbPort_index]->retries.push_back(pkt);
+ tlbPort[tlbPort_index].retries.push_back(pkt);
} else {
DPRINTF(GPUTLB,
"CU%d: WF[%d][%d]: Translation for addr %#x sent!\n",
@@ -1163,7 +1169,7 @@
pkt->senderState = new TheISA::GpuTLB::TranslationState(TLB_mode,
shader->gpuTc);
- tlbPort[tlbPort_index]->sendFunctional(pkt);
+ tlbPort[tlbPort_index].sendFunctional(pkt);
// the addr of the packet is not modified, so we need to create a
new
// packet, or otherwise the memory access will have the old virtual
@@ -1173,7 +1179,7 @@
new_pkt->dataStatic(pkt->getPtr<uint8_t>());
// Translation is done. It is safe to send the packet to memory.
- memPort[0]->sendFunctional(new_pkt);
+ memPort[0].sendFunctional(new_pkt);
DPRINTF(GPUMem, "Functional sendRequest\n");
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index %d: addr %#x\n", cu_id,
@@ -1205,12 +1211,12 @@
new TheISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc,
false,
pkt->senderState);
- if (scalarDTLBPort->isStalled()) {
- assert(scalarDTLBPort->retries.size());
- scalarDTLBPort->retries.push_back(pkt);
- } else if (!scalarDTLBPort->sendTimingReq(pkt)) {
- scalarDTLBPort->stallPort();
- scalarDTLBPort->retries.push_back(pkt);
+ if (scalarDTLBPort.isStalled()) {
+ assert(scalarDTLBPort.retries.size());
+ scalarDTLBPort.retries.push_back(pkt);
+ } else if (!scalarDTLBPort.sendTimingReq(pkt)) {
+ scalarDTLBPort.stallPort();
+ scalarDTLBPort.retries.push_back(pkt);
} else {
DPRINTF(GPUTLB, "sent scalar %s translation request for
addr %#x\n",
tlb_mode == BaseTLB::Read ? "read" : "write",
@@ -1246,7 +1252,7 @@
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0,
nullptr));
EventFunctionWrapper *mem_req_event =
- memPort[0]->createMemReqEvent(pkt);
+ memPort[0].createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x
scheduling "
"an acquire\n", cu_id, gpuDynInst->simdId,
@@ -1266,7 +1272,7 @@
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0,
nullptr));
EventFunctionWrapper *mem_req_event =
- memPort[0]->createMemReqEvent(pkt);
+ memPort[0].createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x
scheduling "
"a release\n", cu_id, gpuDynInst->simdId,
@@ -1284,7 +1290,7 @@
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0,
nullptr));
EventFunctionWrapper *mem_req_event =
- memPort[0]->createMemReqEvent(pkt);
+ memPort[0].createMemReqEvent(pkt);
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x sync scheduled\n",
@@ -1308,7 +1314,7 @@
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Response for addr %#x, index %d\n",
compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
- pkt->req->getPaddr(), index);
+ pkt->req->getPaddr(), id);
Addr paddr = pkt->req->getPaddr();
@@ -1321,7 +1327,7 @@
int index = gpuDynInst->memStatusVector[paddr].back();
DPRINTF(GPUMem, "Response for addr %#x, index %d\n",
- pkt->req->getPaddr(), index);
+ pkt->req->getPaddr(), id);
gpuDynInst->memStatusVector[paddr].pop_back();
gpuDynInst->pAddr = pkt->req->getPaddr();
@@ -1425,7 +1431,7 @@
safe_cast<DTLBPort::SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
- int mp_index = sender_state->portIndex;
+ PortID mp_index = sender_state->portIndex;
Addr vaddr = pkt->req->getVaddr();
gpuDynInst->memStatusVector[line].push_back(mp_index);
gpuDynInst->tlbHitLevel[mp_index] = hit_level;
@@ -1535,7 +1541,7 @@
// translation is done. Schedule the mem_req_event at the appropriate
// cycle to send the timing memory request to ruby
EventFunctionWrapper *mem_req_event =
- computeUnit->memPort[mp_index]->createMemReqEvent(new_pkt);
+ computeUnit->memPort[mp_index].createMemReqEvent(new_pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data
scheduled\n",
computeUnit->cu_id, gpuDynInst->simdId,
@@ -1575,14 +1581,13 @@
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
- compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, index,
- pkt->req->getPaddr());
+ compute_unit->cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId,
+ id, pkt->req->getPaddr());
} else {
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: gpuDynInst: %d, index %d, addr %#x
data "
"req sent!\n", compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, gpuDynInst->seqNum(), index,
+ gpuDynInst->wfSlotId, gpuDynInst->seqNum(), id,
pkt->req->getPaddr());
}
}
@@ -1598,22 +1603,21 @@
{
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
- ComputeUnit *compute_unit M5_VAR_USED = scalarDataPort->computeUnit;
+ ComputeUnit *compute_unit M5_VAR_USED = scalarDataPort.computeUnit;
- if (!(scalarDataPort->sendTimingReq(pkt))) {
- scalarDataPort->retries.push_back(pkt);
+ if (!(scalarDataPort.sendTimingReq(pkt))) {
+ scalarDataPort.retries.push_back(pkt);
DPRINTF(GPUPort,
- "CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
+ "CU%d: WF[%d][%d]: addr %#x data req failed!\n",
compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, scalarDataPort->index,
- pkt->req->getPaddr());
+ gpuDynInst->wfSlotId, pkt->req->getPaddr());
} else {
DPRINTF(GPUPort,
- "CU%d: WF[%d][%d]: gpuDynInst: %d, index %d, addr %#x
data "
+ "CU%d: WF[%d][%d]: gpuDynInst: %d, addr %#x data "
"req sent!\n", compute_unit->cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, gpuDynInst->seqNum(),
- scalarDataPort->index, pkt->req->getPaddr());
+ pkt->req->getPaddr());
}
}
@@ -1702,8 +1706,8 @@
req_pkt->senderState =
new ComputeUnit::ScalarDataPort::SenderState(gpuDynInst);
- if (!computeUnit->scalarDataPort->sendTimingReq(req_pkt)) {
- computeUnit->scalarDataPort->retries.push_back(req_pkt);
+ if (!computeUnit->scalarDataPort.sendTimingReq(req_pkt)) {
+ computeUnit->scalarDataPort.retries.push_back(req_pkt);
DPRINTF(GPUMem, "send scalar req failed for: %s\n",
gpuDynInst->disassemble());
} else {
@@ -2544,7 +2548,7 @@
// This is the SenderState needed upon return
newPacket->senderState = new LDSPort::SenderState(gpuDynInst);
- return ldsPort->sendTimingReq(newPacket);
+ return ldsPort.sendTimingReq(newPacket);
}
/**
diff --git a/src/gpu-compute/compute_unit.hh
b/src/gpu-compute/compute_unit.hh
index 211dd53..f7484af 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -448,7 +448,7 @@
void doSmReturn(GPUDynInstPtr gpuDynInst);
virtual void init() override;
- void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
+ void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr
pkt);
void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt);
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
bool kernelMemSync,
@@ -652,16 +652,15 @@
class DataPort : public RequestPort
{
public:
- DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
- : RequestPort(_name, _cu), computeUnit(_cu),
- index(_index) { }
+ DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
+ : RequestPort(_name, _cu, id), computeUnit(_cu) { }
bool snoopRangeSent;
struct SenderState : public Packet::SenderState
{
GPUDynInstPtr _gpuDynInst;
- int port_index;
+ PortID port_index;
Packet::SenderState *saved;
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
@@ -681,7 +680,6 @@
protected:
ComputeUnit *computeUnit;
- int index;
virtual bool recvTimingResp(PacketPtr pkt);
virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
@@ -702,11 +700,9 @@
class ScalarDataPort : public RequestPort
{
public:
- ScalarDataPort(const std::string &_name, ComputeUnit *_cu,
- PortID _index)
- : RequestPort(_name, _cu, _index), computeUnit(_cu),
index(_index)
+ ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
+ : RequestPort(_name, _cu), computeUnit(_cu)
{
- (void)index;
}
bool recvTimingResp(PacketPtr pkt) override;
@@ -727,11 +723,11 @@
class MemReqEvent : public Event
{
private:
- ScalarDataPort *scalarDataPort;
+ ScalarDataPort &scalarDataPort;
PacketPtr pkt;
public:
- MemReqEvent(ScalarDataPort *_scalar_data_port, PacketPtr _pkt)
+ MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
: Event(), scalarDataPort(_scalar_data_port), pkt(_pkt)
{
setFlags(Event::AutoDelete);
@@ -745,16 +741,14 @@
private:
ComputeUnit *computeUnit;
- PortID index;
};
// Instruction cache access port
class SQCPort : public RequestPort
{
public:
- SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
- : RequestPort(_name, _cu), computeUnit(_cu),
- index(_index) { }
+ SQCPort(const std::string &_name, ComputeUnit *_cu)
+ : RequestPort(_name, _cu), computeUnit(_cu) { }
bool snoopRangeSent;
@@ -775,7 +769,6 @@
protected:
ComputeUnit *computeUnit;
- int index;
virtual bool recvTimingResp(PacketPtr pkt);
virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
@@ -795,9 +788,9 @@
class DTLBPort : public RequestPort
{
public:
- DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
- : RequestPort(_name, _cu), computeUnit(_cu),
- index(_index), stalled(false)
+ DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
+ : RequestPort(_name, _cu, id), computeUnit(_cu),
+ stalled(false)
{ }
bool isStalled() { return stalled; }
@@ -820,7 +813,7 @@
// the lane in the memInst this is associated with, so we send
// the memory request down the right port
- int portIndex;
+ PortID portIndex;
// constructor used for packets involved in timing accesses
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
@@ -830,7 +823,6 @@
protected:
ComputeUnit *computeUnit;
- int index;
bool stalled;
virtual bool recvTimingResp(PacketPtr pkt);
@@ -913,8 +905,8 @@
class LDSPort : public RequestPort
{
public:
- LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
- : RequestPort(_name, _cu, _id), computeUnit(_cu)
+ LDSPort(const std::string &_name, ComputeUnit *_cu)
+ : RequestPort(_name, _cu), computeUnit(_cu)
{
}
@@ -983,13 +975,7 @@
/** The port to access the Local Data Store
* Can be connected to a LDS object
*/
- LDSPort *ldsPort = nullptr;
-
- LDSPort *
- getLdsPort() const
- {
- return ldsPort;
- }
+ LDSPort ldsPort;
TokenManager *
getTokenManager()
@@ -1000,54 +986,37 @@
/** The memory port for SIMD data accesses.
* Can be connected to PhysMem for Ruby for timing simulations
*/
- std::vector<DataPort*> memPort;
+ std::vector<DataPort> memPort;
// port to the TLB hierarchy (i.e., the L1 TLB)
- std::vector<DTLBPort*> tlbPort;
+ std::vector<DTLBPort> tlbPort;
// port to the scalar data cache
- ScalarDataPort *scalarDataPort;
+ ScalarDataPort scalarDataPort;
// port to the scalar data TLB
- ScalarDTLBPort *scalarDTLBPort;
+ ScalarDTLBPort scalarDTLBPort;
// port to the SQC (i.e. the I-cache)
- SQCPort *sqcPort;
+ SQCPort sqcPort;
// port to the SQC TLB (there's a separate TLB for each I-cache)
- ITLBPort *sqcTLBPort;
+ ITLBPort sqcTLBPort;
Port &
getPort(const std::string &if_name, PortID idx) override
{
- if (if_name == "memory_port") {
- memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
- this, idx);
- return *memPort[idx];
- } else if (if_name == "translation_port") {
- tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
- this, idx);
- return *tlbPort[idx];
+ if (if_name == "memory_port" && idx < memPort.size()) {
+ return memPort[idx];
+ } else if (if_name == "translation_port" && idx < tlbPort.size()) {
+ return tlbPort[idx];
} else if (if_name == "scalar_port") {
- scalarDataPort = new ScalarDataPort(csprintf("%s-port%d",
name(),
- idx), this, idx);
- return *scalarDataPort;
+ return scalarDataPort;
} else if (if_name == "scalar_tlb_port") {
- scalarDTLBPort = new ScalarDTLBPort(csprintf("%s-port",
name()),
- this);
- return *scalarDTLBPort;
+ return scalarDTLBPort;
} else if (if_name == "sqc_port") {
- sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
- this, idx);
- return *sqcPort;
+ return sqcPort;
} else if (if_name == "sqc_tlb_port") {
- sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
- return *sqcTLBPort;
+ return sqcTLBPort;
} else if (if_name == "ldsPort") {
- if (ldsPort) {
- fatal("an LDS port was already allocated");
- }
- ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
- return *ldsPort;
- } else if (if_name == "gmTokenPort") {
- return gmTokenPort;
+ return ldsPort;
} else {
- panic("incorrect port name");
+ return ClockedObject::getPort(if_name, idx);
}
}
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index ac9a5a6..3a139f5 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -174,24 +174,24 @@
computeUnit.shader->gpuTc,
false, pkt->senderState);
- if (computeUnit.sqcTLBPort->isStalled()) {
- assert(computeUnit.sqcTLBPort->retries.size() > 0);
+ if (computeUnit.sqcTLBPort.isStalled()) {
+ assert(computeUnit.sqcTLBPort.retries.size() > 0);
DPRINTF(GPUTLB, "Failed to send TLB req for FETCH addr %#x\n",
vaddr);
- computeUnit.sqcTLBPort->retries.push_back(pkt);
- } else if (!computeUnit.sqcTLBPort->sendTimingReq(pkt)) {
+ computeUnit.sqcTLBPort.retries.push_back(pkt);
+ } else if (!computeUnit.sqcTLBPort.sendTimingReq(pkt)) {
// Stall the data port;
// No more packet is issued till
// ruby indicates resources are freed by
// a recvReqRetry() call back on this port.
- computeUnit.sqcTLBPort->stallPort();
+ computeUnit.sqcTLBPort.stallPort();
DPRINTF(GPUTLB, "Failed to send TLB req for FETCH addr %#x\n",
vaddr);
- computeUnit.sqcTLBPort->retries.push_back(pkt);
+ computeUnit.sqcTLBPort.retries.push_back(pkt);
} else {
DPRINTF(GPUTLB, "sent FETCH translation request for %#x\n",
vaddr);
}
@@ -200,7 +200,7 @@
new TheISA::GpuTLB::TranslationState(BaseTLB::Execute,
computeUnit.shader->gpuTc);
- computeUnit.sqcTLBPort->sendFunctional(pkt);
+ computeUnit.sqcTLBPort.sendFunctional(pkt);
TheISA::GpuTLB::TranslationState *sender_state =
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
@@ -257,8 +257,8 @@
if (timingSim) {
// translation is done. Send the appropriate timing memory request.
- if (!computeUnit.sqcPort->sendTimingReq(pkt)) {
- computeUnit.sqcPort->retries.push_back(std::make_pair(pkt,
+ if (!computeUnit.sqcPort.sendTimingReq(pkt)) {
+ computeUnit.sqcPort.retries.push_back(std::make_pair(pkt,
wavefront));
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
@@ -270,7 +270,7 @@
pkt->req->getPaddr());
}
} else {
- computeUnit.sqcPort->sendFunctional(pkt);
+ computeUnit.sqcPort.sendFunctional(pkt);
processFetchReturn(pkt);
}
}
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index 1d88e85..7b4f20f 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -400,8 +400,8 @@
// fixme: this should be cuList[cu_id] if cu_id != n_cu
// The latter requires a memPort in the dispatcher
- cuList[0]->memPort[0]->sendFunctional(new_pkt1);
- cuList[0]->memPort[0]->sendFunctional(new_pkt2);
+ cuList[0]->memPort[0].sendFunctional(new_pkt1);
+ cuList[0]->memPort[0].sendFunctional(new_pkt2);
delete new_pkt1;
delete new_pkt2;
@@ -419,7 +419,7 @@
// fixme: this should be cuList[cu_id] if cu_id != n_cu
// The latter requires a memPort in the dispatcher
- cuList[0]->memPort[0]->sendFunctional(new_pkt);
+ cuList[0]->memPort[0].sendFunctional(new_pkt);
delete new_pkt;
delete pkt;
@@ -507,7 +507,7 @@
// it's ok tp send all accesses through lane 0
// since the lane # is not known here,
// This isn't important since these are functional accesses.
- cuList[cu_id]->tlbPort[0]->sendFunctional(pkt);
+ cuList[cu_id]->tlbPort[0].sendFunctional(pkt);
/* safe_cast the senderState */
TheISA::GpuTLB::TranslationState *sender_state =
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/32836
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Id18386f5f53800a6447d968380676d8fd9bac9df
Gerrit-Change-Number: 32836
Gerrit-PatchSet: 5
Gerrit-Owner: Anthony Gutierrez <[email protected]>
Gerrit-Reviewer: Alexandru Duțu <[email protected]>
Gerrit-Reviewer: Anthony Gutierrez <[email protected]>
Gerrit-Reviewer: Bradford Beckmann <[email protected]>
Gerrit-Reviewer: Jason Lowe-Power <[email protected]>
Gerrit-Reviewer: Matt Sinclair <[email protected]>
Gerrit-Reviewer: Matthew Poremba <[email protected]>
Gerrit-Reviewer: Tony Gutierrez <[email protected]>
Gerrit-Reviewer: kokoro <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s