Gabe Black has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/34174 )
Change subject: gpu: Use X86ISA instead of TheISA in src/gpu-compute.
......................................................................
gpu: Use X86ISA instead of TheISA in src/gpu-compute.
These files are nominally not tied to the X86ISA, but in reality they
are because they reach into the GPU TLB, which is defined unchangeably in
the X86ISA namespace, and uses data structures within it. Rather than try
to pretend that these structures are generic, we'll instead just use X86ISA
instead of TheISA. If this really does become generic in the future, a
base class with the ISA agnostic essentials defined in it can be used
instead, and the ISA-specific TLBs can define their own derived class
which has whatever else they need. Really the compute unit shouldn't be
communicating with the TLB using sender state since those are supposed
to be little notes for the sender to keep with a transaction, not for
communicating between entities across a port.
Change-Id: Ie6573396f6c77a9a02194f5f4595eefa45d6d66b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/34174
Reviewed-by: Bobby R. Bruce <[email protected]>
Maintainer: Bobby R. Bruce <[email protected]>
Tested-by: kokoro <[email protected]>
---
M src/gpu-compute/compute_unit.cc
M src/gpu-compute/gpu_tlb.cc
M src/gpu-compute/shader.cc
M src/gpu-compute/tlb_coalescer.cc
4 files changed, 31 insertions(+), 31 deletions(-)
Approvals:
Bobby R. Bruce: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/gpu-compute/compute_unit.cc
b/src/gpu-compute/compute_unit.cc
index c39dec8..b98e5a9 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -1047,8 +1047,8 @@
pkt->senderState = new DTLBPort::SenderState(gpuDynInst, index);
// This is the senderState needed by the TLB hierarchy to function
- TheISA::GpuTLB::TranslationState *translation_state =
- new TheISA::GpuTLB::TranslationState(TLB_mode, shader->gpuTc,
false,
+ X86ISA::GpuTLB::TranslationState *translation_state =
+ new X86ISA::GpuTLB::TranslationState(TLB_mode, shader->gpuTc,
false,
pkt->senderState);
pkt->senderState = translation_state;
@@ -1140,7 +1140,7 @@
delete pkt->senderState;
// Because it's atomic operation, only need TLB translation state
- pkt->senderState = new TheISA::GpuTLB::TranslationState(TLB_mode,
+ pkt->senderState = new X86ISA::GpuTLB::TranslationState(TLB_mode,
shader->gpuTc);
tlbPort[tlbPort_index].sendFunctional(pkt);
@@ -1161,8 +1161,8 @@
new_pkt->req->getPaddr());
// safe_cast the senderState
- TheISA::GpuTLB::TranslationState *sender_state =
-
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
+ X86ISA::GpuTLB::TranslationState *sender_state =
+
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
delete sender_state->tlbEntry;
delete new_pkt;
@@ -1182,7 +1182,7 @@
new ComputeUnit::ScalarDTLBPort::SenderState(gpuDynInst);
pkt->senderState =
- new TheISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc,
false,
+ new X86ISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc,
false,
pkt->senderState);
if (scalarDTLBPort.isStalled()) {
@@ -1373,8 +1373,8 @@
computeUnit->tlbCycles += curTick();
// pop off the TLB translation state
- TheISA::GpuTLB::TranslationState *translation_state =
-
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
+ X86ISA::GpuTLB::TranslationState *translation_state =
+
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
// no PageFaults are permitted for data accesses
if (!translation_state->tlbEntry) {
@@ -1446,8 +1446,8 @@
DPRINTF(GPUPrefetch, "CU[%d][%d][%d][%d]: %#x was last\n",
computeUnit->cu_id, simdId, wfSlotId, mp_index, last);
- int stride = last ? (roundDown(vaddr, TheISA::PageBytes) -
- roundDown(last, TheISA::PageBytes)) >>
TheISA::PageShift
+ int stride = last ? (roundDown(vaddr, X86ISA::PageBytes) -
+ roundDown(last, X86ISA::PageBytes)) >>
X86ISA::PageShift
: 0;
DPRINTF(GPUPrefetch, "Stride is %d\n", stride);
@@ -1467,13 +1467,13 @@
// Prefetch Next few pages atomically
for (int pf = 1; pf <= computeUnit->prefetchDepth; ++pf) {
DPRINTF(GPUPrefetch, "%d * %d: %#x\n", pf, stride,
- vaddr+stride*pf*TheISA::PageBytes);
+ vaddr + stride * pf * X86ISA::PageBytes);
if (!stride)
break;
RequestPtr prefetch_req = std::make_shared<Request>(
- vaddr + stride * pf * TheISA::PageBytes,
+ vaddr + stride * pf * X86ISA::PageBytes,
sizeof(uint8_t), 0,
computeUnit->requestorId(),
0, 0, nullptr);
@@ -1484,15 +1484,15 @@
// Because it's atomic operation, only need TLB translation
state
prefetch_pkt->senderState =
- new TheISA::GpuTLB::TranslationState(TLB_mode,
+ new X86ISA::GpuTLB::TranslationState(TLB_mode,
computeUnit->shader->gpuTc, true);
// Currently prefetches are zero-latency, hence the
sendFunctional
sendFunctional(prefetch_pkt);
/* safe_cast the senderState */
- TheISA::GpuTLB::TranslationState *tlb_state =
- safe_cast<TheISA::GpuTLB::TranslationState*>(
+ X86ISA::GpuTLB::TranslationState *tlb_state =
+ safe_cast<X86ISA::GpuTLB::TranslationState*>(
prefetch_pkt->senderState);
@@ -1639,8 +1639,8 @@
{
assert(pkt->senderState);
- TheISA::GpuTLB::TranslationState *translation_state =
- safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
+ X86ISA::GpuTLB::TranslationState *translation_state =
+ safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
// Page faults are not allowed
fatal_if(!translation_state->tlbEntry,
@@ -1704,8 +1704,8 @@
assert(pkt->senderState);
// pop off the TLB translation state
- TheISA::GpuTLB::TranslationState *translation_state
- = safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
+ X86ISA::GpuTLB::TranslationState *translation_state
+ = safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
bool success = translation_state->tlbEntry != nullptr;
delete translation_state->tlbEntry;
@@ -2429,7 +2429,7 @@
void
ComputeUnit::updatePageDivergenceDist(Addr addr)
{
- Addr virt_page_addr = roundDown(addr, TheISA::PageBytes);
+ Addr virt_page_addr = roundDown(addr, X86ISA::PageBytes);
if (!pagesTouched.count(virt_page_addr))
pagesTouched[virt_page_addr] = 1;
diff --git a/src/gpu-compute/gpu_tlb.cc b/src/gpu-compute/gpu_tlb.cc
index c4450fa..7a939e6 100644
--- a/src/gpu-compute/gpu_tlb.cc
+++ b/src/gpu-compute/gpu_tlb.cc
@@ -94,7 +94,7 @@
* @warning: the set-associative version assumes you have a
* fixed page size of 4KB.
* If the page size is greather than 4KB (as defined in the
- * TheISA::PageBytes), then there are various issues w/ the current
+ * X86ISA::PageBytes), then there are various issues w/ the current
* implementation (you'd have the same 8KB page being replicated in
* different sets etc)
*/
@@ -754,7 +754,7 @@
assert(pkt->senderState);
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
- TheISA::PageBytes);
+ X86ISA::PageBytes);
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
@@ -1159,7 +1159,7 @@
local_entry = new_entry;
if (allocationPolicy) {
- Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
+ Addr virt_page_addr = roundDown(vaddr, X86ISA::PageBytes);
DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
virt_page_addr);
@@ -1210,7 +1210,7 @@
bool update_stats = !sender_state->prefetch;
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
- TheISA::PageBytes);
+ X86ISA::PageBytes);
if (update_stats)
tlb->updatePageFootprint(virt_page_addr);
@@ -1339,7 +1339,7 @@
GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
{
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
- TheISA::PageBytes);
+ X86ISA::PageBytes);
DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
virt_page_addr);
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index 0b41193..9db0347 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -105,7 +105,7 @@
Addr start;
// round up length to the next page
- length = roundUp(length, TheISA::PageBytes);
+ length = roundUp(length, X86ISA::PageBytes);
Process *proc = gpuTc->getProcessPtr();
auto mem_state = proc->memState;
diff --git a/src/gpu-compute/tlb_coalescer.cc
b/src/gpu-compute/tlb_coalescer.cc
index 55be11e..a22b2c8 100644
--- a/src/gpu-compute/tlb_coalescer.cc
+++ b/src/gpu-compute/tlb_coalescer.cc
@@ -106,10 +106,10 @@
// Rule 1: Coalesce requests only if they
// fall within the same virtual page
Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
- TheISA::PageBytes);
+ X86ISA::PageBytes);
Addr coalesced_virt_page_addr =
roundDown(coalesced_pkt->req->getVaddr(),
- TheISA::PageBytes);
+ X86ISA::PageBytes);
if (incoming_virt_page_addr != coalesced_virt_page_addr)
return false;
@@ -139,7 +139,7 @@
void
TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
{
- Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
TheISA::PageBytes);
+ Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
X86ISA::PageBytes);
DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for
page %#x\n",
issuedTranslationsTable[virt_page_addr].size(),
virt_page_addr);
@@ -345,7 +345,7 @@
// print a warning message. This is a temporary caveat of
// the current simulator where atomic and timing requests can
// coexist. FIXME remove this check/warning in the future.
- Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
TheISA::PageBytes);
+ Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
X86ISA::PageBytes);
int map_count =
coalescer->issuedTranslationsTable.count(virt_page_addr);
if (map_count) {
@@ -430,7 +430,7 @@
// compute virtual page address for this request
Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
- TheISA::PageBytes);
+ X86ISA::PageBytes);
// is there another outstanding request for the same page addr?
int pending_reqs =
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/34174
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ie6573396f6c77a9a02194f5f4595eefa45d6d66b
Gerrit-Change-Number: 34174
Gerrit-PatchSet: 7
Gerrit-Owner: Gabe Black <[email protected]>
Gerrit-Reviewer: Anthony Gutierrez <[email protected]>
Gerrit-Reviewer: Bobby R. Bruce <[email protected]>
Gerrit-Reviewer: Gabe Black <[email protected]>
Gerrit-Reviewer: kokoro <[email protected]>
Gerrit-CC: Matthew Poremba <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s