changeset d426728892fe in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=d426728892fe
description:
        gpu-compute: Adding context serialization methods to Wavefront
        This patch adds methods to serialize the context of a particular
        wavefront to the simulated system memory. Context serialization is
        used when a wavefront is preempted (i.e., on a context switch).

diffstat:

 src/gpu-compute/wavefront.cc |  125 +++++++++++++++++++++++++++++++++++++++++++
 src/gpu-compute/wavefront.hh |   12 ++++
 2 files changed, 137 insertions(+), 0 deletions(-)

diffs (154 lines):

diff -r 42a1873be45c -r d426728892fe src/gpu-compute/wavefront.cc
--- a/src/gpu-compute/wavefront.cc      Fri Sep 16 12:31:46 2016 -0400
+++ b/src/gpu-compute/wavefront.cc      Fri Sep 16 12:32:36 2016 -0400
@@ -941,3 +941,128 @@
            sizeof(privBase) + sizeof(spillBase) + sizeof(ldsChunk) +
            computeUnit->wfSize() * sizeof(ReconvergenceStackEntry);
 }
+/**
+ * Serializes this wavefront's context into the byte buffer at out.
+ *
+ * Fields are written back to back, each advancing the cursor by its own
+ * size, in this order: every barCnt element, wfId, maxBarCnt,
+ * oldBarrierCnt, barrierCnt, the compute unit's cu_id, wgId, barrierId,
+ * initMask (as a 64-bit value), privBase, spillBase,
+ * workItemId[0].size() reconvergence stack slots, the single-precision
+ * VGPRs, the double-precision VGPRs, the condition registers and, when
+ * ldsChunk is set, every byte of the LDS chunk. setContext() reads the
+ * fields back in the same order.
+ *
+ * NOTE(review): the buffer is written through a cast that drops the
+ * const qualifier of out, so callers must pass writable storage. No
+ * bounds are checked here; presumably the caller sizes the buffer from
+ * getStaticContextSize() plus the register and LDS payload - confirm.
+ *
+ * @param out destination buffer for the serialized context
+ */
+void
+Wavefront::getContext(const void *out)
+{
+    uint8_t *iter = (uint8_t *)out;
+    for (int i = 0; i < barCnt.size(); i++) {
+        *(int *)iter = barCnt[i]; iter += sizeof(barCnt[i]);
+    }
+    *(int *)iter = wfId; iter += sizeof(wfId);
+    *(int *)iter = maxBarCnt; iter += sizeof(maxBarCnt);
+    *(int *)iter = oldBarrierCnt; iter += sizeof(oldBarrierCnt);
+    *(int *)iter = barrierCnt; iter += sizeof(barrierCnt);
+    *(int *)iter = computeUnit->cu_id; iter += sizeof(computeUnit->cu_id);
+    *(uint32_t *)iter = wgId; iter += sizeof(wgId);
+    *(uint32_t *)iter = barrierId; iter += sizeof(barrierId);
+    *(uint64_t *)iter = initMask.to_ullong(); iter += 
sizeof(initMask.to_ullong());
+    *(Addr *)iter = privBase; iter += sizeof(privBase);
+    *(Addr *)iter = spillBase; iter += sizeof(spillBase);
+    /*
+     * The reconvergence stack is drained while it is saved: entries are
+     * written from the top (back) downwards and popped as they go, so
+     * slot 0 of the stream holds the former top of the stack. Exactly
+     * workItemId[0].size() slots are emitted; slots beyond the stack
+     * depth are filled with an all-ones sentinel entry that setContext()
+     * recognizes by its pc field.
+     */
+    int stackSize = reconvergenceStack.size();
+    ReconvergenceStackEntry empty = {std::numeric_limits<uint32_t>::max(),
+                                    std::numeric_limits<uint32_t>::max(),
+                                    std::numeric_limits<uint64_t>::max()};
+    for (int i = 0; i < workItemId[0].size(); i++) {
+        if (i < stackSize) {
+            *(ReconvergenceStackEntry *)iter = *reconvergenceStack.back();
+            iter += sizeof(ReconvergenceStackEntry);
+            reconvergenceStack.pop_back();
+        } else {
+            *(ReconvergenceStackEntry *)iter = empty;
+            iter += sizeof(ReconvergenceStackEntry);
+        }
+    }
+    /*
+     * Single-precision VGPRs: for each of the maxSpVgprs registers, one
+     * 32-bit value per lane is read from computeUnit->vrf[simdId], using
+     * the register index produced by remap().
+     */
+    int wf_size = computeUnit->wfSize();
+    for (int i = 0; i < maxSpVgprs; i++) {
+        uint32_t vgprIdx = remap(i, sizeof(uint32_t), 1);
+        for (int lane = 0; lane < wf_size; lane++) {
+            uint32_t regVal = computeUnit->vrf[simdId]->
+                            read<uint32_t>(vgprIdx,lane);
+            *(uint32_t *)iter = regVal; iter += sizeof(regVal);
+        }
+    }
+    /*
+     * Double-precision VGPRs: same walk as above, but one 64-bit value
+     * per lane for each of the maxDpVgprs registers.
+     */
+    for (int i = 0; i < maxDpVgprs; i++) {
+        uint32_t vgprIdx = remap(i, sizeof(uint64_t), 1);
+        for (int lane = 0; lane < wf_size; lane++) {
+            uint64_t regVal = computeUnit->vrf[simdId]->
+                            read<uint64_t>(vgprIdx,lane);
+            *(uint64_t *)iter = regVal; iter += sizeof(regVal);
+        }
+    }
+    /*
+     * Condition registers: one 64-bit value per lane for every register
+     * reported by condRegState->numRegs().
+     */
+    for (int i = 0; i < condRegState->numRegs(); i++) {
+        for (int lane = 0; lane < wf_size; lane++) {
+            uint64_t regVal = condRegState->read<uint64_t>(i, lane);
+            *(uint64_t *)iter = regVal; iter += sizeof(regVal);
+        }
+    }
+
+    /* saving LDS content, one byte at a time; skipped if ldsChunk is null */
+    if (ldsChunk)
+        for (int i = 0; i < ldsChunk->size(); i++) {
+            char val = ldsChunk->read<char>(i);
+            *(char *) iter = val; iter += sizeof(val);
+        }
+}
+
+/**
+ * Restores this wavefront's context from the byte stream at in.
+ *
+ * The stream must have the layout produced by getContext(): every barCnt
+ * element, wfId, maxBarCnt, oldBarrierCnt, barrierCnt, the compute unit's
+ * cu_id, wgId, barrierId, initMask (64 bits), privBase, spillBase,
+ * workItemId[0].size() reconvergence stack slots, the single-precision
+ * VGPRs, the double-precision VGPRs, the condition registers and, when
+ * ldsChunk is set, the LDS bytes.
+ *
+ * NOTE(review): this overwrites computeUnit->cu_id with the saved value,
+ * and restored entries are pushed on top of whatever the reconvergence
+ * stack already holds; presumably the stack is empty when this is
+ * called - confirm against the caller.
+ *
+ * @param in serialized context, as written by getContext()
+ */
+void
+Wavefront::setContext(const void *in)
+{
+    uint8_t *iter = (uint8_t *)in;
+    for (int i = 0; i < barCnt.size(); i++) {
+        barCnt[i] = *(int *)iter; iter += sizeof(barCnt[i]);
+    }
+    wfId = *(int *)iter; iter += sizeof(wfId);
+    maxBarCnt = *(int *)iter; iter += sizeof(maxBarCnt);
+    oldBarrierCnt = *(int *)iter; iter += sizeof(oldBarrierCnt);
+    barrierCnt = *(int *)iter; iter += sizeof(barrierCnt);
+    computeUnit->cu_id = *(int *)iter; iter += sizeof(computeUnit->cu_id);
+    wgId = *(uint32_t *)iter; iter += sizeof(wgId);
+    barrierId = *(uint32_t *)iter; iter += sizeof(barrierId);
+    // getContext() stores the mask as the 64-bit result of to_ullong(), so
+    // advance by that width rather than by sizeof(VectorMask).
+    initMask = VectorMask(*(uint64_t *)iter); iter += sizeof(uint64_t);
+    privBase = *(Addr *)iter; iter += sizeof(privBase);
+    spillBase = *(Addr *)iter; iter += sizeof(spillBase);
+
+    /*
+     * getContext() writes the stack top first, so slot 0 holds the entry
+     * that must end up on top. Walk the slots backwards so that entries
+     * are pushed bottom first and the saved order is rebuilt; pushing in
+     * stream order would restore the stack upside down. Slots whose pc
+     * is the all-ones sentinel are unused and skipped.
+     */
+    const int numSlots = workItemId[0].size();
+    uint8_t *stackSlots = iter;
+    iter += numSlots * sizeof(ReconvergenceStackEntry);
+    for (int i = numSlots - 1; i >= 0; i--) {
+        ReconvergenceStackEntry newEntry = *(ReconvergenceStackEntry *)
+            (stackSlots + i * sizeof(ReconvergenceStackEntry));
+        if (newEntry.pc != std::numeric_limits<uint32_t>::max()) {
+            pushToReconvergenceStack(newEntry.pc, newEntry.rpc,
+                                     newEntry.execMask);
+        }
+    }
+    int wf_size = computeUnit->wfSize();
+
+    /* Single-precision VGPRs: one 32-bit value per lane per register. */
+    for (int i = 0; i < maxSpVgprs; i++) {
+        uint32_t vgprIdx = remap(i, sizeof(uint32_t), 1);
+        for (int lane = 0; lane < wf_size; lane++) {
+            uint32_t regVal = *(uint32_t *)iter; iter += sizeof(regVal);
+            computeUnit->vrf[simdId]->write<uint32_t>(vgprIdx, regVal, lane);
+        }
+    }
+
+    /* Double-precision VGPRs: one 64-bit value per lane per register. */
+    for (int i = 0; i < maxDpVgprs; i++) {
+        uint32_t vgprIdx = remap(i, sizeof(uint64_t), 1);
+        for (int lane = 0; lane < wf_size; lane++) {
+            uint64_t regVal = *(uint64_t *)iter; iter += sizeof(regVal);
+            computeUnit->vrf[simdId]->write<uint64_t>(vgprIdx, regVal, lane);
+        }
+    }
+
+    /* Condition registers: one 64-bit value per lane per register. */
+    for (int i = 0; i < condRegState->numRegs(); i++) {
+        for (int lane = 0; lane < wf_size; lane++) {
+            uint64_t regVal = *(uint64_t *)iter; iter += sizeof(regVal);
+            condRegState->write<uint64_t>(i, lane, regVal);
+        }
+    }
+    /** Restoring LDS contents, one byte at a time; skipped if no chunk */
+    if (ldsChunk)
+        for (int i = 0; i < ldsChunk->size(); i++) {
+            char val = *(char *) iter; iter += sizeof(val);
+            ldsChunk->write<char>(i, val);
+        }
+}
diff -r 42a1873be45c -r d426728892fe src/gpu-compute/wavefront.hh
--- a/src/gpu-compute/wavefront.hh      Fri Sep 16 12:31:46 2016 -0400
+++ b/src/gpu-compute/wavefront.hh      Fri Sep 16 12:32:36 2016 -0400
@@ -354,6 +354,18 @@
      */
     uint32_t getStaticContextSize() const;
 
+    /**
+     * Writes the hardware context of this wavefront, as a stream of
+     * bytes, to the buffer at out. Nothing is returned; although out is
+     * declared const, the implementation writes through it, so the
+     * caller must supply writable storage.
+     * This method is designed for HSAIL execution
+     *
+     * @param out buffer that receives the serialized context
+     */
+    void getContext(const void *out);
+
+    /**
+     * Sets the hardware context from a stream of bytes. The stream is
+     * expected to have the layout written by getContext().
+     * This method is designed for HSAIL execution
+     *
+     * @param in buffer holding the serialized context
+     */
+    void setContext(const void *in);
+
   private:
     /**
      * Stack containing Control Flow Graph nodes (i.e., kernel instructions)
_______________________________________________
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to