changeset 42a1873be45c in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=42a1873be45c
description:
gpu-compute: Refactoring Wavefront::dynWaveId
diffstat:
src/arch/hsail/gen.py | 2 +-
src/gpu-compute/compute_unit.cc | 24 ++++++++++++------------
src/gpu-compute/wavefront.cc | 2 +-
src/gpu-compute/wavefront.hh | 3 ++-
4 files changed, 16 insertions(+), 15 deletions(-)
diffs (115 lines):
diff -r 46cffde5d8a6 -r 42a1873be45c src/arch/hsail/gen.py
--- a/src/arch/hsail/gen.py Fri Sep 16 12:30:05 2016 -0400
+++ b/src/arch/hsail/gen.py Fri Sep 16 12:31:46 2016 -0400
@@ -755,7 +755,7 @@
gen_special('GridGroups',
'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
gen_special('LaneId', 'lane')
-gen_special('WaveId', 'w->dynWaveId')
+gen_special('WaveId', 'w->wfId')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
# gen_special('CU'', ')
diff -r 46cffde5d8a6 -r 42a1873be45c src/gpu-compute/compute_unit.cc
--- a/src/gpu-compute/compute_unit.cc Fri Sep 16 12:30:05 2016 -0400
+++ b/src/gpu-compute/compute_unit.cc Fri Sep 16 12:31:46 2016 -0400
@@ -221,7 +221,7 @@
void
ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
- int cnt, LdsChunk *ldsChunk, NDRange *ndr)
+ int waveId, LdsChunk *ldsChunk, NDRange *ndr)
{
static int _n_wave = 0;
@@ -232,20 +232,20 @@
init_mask.reset();
for (int k = 0; k < wfSize(); ++k) {
- if (k + cnt * wfSize() < trueWgSizeTotal)
+ if (k + waveId * wfSize() < trueWgSizeTotal)
init_mask[k] = 1;
}
w->kernId = ndr->dispatchId;
- w->dynWaveId = cnt;
+ w->wfId = waveId;
w->initMask = init_mask.to_ullong();
for (int k = 0; k < wfSize(); ++k) {
- w->workItemId[0][k] = (k+cnt*wfSize()) % trueWgSize[0];
+ w->workItemId[0][k] = (k + waveId * wfSize()) % trueWgSize[0];
w->workItemId[1][k] =
- ((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1];
+ ((k + waveId * wfSize()) / trueWgSize[0]) % trueWgSize[1];
w->workItemId[2][k] =
- (k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]);
+ (k + waveId * wfSize()) / (trueWgSize[0] * trueWgSize[1]);
w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] *
trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] +
@@ -294,8 +294,8 @@
// is this the last wavefront in the workgroup
// if set the spillWidth to be the remaining work-items
// so that the vector access is correct
- if ((cnt + 1) * wfSize() >= trueWgSizeTotal) {
- w->spillWidth = trueWgSizeTotal - (cnt * wfSize());
+ if ((waveId + 1) * wfSize() >= trueWgSizeTotal) {
+ w->spillWidth = trueWgSizeTotal - (waveId * wfSize());
} else {
w->spillWidth = wfSize();
}
@@ -341,7 +341,7 @@
// calculate the number of 32-bit vector registers required by wavefront
int vregDemand = ndr->q.sRegCount + (2 * ndr->q.dRegCount);
- int cnt = 0;
+ int wave_id = 0;
// Assign WFs by spreading them across SIMDs, 1 WF per SIMD at a time
for (int m = 0; m < shader->n_wf * numSIMDs; ++m) {
@@ -352,7 +352,7 @@
if (w->status == Wavefront::S_STOPPED) {
// if we have scheduled all work items then stop
// scheduling wavefronts
- if (cnt * wfSize() >= trueWgSizeTotal)
+ if (wave_id * wfSize() >= trueWgSizeTotal)
break;
// reserve vector registers for the scheduled wavefront
@@ -365,8 +365,8 @@
w->reservedVectorRegs = normSize;
vectorRegsReserved[m % numSIMDs] += w->reservedVectorRegs;
- StartWF(w, trueWgSize, trueWgSizeTotal, cnt, ldsChunk, ndr);
- ++cnt;
+ StartWF(w, trueWgSize, trueWgSizeTotal, wave_id, ldsChunk, ndr);
+ ++wave_id;
}
}
++barrier_id;
diff -r 46cffde5d8a6 -r 42a1873be45c src/gpu-compute/wavefront.cc
--- a/src/gpu-compute/wavefront.cc Fri Sep 16 12:30:05 2016 -0400
+++ b/src/gpu-compute/wavefront.cc Fri Sep 16 12:31:46 2016 -0400
@@ -935,7 +935,7 @@
uint32_t
Wavefront::getStaticContextSize() const
{
- return barCnt.size() * sizeof(int) + sizeof(dynWaveId) + sizeof(maxBarCnt) +
+ return barCnt.size() * sizeof(int) + sizeof(wfId) + sizeof(maxBarCnt) +
sizeof(oldBarrierCnt) + sizeof(barrierCnt) + sizeof(wgId) +
sizeof(computeUnit->cu_id) + sizeof(barrierId) + sizeof(initMask) +
sizeof(privBase) + sizeof(spillBase) + sizeof(ldsChunk) +
diff -r 46cffde5d8a6 -r 42a1873be45c src/gpu-compute/wavefront.hh
--- a/src/gpu-compute/wavefront.hh Fri Sep 16 12:30:05 2016 -0400
+++ b/src/gpu-compute/wavefront.hh Fri Sep 16 12:31:46 2016 -0400
@@ -194,7 +194,8 @@
uint32_t gridSz[3];
uint32_t wgId;
uint32_t wgSz;
- uint32_t dynWaveId;
+ // wavefront id within a workgroup
+ uint32_t wfId;
uint32_t maxDynWaveId;
uint32_t dispatchId;
// outstanding global+local memory requests
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev