[gem5-dev] Change in gem5/gem5[develop]: dev: add support for HSA's barrier bit kernel synchronization

2020-06-22 Thread Anthony Gutierrez (Gerrit) via gem5-dev
Anthony Gutierrez has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/29925 )


Change subject: dev: add support for HSA's barrier bit kernel  
synchronization

..

dev: add support for HSA's barrier bit kernel synchronization

This commit adds support for the HSA's barrier bit version of
synchronization.  This method of synchronization is used for all
HIP benchmarks, and thus is necessary to ensure that multiple
kernels from the same queue are synchronizing properly.

Change-Id: I64f2d311a3970b71194e0555e2b932800df65e98
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29925
Reviewed-by: Anthony Gutierrez 
Reviewed-by: Matt Sinclair 
Maintainer: Anthony Gutierrez 
Tested-by: kokoro 
---
M src/dev/hsa/hsa_packet_processor.cc
M src/dev/hsa/hsa_packet_processor.hh
2 files changed, 39 insertions(+), 3 deletions(-)

Approvals:
  Anthony Gutierrez: Looks good to me, approved; Looks good to me, approved
  Matt Sinclair: Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/dev/hsa/hsa_packet_processor.cc  
b/src/dev/hsa/hsa_packet_processor.cc

index f9880e4..4143019 100644
--- a/src/dev/hsa/hsa_packet_processor.cc
+++ b/src/dev/hsa/hsa_packet_processor.cc
@@ -60,6 +60,11 @@
 #define PKT_TYPE(PKT) ((hsa_packet_type_t)(((PKT->header) >> \
 HSA_PACKET_HEADER_TYPE) & (HSA_PACKET_HEADER_WIDTH_TYPE - 1)))

+// checks if the barrier bit is set in the header -- shift the barrier bit
+// to LSB, then bitwise "and" to mask off all other bits
+#define IS_BARRIER(PKT) ((hsa_packet_header_t)(((PKT->header) >> \
+HSA_PACKET_HEADER_BARRIER) & HSA_PACKET_HEADER_WIDTH_BARRIER))
+
 HSAPP_EVENT_DESCRIPTION_GENERATOR(UpdateReadDispIdDmaEvent)
 HSAPP_EVENT_DESCRIPTION_GENERATOR(CmdQueueCmdDmaEvent)
 HSAPP_EVENT_DESCRIPTION_GENERATOR(QueueProcessEvent)
@@ -280,7 +285,7 @@
 HSAPacketProcessor::schedAQLProcessing(uint32_t rl_idx)
 {
 RQLEntry *queue = regdQList[rl_idx];
-if (!queue->aqlProcessEvent.scheduled()) {
+if (!queue->aqlProcessEvent.scheduled() && !queue->getBarrierBit()) {
 Tick processingTick = curTick() + pktProcessDelay;
 schedule(queue->aqlProcessEvent, processingTick);
 DPRINTF(HSAPacketProcessor, "AQL processing scheduled at  
tick: %d\n",

@@ -316,6 +321,16 @@
 // Submit packet to HSA device (dispatcher)
 hsa_device->submitDispatchPkt((void *)disp_pkt, rl_idx,  
host_pkt_addr);

 is_submitted = true;
+/*
+  If this packet is using the "barrier bit" to enforce ordering  
with

+  subsequent kernels, set the bit for this queue now, after
+  dispatching.
+*/
+if (IS_BARRIER(disp_pkt)) {
+DPRINTF(HSAPacketProcessor, "%s: setting barrier bit for  
active" \

+" list ID = %d\n", __FUNCTION__, rl_idx);
+regdQList[rl_idx]->setBarrierBit(true);
+}
 } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_AND) {
 DPRINTF(HSAPacketProcessor, "%s: Processing barrier packet" \
 " active list ID = %d\n", __FUNCTION__, rl_idx);
@@ -631,6 +646,23 @@
 HSAPacketProcessor::finishPkt(void *pvPkt, uint32_t rl_idx)
 {
 HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
+
+// if barrier bit was set, unset it here -- we assume that finishPkt is
+// only called after the completion of a kernel
+if (regdQList[rl_idx]->getBarrierBit()) {
+DPRINTF(HSAPacketProcessor,
+"Unset barrier bit for active list ID %d\n", rl_idx);
+regdQList[rl_idx]->setBarrierBit(false);
+// if pending kernels in the queue after this kernel, reschedule
+if (regdQList[rl_idx]->dispPending()) {
+DPRINTF(HSAPacketProcessor,
+"Rescheduling active list ID %d after unsetting  
barrier "

+"bit\n", rl_idx);
+schedAQLProcessing(rl_idx);
+}
+}
+
+// If set, then blocked schedule, so need to reschedule
 if (regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
 updateReadIndex(0, rl_idx);
 DPRINTF(HSAPacketProcessor,
diff --git a/src/dev/hsa/hsa_packet_processor.hh  
b/src/dev/hsa/hsa_packet_processor.hh

index 206d9ab..3ff7ad2 100644
--- a/src/dev/hsa/hsa_packet_processor.hh
+++ b/src/dev/hsa/hsa_packet_processor.hh
@@ -168,11 +168,13 @@
 typedef struct QueueContext {
 HSAQueueDescriptor* qDesc;
 AQLRingBuffer* aqlBuf;
+// used for HSA packets that enforce synchronization with barrier bit
+bool barrierBit;
 QueueContext(HSAQueueDescriptor* q_desc,
  AQLRingBuffer* aql_buf)
- : qDesc(q_desc), aqlBuf(aql_buf)
+ : qDesc(q_desc), aqlBuf(aql_buf), barrierBit(false)
 {}
-QueueContext() : qDesc(NULL), aqlBuf(NULL) {}
+QueueContext() : qDesc(NULL), aqlBuf(NULL), barrierBit(false) {}
 } QCntxt;

 class 

[gem5-dev] Change in gem5/gem5[develop]: dev: add support for HSA's barrier bit kernel synchronization

2020-06-03 Thread Anthony Gutierrez (Gerrit) via gem5-dev

Hello Tony Gutierrez,

I'd like you to do a code review. Please visit

https://gem5-review.googlesource.com/c/public/gem5/+/29925

to review the following change.


Change subject: dev: add support for HSA's barrier bit kernel  
synchronization

..

dev: add support for HSA's barrier bit kernel synchronization

This commit adds support for the HSA's barrier bit version of
synchronization.  This method of synchronization is used for all
HIP benchmarks, and thus is necessary to ensure that multiple
kernels from the same queue are synchronizing properly.

Change-Id: I64f2d311a3970b71194e0555e2b932800df65e98
---
M src/dev/hsa/hsa_packet_processor.cc
M src/dev/hsa/hsa_packet_processor.hh
2 files changed, 39 insertions(+), 3 deletions(-)



diff --git a/src/dev/hsa/hsa_packet_processor.cc  
b/src/dev/hsa/hsa_packet_processor.cc

index bd05016..3da87b1 100644
--- a/src/dev/hsa/hsa_packet_processor.cc
+++ b/src/dev/hsa/hsa_packet_processor.cc
@@ -60,6 +60,11 @@
 #define PKT_TYPE(PKT) ((hsa_packet_type_t)(((PKT->header) >> \
 HSA_PACKET_HEADER_TYPE) & (HSA_PACKET_HEADER_WIDTH_TYPE - 1)))

+// checks if the barrier bit is set in the header -- shift the barrier bit
+// to LSB, then bitwise "and" to mask off all other bits
+#define IS_BARRIER(PKT) ((hsa_packet_header_t)(((PKT->header) >> \
+HSA_PACKET_HEADER_BARRIER) & HSA_PACKET_HEADER_WIDTH_BARRIER))
+
 HSAPP_EVENT_DESCRIPTION_GENERATOR(UpdateReadDispIdDmaEvent)
 HSAPP_EVENT_DESCRIPTION_GENERATOR(CmdQueueCmdDmaEvent)
 HSAPP_EVENT_DESCRIPTION_GENERATOR(QueueProcessEvent)
@@ -280,7 +285,7 @@
 HSAPacketProcessor::schedAQLProcessing(uint32_t rl_idx)
 {
 RQLEntry *queue = regdQList[rl_idx];
-if (!queue->aqlProcessEvent.scheduled()) {
+if (!queue->aqlProcessEvent.scheduled() && !queue->getBarrierBit()) {
 Tick processingTick = curTick() + pktProcessDelay;
 schedule(queue->aqlProcessEvent, processingTick);
 DPRINTF(HSAPacketProcessor, "AQL processing scheduled at  
tick: %d\n",

@@ -316,6 +321,16 @@
 // Submit packet to HSA device (dispatcher)
 hsa_device->submitDispatchPkt((void *)disp_pkt, rl_idx,  
host_pkt_addr);

 is_submitted = true;
+/*
+  If this packet is using the "barrier bit" to enforce ordering  
with

+  subsequent kernels, set the bit for this queue now, after
+  dispatching.
+*/
+if (IS_BARRIER(disp_pkt)) {
+DPRINTF(HSAPacketProcessor, "%s: setting barrier bit for  
active" \

+" list ID = %d\n", __FUNCTION__, rl_idx);
+regdQList[rl_idx]->setBarrierBit(true);
+}
 } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_AND) {
 DPRINTF(HSAPacketProcessor, "%s: Processing barrier packet" \
 " active list ID = %d\n", __FUNCTION__, rl_idx);
@@ -631,6 +646,23 @@
 HSAPacketProcessor::finishPkt(void *pvPkt, uint32_t rl_idx)
 {
 HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
+
+// if barrier bit was set, unset it here -- we assume that finishPkt is
+// only called after the completion of a kernel
+if (regdQList[rl_idx]->getBarrierBit()) {
+DPRINTF(HSAPacketProcessor,
+"Unset barrier bit for active list ID %d\n", rl_idx);
+regdQList[rl_idx]->setBarrierBit(false);
+// if pending kernels in the queue after this kernel, reschedule
+if (regdQList[rl_idx]->dispPending()) {
+DPRINTF(HSAPacketProcessor,
+"Rescheduling active list ID %d after unsetting  
barrier "

+"bit\n", rl_idx);
+schedAQLProcessing(rl_idx);
+}
+}
+
+// If set, then blocked schedule, so need to reschedule
 if (regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
 updateReadIndex(0, rl_idx);
 DPRINTF(HSAPacketProcessor,
diff --git a/src/dev/hsa/hsa_packet_processor.hh  
b/src/dev/hsa/hsa_packet_processor.hh

index 206d9ab..3ff7ad2 100644
--- a/src/dev/hsa/hsa_packet_processor.hh
+++ b/src/dev/hsa/hsa_packet_processor.hh
@@ -168,11 +168,13 @@
 typedef struct QueueContext {
 HSAQueueDescriptor* qDesc;
 AQLRingBuffer* aqlBuf;
+// used for HSA packets that enforce synchronization with barrier bit
+bool barrierBit;
 QueueContext(HSAQueueDescriptor* q_desc,
  AQLRingBuffer* aql_buf)
- : qDesc(q_desc), aqlBuf(aql_buf)
+ : qDesc(q_desc), aqlBuf(aql_buf), barrierBit(false)
 {}
-QueueContext() : qDesc(NULL), aqlBuf(NULL) {}
+QueueContext() : qDesc(NULL), aqlBuf(NULL), barrierBit(false) {}
 } QCntxt;

 class HSAPacketProcessor: public DmaDevice
@@ -233,6 +235,8 @@
 bool dispPending() { return qCntxt.aqlBuf->dispPending() > 0; }
 SignalState depSignalRdState;
 QueueProcessEvent aqlProcessEvent;
+void setBarrierBit(bool set_val) { qCntxt.barrierBit = set_val; }
+