[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3, gpu-compute: Implement out-of-range accesses

2020-07-13 Thread Anthony Gutierrez (Gerrit) via gem5-dev
Anthony Gutierrez has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/29935 )


Change subject: arch-gcn3, gpu-compute: Implement out-of-range accesses
..

arch-gcn3, gpu-compute: Implement out-of-range accesses

Certain buffer out-of-range memory accesses should be special
cased and not generate memory accesses. This patch implements
those special cases and supresses lanes from accessing memory
when the calculated address falls in an ISA-specified out-of-range
condition.

Change-Id: I8298f861c6b59587789853a01e503ba7d98cb13d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29935
Tested-by: kokoro 
Reviewed-by: Anthony Gutierrez 
Reviewed-by: Matt Sinclair 
Maintainer: Anthony Gutierrez 
---
M src/arch/gcn3/insts/instructions.cc
M src/arch/gcn3/insts/op_encodings.hh
M src/gpu-compute/global_memory_pipeline.cc
3 files changed, 96 insertions(+), 6 deletions(-)

Approvals:
  Anthony Gutierrez: Looks good to me, approved; Looks good to me, approved
  Matt Sinclair: Looks good to me, but someone else must approve
  kokoro: Regressions pass



diff --git a/src/arch/gcn3/insts/instructions.cc  
b/src/arch/gcn3/insts/instructions.cc

index b923eae..2e39bf5 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -34453,8 +34453,12 @@

 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 if (gpuDynInst->exec_mask[lane]) {
-vdst[lane] = (VecElemU32)((reinterpret_cast(
-gpuDynInst->d_data))[lane]);
+if (!oobMask[lane]) {
+vdst[lane] =  
(VecElemU32)((reinterpret_cast(

+gpuDynInst->d_data))[lane]);
+} else {
+vdst[lane] = 0;
+}
 }
 }

@@ -34580,8 +34584,12 @@

 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 if (gpuDynInst->exec_mask[lane]) {
-vdst[lane] = (VecElemU32)((reinterpret_cast(
-gpuDynInst->d_data))[lane]);
+if (!oobMask[lane]) {
+vdst[lane] =  
(VecElemU32)((reinterpret_cast(

+gpuDynInst->d_data))[lane]);
+} else {
+vdst[lane] = 0;
+}
 }
 }

@@ -34707,8 +34715,12 @@

 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 if (gpuDynInst->exec_mask[lane]) {
-vdst[lane] = (reinterpret_cast(
-gpuDynInst->d_data))[lane];
+if (!oobMask[lane]) {
+vdst[lane] = (reinterpret_cast(
+gpuDynInst->d_data))[lane];
+} else {
+vdst[lane] = 0;
+}
 }
 }

diff --git a/src/arch/gcn3/insts/op_encodings.hh  
b/src/arch/gcn3/insts/op_encodings.hh

index 308560a..22c146a 100644
--- a/src/arch/gcn3/insts/op_encodings.hh
+++ b/src/arch/gcn3/insts/op_encodings.hh
@@ -40,6 +40,7 @@
 #include "arch/gcn3/gpu_mem_helpers.hh"
 #include "arch/gcn3/insts/gpu_static_inst.hh"
 #include "arch/gcn3/operand.hh"
+#include "debug/GCN3.hh"
 #include "debug/GPUExec.hh"
 #include "mem/ruby/system/RubySystem.hh"

@@ -489,14 +490,26 @@
 void
 initMemRead(GPUDynInstPtr gpuDynInst)
 {
+// temporarily modify exec_mask to supress memory accesses to  
oob
+// regions.  Only issue memory requests for lanes that have  
their

+// exec_mask set and are not out of bounds.
+VectorMask old_exec_mask = gpuDynInst->exec_mask;
+gpuDynInst->exec_mask &= ~oobMask;
 initMemReqHelper(gpuDynInst, MemCmd::ReadReq);
+gpuDynInst->exec_mask = old_exec_mask;
 }

 template
 void
 initMemWrite(GPUDynInstPtr gpuDynInst)
 {
+// temporarily modify exec_mask to supress memory accesses to  
oob
+// regions.  Only issue memory requests for lanes that have  
their

+// exec_mask set and are not out of bounds.
+VectorMask old_exec_mask = gpuDynInst->exec_mask;
+gpuDynInst->exec_mask &= ~oobMask;
 initMemReqHelper(gpuDynInst, MemCmd::WriteReq);
+gpuDynInst->exec_mask = old_exec_mask;
 }

 void
@@ -566,6 +579,42 @@

 buf_off = v_off[lane] + inst_offset;

+
+/**
+ * Range check behavior causes out of range accesses to
+ * to be treated differently. Out of range accesses  
return

+ * 0 for loads and are ignored for stores. For
+ * non-formatted accesses, this is done on a per-lane
+ * basis.
+ */
+if (rsrc_desc.stride == 0 || 

[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3, gpu-compute: Implement out-of-range accesses

2020-06-03 Thread Anthony Gutierrez (Gerrit) via gem5-dev

Hello Michael LeBeane, Tony Gutierrez,

I'd like you to do a code review. Please visit

https://gem5-review.googlesource.com/c/public/gem5/+/29935

to review the following change.


Change subject: arch-gcn3, gpu-compute: Implement out-of-range accesses
..

arch-gcn3, gpu-compute: Implement out-of-range accesses

Certain buffer out-of-range memory accesses should be special
cased and not generate memory accesses. This patch implements
those special cases and supresses lanes from accessing memory
when the calculated address falls in an ISA-specified out-of-range
condition.

Change-Id: I8298f861c6b59587789853a01e503ba7d98cb13d
---
M src/arch/gcn3/insts/instructions.cc
M src/arch/gcn3/insts/op_encodings.hh
M src/gpu-compute/global_memory_pipeline.cc
3 files changed, 96 insertions(+), 6 deletions(-)



diff --git a/src/arch/gcn3/insts/instructions.cc  
b/src/arch/gcn3/insts/instructions.cc

index b923eae..2e39bf5 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -34453,8 +34453,12 @@

 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 if (gpuDynInst->exec_mask[lane]) {
-vdst[lane] = (VecElemU32)((reinterpret_cast(
-gpuDynInst->d_data))[lane]);
+if (!oobMask[lane]) {
+vdst[lane] =  
(VecElemU32)((reinterpret_cast(

+gpuDynInst->d_data))[lane]);
+} else {
+vdst[lane] = 0;
+}
 }
 }

@@ -34580,8 +34584,12 @@

 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 if (gpuDynInst->exec_mask[lane]) {
-vdst[lane] = (VecElemU32)((reinterpret_cast(
-gpuDynInst->d_data))[lane]);
+if (!oobMask[lane]) {
+vdst[lane] =  
(VecElemU32)((reinterpret_cast(

+gpuDynInst->d_data))[lane]);
+} else {
+vdst[lane] = 0;
+}
 }
 }

@@ -34707,8 +34715,12 @@

 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 if (gpuDynInst->exec_mask[lane]) {
-vdst[lane] = (reinterpret_cast(
-gpuDynInst->d_data))[lane];
+if (!oobMask[lane]) {
+vdst[lane] = (reinterpret_cast(
+gpuDynInst->d_data))[lane];
+} else {
+vdst[lane] = 0;
+}
 }
 }

diff --git a/src/arch/gcn3/insts/op_encodings.hh  
b/src/arch/gcn3/insts/op_encodings.hh

index 308560a..22c146a 100644
--- a/src/arch/gcn3/insts/op_encodings.hh
+++ b/src/arch/gcn3/insts/op_encodings.hh
@@ -40,6 +40,7 @@
 #include "arch/gcn3/gpu_mem_helpers.hh"
 #include "arch/gcn3/insts/gpu_static_inst.hh"
 #include "arch/gcn3/operand.hh"
+#include "debug/GCN3.hh"
 #include "debug/GPUExec.hh"
 #include "mem/ruby/system/RubySystem.hh"

@@ -489,14 +490,26 @@
 void
 initMemRead(GPUDynInstPtr gpuDynInst)
 {
+// temporarily modify exec_mask to supress memory accesses to  
oob
+// regions.  Only issue memory requests for lanes that have  
their

+// exec_mask set and are not out of bounds.
+VectorMask old_exec_mask = gpuDynInst->exec_mask;
+gpuDynInst->exec_mask &= ~oobMask;
 initMemReqHelper(gpuDynInst, MemCmd::ReadReq);
+gpuDynInst->exec_mask = old_exec_mask;
 }

 template
 void
 initMemWrite(GPUDynInstPtr gpuDynInst)
 {
+// temporarily modify exec_mask to supress memory accesses to  
oob
+// regions.  Only issue memory requests for lanes that have  
their

+// exec_mask set and are not out of bounds.
+VectorMask old_exec_mask = gpuDynInst->exec_mask;
+gpuDynInst->exec_mask &= ~oobMask;
 initMemReqHelper(gpuDynInst, MemCmd::WriteReq);
+gpuDynInst->exec_mask = old_exec_mask;
 }

 void
@@ -566,6 +579,42 @@

 buf_off = v_off[lane] + inst_offset;

+
+/**
+ * Range check behavior causes out of range accesses to
+ * to be treated differently. Out of range accesses  
return

+ * 0 for loads and are ignored for stores. For
+ * non-formatted accesses, this is done on a per-lane
+ * basis.
+ */
+if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) {
+if (buf_off + stride * buf_idx >=
+rsrc_desc.numRecords - s_offset.rawData()) {
+DPRINTF(GCN3, "mubuf out-of-bounds condition  
1: "

+"lane = %d, buffer_offset =