[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3, gpu-compute: Implement out-of-range accesses
Anthony Gutierrez has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/29935 ) Change subject: arch-gcn3, gpu-compute: Implement out-of-range accesses .. arch-gcn3, gpu-compute: Implement out-of-range accesses Certain buffer out-of-range memory accesses should be special cased and not generate memory accesses. This patch implements those special cases and supresses lanes from accessing memory when the calculated address falls in an ISA-specified out-of-range condition. Change-Id: I8298f861c6b59587789853a01e503ba7d98cb13d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29935 Tested-by: kokoro Reviewed-by: Anthony Gutierrez Reviewed-by: Matt Sinclair Maintainer: Anthony Gutierrez --- M src/arch/gcn3/insts/instructions.cc M src/arch/gcn3/insts/op_encodings.hh M src/gpu-compute/global_memory_pipeline.cc 3 files changed, 96 insertions(+), 6 deletions(-) Approvals: Anthony Gutierrez: Looks good to me, approved; Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve kokoro: Regressions pass diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index b923eae..2e39bf5 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -34453,8 +34453,12 @@ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { -vdst[lane] = (VecElemU32)((reinterpret_cast( -gpuDynInst->d_data))[lane]); +if (!oobMask[lane]) { +vdst[lane] = (VecElemU32)((reinterpret_cast( +gpuDynInst->d_data))[lane]); +} else { +vdst[lane] = 0; +} } } @@ -34580,8 +34584,12 @@ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { -vdst[lane] = (VecElemU32)((reinterpret_cast( -gpuDynInst->d_data))[lane]); +if (!oobMask[lane]) { +vdst[lane] = (VecElemU32)((reinterpret_cast( +gpuDynInst->d_data))[lane]); +} else { +vdst[lane] = 0; +} } } @@ -34707,8 +34715,12 @@ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { -vdst[lane] = (reinterpret_cast( -gpuDynInst->d_data))[lane]; +if (!oobMask[lane]) { +vdst[lane] = (reinterpret_cast( +gpuDynInst->d_data))[lane]; +} else { +vdst[lane] = 0; +} } } diff --git a/src/arch/gcn3/insts/op_encodings.hh b/src/arch/gcn3/insts/op_encodings.hh index 308560a..22c146a 100644 --- a/src/arch/gcn3/insts/op_encodings.hh +++ b/src/arch/gcn3/insts/op_encodings.hh @@ -40,6 +40,7 @@ #include "arch/gcn3/gpu_mem_helpers.hh" #include "arch/gcn3/insts/gpu_static_inst.hh" #include "arch/gcn3/operand.hh" +#include "debug/GCN3.hh" #include "debug/GPUExec.hh" #include "mem/ruby/system/RubySystem.hh" @@ -489,14 +490,26 @@ void initMemRead(GPUDynInstPtr gpuDynInst) { +// temporarily modify exec_mask to supress memory accesses to oob +// regions. Only issue memory requests for lanes that have their +// exec_mask set and are not out of bounds. +VectorMask old_exec_mask = gpuDynInst->exec_mask; +gpuDynInst->exec_mask &= ~oobMask; initMemReqHelper(gpuDynInst, MemCmd::ReadReq); +gpuDynInst->exec_mask = old_exec_mask; } template void initMemWrite(GPUDynInstPtr gpuDynInst) { +// temporarily modify exec_mask to supress memory accesses to oob +// regions. Only issue memory requests for lanes that have their +// exec_mask set and are not out of bounds. +VectorMask old_exec_mask = gpuDynInst->exec_mask; +gpuDynInst->exec_mask &= ~oobMask; initMemReqHelper(gpuDynInst, MemCmd::WriteReq); +gpuDynInst->exec_mask = old_exec_mask; } void @@ -566,6 +579,42 @@ buf_off = v_off[lane] + inst_offset; + +/** + * Range check behavior causes out of range accesses to + * to be treated differently. Out of range accesses return + * 0 for loads and are ignored for stores. For + * non-formatted accesses, this is done on a per-lane + * basis. + */ +if (rsrc_desc.stride == 0 ||
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3, gpu-compute: Implement out-of-range accesses
Hello Michael LeBeane, Tony Gutierrez, I'd like you to do a code review. Please visit https://gem5-review.googlesource.com/c/public/gem5/+/29935 to review the following change. Change subject: arch-gcn3, gpu-compute: Implement out-of-range accesses .. arch-gcn3, gpu-compute: Implement out-of-range accesses Certain buffer out-of-range memory accesses should be special cased and not generate memory accesses. This patch implements those special cases and supresses lanes from accessing memory when the calculated address falls in an ISA-specified out-of-range condition. Change-Id: I8298f861c6b59587789853a01e503ba7d98cb13d --- M src/arch/gcn3/insts/instructions.cc M src/arch/gcn3/insts/op_encodings.hh M src/gpu-compute/global_memory_pipeline.cc 3 files changed, 96 insertions(+), 6 deletions(-) diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index b923eae..2e39bf5 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -34453,8 +34453,12 @@ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { -vdst[lane] = (VecElemU32)((reinterpret_cast( -gpuDynInst->d_data))[lane]); +if (!oobMask[lane]) { +vdst[lane] = (VecElemU32)((reinterpret_cast( +gpuDynInst->d_data))[lane]); +} else { +vdst[lane] = 0; +} } } @@ -34580,8 +34584,12 @@ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { -vdst[lane] = (VecElemU32)((reinterpret_cast( -gpuDynInst->d_data))[lane]); +if (!oobMask[lane]) { +vdst[lane] = (VecElemU32)((reinterpret_cast( +gpuDynInst->d_data))[lane]); +} else { +vdst[lane] = 0; +} } } @@ -34707,8 +34715,12 @@ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { -vdst[lane] = (reinterpret_cast( -gpuDynInst->d_data))[lane]; +if (!oobMask[lane]) { +vdst[lane] = (reinterpret_cast( +gpuDynInst->d_data))[lane]; +} else { +vdst[lane] = 0; +} } } diff --git a/src/arch/gcn3/insts/op_encodings.hh b/src/arch/gcn3/insts/op_encodings.hh index 308560a..22c146a 100644 --- a/src/arch/gcn3/insts/op_encodings.hh +++ b/src/arch/gcn3/insts/op_encodings.hh @@ -40,6 +40,7 @@ #include "arch/gcn3/gpu_mem_helpers.hh" #include "arch/gcn3/insts/gpu_static_inst.hh" #include "arch/gcn3/operand.hh" +#include "debug/GCN3.hh" #include "debug/GPUExec.hh" #include "mem/ruby/system/RubySystem.hh" @@ -489,14 +490,26 @@ void initMemRead(GPUDynInstPtr gpuDynInst) { +// temporarily modify exec_mask to supress memory accesses to oob +// regions. Only issue memory requests for lanes that have their +// exec_mask set and are not out of bounds. +VectorMask old_exec_mask = gpuDynInst->exec_mask; +gpuDynInst->exec_mask &= ~oobMask; initMemReqHelper(gpuDynInst, MemCmd::ReadReq); +gpuDynInst->exec_mask = old_exec_mask; } template void initMemWrite(GPUDynInstPtr gpuDynInst) { +// temporarily modify exec_mask to supress memory accesses to oob +// regions. Only issue memory requests for lanes that have their +// exec_mask set and are not out of bounds. +VectorMask old_exec_mask = gpuDynInst->exec_mask; +gpuDynInst->exec_mask &= ~oobMask; initMemReqHelper(gpuDynInst, MemCmd::WriteReq); +gpuDynInst->exec_mask = old_exec_mask; } void @@ -566,6 +579,42 @@ buf_off = v_off[lane] + inst_offset; + +/** + * Range check behavior causes out of range accesses to + * to be treated differently. Out of range accesses return + * 0 for loads and are ignored for stores. For + * non-formatted accesses, this is done on a per-lane + * basis. + */ +if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) { +if (buf_off + stride * buf_idx >= +rsrc_desc.numRecords - s_offset.rawData()) { +DPRINTF(GCN3, "mubuf out-of-bounds condition 1: " +"lane = %d, buffer_offset =