-----Original Message----- From: Yang, Rong R Sent: Friday, April 17, 2015 5:12 PM To: Guo, Yejun; beignet@lists.freedesktop.org Cc: Guo, Yejun Subject: RE: [Beignet] [PATCH V2 1/2] add simd level function __gen_ocl_get_simd_id
> -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Guo Yejun > Sent: Friday, April 17, 2015 14:47 > To: beignet@lists.freedesktop.org > Cc: Guo, Yejun > Subject: [Beignet] [PATCH V2 1/2] add simd level function > __gen_ocl_get_simd_id > > uint __gen_ocl_get_simd_id(); > return value ranges from 0 to simdsize - 1 > > V2: use function sel.selReg to refine code > Signed-off-by: Guo Yejun <yejun....@intel.com> > --- > backend/src/backend/gen_context.cpp | 9 ++++++++- > backend/src/backend/gen_insn_selection.cpp | 6 ++++++ > backend/src/backend/program.h | 1 + > backend/src/ir/instruction.cpp | 1 + > backend/src/ir/instruction.hpp | 2 ++ > backend/src/ir/instruction.hxx | 1 + > backend/src/ir/liveness.cpp | 5 +++++ > backend/src/ir/profile.cpp | 2 ++ > backend/src/ir/profile.hpp | 5 +++-- > backend/src/libocl/tmpl/ocl_simd.tmpl.h | 1 + > backend/src/llvm/llvm_gen_backend.cpp | 7 +++++++ > backend/src/llvm/llvm_gen_ocl_function.hxx | 1 + > src/cl_command_queue_gen7.c | 8 ++++++++ > 13 files changed, 46 insertions(+), 3 deletions(-) > > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 684ecaf..62fd596 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -2013,9 +2013,14 @@ namespace gbe > if (curbeRegs.find(reg) != curbeRegs.end()) continue; \ > allocCurbeReg(reg, GBE_CURBE_##PATCH); \ > } else > - > + > + bool needLaneID = false; > fn.foreachInstruction([&](ir::Instruction &insn) { > const uint32_t srcNum = insn.getSrcNum(); > + if (insn.getOpcode() == ir::OP_SIMD_ID) { > + GBE_ASSERT(srcNum == 0); > + needLaneID = true; > + } > for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { > const ir::Register reg = insn.getSrc(srcID); > if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2054,6 +2059,8 > @@ namespace gbe > }); > #undef INSERT_REG > > + if (needLaneID) > + allocCurbeReg(laneid, GBE_CURBE_LANE_ID); > Seems need add 
curbeRegs.find(laneid) check here. If the curbe register has already been allocated, there is no need to allocate it again. [yejun] In my understanding, this is the only place where laneid is allocated, so there is no need to check first. > // After this point the vector is immutable. Sorting it will make > // research faster > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 026a858..19a3c24 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -2137,6 +2137,12 @@ namespace gbe > sel.MOV(dst, src); > } > break; > + case ir::OP_SIMD_ID: > + { > + const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, > ir::TYPE_U32); > + sel.MOV(dst, selLaneID); > + } > + break; > default: NOT_SUPPORTED; > } > sel.pop(); > diff --git a/backend/src/backend/program.h > b/backend/src/backend/program.h index 554fb16..8c171f5 100644 > --- a/backend/src/backend/program.h > +++ b/backend/src/backend/program.h > @@ -101,6 +101,7 @@ enum gbe_curbe_type { > GBE_CURBE_THREAD_NUM, > GBE_CURBE_ZERO, > GBE_CURBE_ONE, > + GBE_CURBE_LANE_ID, > GBE_CURBE_SLM_OFFSET, > }; > > diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index 86148bc..7723b90 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -1614,6 +1614,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, > getImageIndex(void), getImageIndex > } > > DECL_EMIT_FUNCTION(SIMD_SIZE) > + DECL_EMIT_FUNCTION(SIMD_ID) > > #undef DECL_EMIT_FUNCTION > > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index c603d9e..436bfd2 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -572,6 +572,8 @@ namespace ir { > Instruction ALU0(Opcode opcode, Type type, Register dst); > /*! simd_size.type dst */ > Instruction SIMD_SIZE(Type type, Register dst); > + /*! simd_id.type dst */ > + Instruction SIMD_ID(Type type, Register dst); > /*! 
alu1.type dst src */ > Instruction ALU1(Opcode opcode, Type type, Register dst, Register src); > /*! mov.type dst src */ > diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx > index f86cfbb..3f08a92 100644 > --- a/backend/src/ir/instruction.hxx > +++ b/backend/src/ir/instruction.hxx > @@ -26,6 +26,7 @@ > * \author Benjamin Segovia <benjamin.sego...@intel.com> > */ > DECL_INSN(SIMD_SIZE, NullaryInstruction) > +DECL_INSN(SIMD_ID, NullaryInstruction) > DECL_INSN(MOV, UnaryInstruction) > DECL_INSN(COS, UnaryInstruction) > DECL_INSN(SIN, UnaryInstruction) > diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp index > 2b1ffdb..26c4129 100644 > --- a/backend/src/ir/liveness.cpp > +++ b/backend/src/ir/liveness.cpp > @@ -66,6 +66,11 @@ namespace ir { > const uint32_t srcNum = insn.getSrcNum(); > const uint32_t dstNum = insn.getDstNum(); > bool uniform = true; > + > + //have no way to decide the dst uniform if there is no source > + if (srcNum == 0) > + uniform = false; > + > for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { > const Register reg = insn.getSrc(srcID); > if (!fn.isUniformRegister(reg)) diff --git > a/backend/src/ir/profile.cpp > b/backend/src/ir/profile.cpp index ec7ab94..2f6539a 100644 > --- a/backend/src/ir/profile.cpp > +++ b/backend/src/ir/profile.cpp > @@ -44,6 +44,7 @@ namespace ir { > "retVal", "slm_offset", > "printf_buffer_pointer", "printf_index_buffer_pointer", > "dwblockip", > + "lane_id", > "invalid" > }; > > @@ -88,6 +89,7 @@ namespace ir { > DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1); > DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1); > DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0); > + DECL_NEW_REG(FAMILY_DWORD, laneid, 0); > DECL_NEW_REG(FAMILY_DWORD, invalid, 1); > } > #undef DECL_NEW_REG > diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index > 8f69320..4de6fe0 100644 > --- a/backend/src/ir/profile.hpp > +++ b/backend/src/ir/profile.hpp > @@ -72,8 +72,9 @@ namespace ir { > static const 
Register printfbptr = Register(28); // printf buffer > address . > static const Register printfiptr = Register(29); // printf index buffer > address. > static const Register dwblockip = Register(30); // blockip > - static const Register invalid = Register(31); // used for valid > comparation. > - static const uint32_t regNum = 32; // number of special > registers > + static const Register laneid = Register(31); // lane id. > + static const Register invalid = Register(32); // used for valid > comparation. > + static const uint32_t regNum = 33; // number of special > registers > extern const char *specialRegMean[]; // special register name. > } /* namespace ocl */ > > diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h > b/backend/src/libocl/tmpl/ocl_simd.tmpl.h > index b992902..620e329 100644 > --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h > +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h > @@ -25,3 +25,4 @@ > ///////////////////////////////////////////////////////////////////////////// > > uint __gen_ocl_get_simd_size(void); > +uint __gen_ocl_get_simd_id(void); > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index ac67add..f46bc79 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -2806,6 +2806,7 @@ namespace gbe > case GEN_OCL_SIMD_SIZE: > case GEN_OCL_READ_TM: > case GEN_OCL_REGION: > + case GEN_OCL_SIMD_ID: > this->newRegister(&I); > break; > case GEN_OCL_PRINTF: > @@ -3461,6 +3462,12 @@ namespace gbe > ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst); > break; > } > + case GEN_OCL_SIMD_ID: > + { > + const ir::Register dst = this->getRegister(&I); > + ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst); > + break; > + } > default: break; > } > } > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 2b151f2..e2bffde 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ 
b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -155,6 +155,7 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, > __gen_ocl_f32to16) DECL_LLVM_GEN_FUNCTION(SIMD_ANY, > __gen_ocl_simd_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL, > __gen_ocl_simd_all) DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, > __gen_ocl_get_simd_size) > +DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id) > > DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm) > DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region) diff --git > a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index > 4adbd2b..e27a211 100644 > --- a/src/cl_command_queue_gen7.c > +++ b/src/cl_command_queue_gen7.c > @@ -210,6 +210,14 @@ cl_curbe_fill(cl_kernel ker, > UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD > > + /* __gen_ocl_get_simd_id needs it */ > + if ((offset = interp_kernel_get_curbe_offset(ker->opaque, > GBE_CURBE_LANE_ID, 0)) >= 0) { > + const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque); > + uint32_t *laneid = (uint32_t *) (ker->curbe + offset); > + int32_t i; > + for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i; } > + > /* Write identity for the stack pointer. This is required by the stack > pointer > * computation in the kernel > */ > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet