Because the mov's dst is tmp register, not the finally dst, so need not set p->curr.quarterControl here.
The patch LGTM, thanks. > -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Yang, Rong R > Sent: Monday, August 10, 2015 13:47 > To: Guo, Yejun; [email protected] > Subject: Re: [Beignet] [PATCH] generate sub_group_id inside kernel instead > of payload > > Ok, pop and push don't need here. > But you should set the correct p->curr.quarterControl when doing the half of > 16 lanes, otherwise the flag mask is not correct. > > > -----Original Message----- > > From: Guo, Yejun > > Sent: Monday, August 10, 2015 12:06 > > To: Yang, Rong R; [email protected] > > Subject: RE: [Beignet] [PATCH] generate sub_group_id inside kernel > > instead of payload > > > > The sel.push()/pop() is already at the beginning/end of the function, > > shall I add it again? > > > > Btw, I plan to encapsulate into a function so that others can get the > > lane id easily. > > > > -----Original Message----- > > From: Yang, Rong R > > Sent: Monday, August 10, 2015 11:37 AM > > To: Guo, Yejun; [email protected] > > Cc: Guo, Yejun > > Subject: RE: [Beignet] [PATCH] generate sub_group_id inside kernel > > instead of payload > > > > One comment. > > > > > -----Original Message----- > > > From: Beignet [mailto:[email protected]] On > > > Behalf Of Guo Yejun > > > Sent: Friday, August 7, 2015 08:51 > > > To: [email protected] > > > Cc: Guo, Yejun > > > Subject: [Beignet] [PATCH] generate sub_group_id inside kernel > > > instead of payload > > > > > > get_sub_group_id ranges at [0, 7] for SIMD8 and [0, 15] for SIMD16, > > > previously we set up the values in kernel payload, now change it to > > > generate the values inside kernel with packed integer vector. > > > > > > Signed-off-by: Guo Yejun <[email protected]> > > > --- > > > backend/src/backend/gen_context.cpp | 8 -------- > > > backend/src/backend/gen_insn_selection.cpp | 18 > ++++++++++++++++-- > > > backend/src/backend/program.h | 1 - > > > backend/src/ir/profile.cpp | 1 - > > > backend/src/ir/profile.hpp | 7 +++---- > > > src/cl_command_queue_gen7.c | 8 -------- > > > 6 files changed, 19 insertions(+), 24 deletions(-) > > > > > > diff --git a/backend/src/backend/gen_context.cpp > > > b/backend/src/backend/gen_context.cpp > > > index e16b0a9..29b58df 100644 > > > --- a/backend/src/backend/gen_context.cpp > > > +++ b/backend/src/backend/gen_context.cpp > > > @@ -2217,13 +2217,8 @@ namespace gbe > > > allocCurbeReg(reg, GBE_CURBE_##PATCH); \ > > > } else > > > > > > - bool needLaneID = false; > > > fn.foreachInstruction([&](ir::Instruction &insn) { > > > const uint32_t srcNum = insn.getSrcNum(); > > > - if (insn.getOpcode() == ir::OP_SIMD_ID) { > > > - GBE_ASSERT(srcNum == 0); > > > - needLaneID = true; > > > - } > > > for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { > > > const ir::Register reg = insn.getSrc(srcID); > > > if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2262,9 > > > +2257,6 @@ namespace gbe > > > }); > > > #undef INSERT_REG > > > > > > - if (needLaneID) > > > - allocCurbeReg(laneid, GBE_CURBE_LANE_ID); > > > - > > > // After this point the vector is immutable. Sorting it will make > > > // research faster > > > std::sort(kernel->patches.begin(), kernel->patches.end()); diff > > > --git a/backend/src/backend/gen_insn_selection.cpp > > > b/backend/src/backend/gen_insn_selection.cpp > > > index b0ba9e3..9772ad1 100644 > > > --- a/backend/src/backend/gen_insn_selection.cpp > > > +++ b/backend/src/backend/gen_insn_selection.cpp > > > @@ -2299,8 +2299,22 @@ namespace gbe > > > break; > > > case ir::OP_SIMD_ID: > > > { > > > - const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, > > ir::TYPE_U32); > > > - sel.MOV(dst, selLaneID); > > > + const GenRegister landID = GenRegister::immv(0x76543210); > > > + ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_WORD); > > > + const GenRegister dst_ = sel.selReg(r, ir::TYPE_U16); > > > + > > > + uint32_t execWidth = sel.curr.execWidth; > > > + sel.curr.execWidth = 8; > > > + sel.MOV(dst_, landID); > > > + > > > + if (execWidth == 16) { > > > + //Packed Unsigned Half-Byte Integer Vector does not work > > > + //have to mock by adding 8 to the singed vector > > > + const GenRegister eight = GenRegister::immuw(8); > > > + sel.ADD(GenRegister::offset(dst_, 0, 16), dst_, eight); > > > + sel.curr.execWidth = 16; > > > + } > > > > Because you have changed the state, I think it is better to use > > sel.push(), sel.pop(). > > > > > + sel.MOV(dst, dst_); > > > } > > > break; > > > default: NOT_SUPPORTED; > > > diff --git a/backend/src/backend/program.h > > > b/backend/src/backend/program.h index 3637ebb..56db1a1 100644 > > > --- a/backend/src/backend/program.h > > > +++ b/backend/src/backend/program.h > > > @@ -101,7 +101,6 @@ enum gbe_curbe_type { > > > GBE_CURBE_THREAD_NUM, > > > GBE_CURBE_ZERO, > > > GBE_CURBE_ONE, > > > - GBE_CURBE_LANE_ID, > > > GBE_CURBE_SLM_OFFSET, > > > GBE_CURBE_BTI_UTIL, > > > }; > > > diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp > > > index af9f698..37f2d3d 100644 > > > --- a/backend/src/ir/profile.cpp > > > +++ b/backend/src/ir/profile.cpp > > > @@ -90,7 +90,6 @@ namespace ir { > > > DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1); > > > DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1); > > > DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0); > > > - DECL_NEW_REG(FAMILY_DWORD, laneid, 0); > > > DECL_NEW_REG(FAMILY_DWORD, invalid, 1); > > > DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1); > > > } > > > diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp > > > index 9323824..bf909be 100644 > > > --- a/backend/src/ir/profile.hpp > > > +++ b/backend/src/ir/profile.hpp > > > @@ -72,10 +72,9 @@ namespace ir { > > > static const Register printfbptr = Register(28); // printf buffer > > > address . > > > static const Register printfiptr = Register(29); // printf > > > index buffer > > address. > > > static const Register dwblockip = Register(30); // blockip > > > - static const Register laneid = Register(31); // lane id. > > > - static const Register invalid = Register(32); // used for valid > comparation. > > > - static const Register btiUtil = Register(33); // used for mixed > > > pointer as > > bti > > > utility. > > > - static const uint32_t regNum = 34; // number of special > > > registers > > > + static const Register invalid = Register(31); // used for valid > comparation. > > > + static const Register btiUtil = Register(32); // used for > > > + mixed pointer as bti > > > utility. > > > + static const uint32_t regNum = 33; // number of special > > > registers > > > extern const char *specialRegMean[]; // special register > > > name. > > > } /* namespace ocl */ > > > > > > diff --git a/src/cl_command_queue_gen7.c > > b/src/cl_command_queue_gen7.c > > > index 89f39b3..4adbd2b 100644 > > > --- a/src/cl_command_queue_gen7.c > > > +++ b/src/cl_command_queue_gen7.c > > > @@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker, > > > UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD > > > > > > - /* get_sub_group_id needs it */ > > > - if ((offset = interp_kernel_get_curbe_offset(ker->opaque, > > > GBE_CURBE_LANE_ID, 0)) >= 0) { > > > - const uint32_t simd_sz = interp_kernel_get_simd_width(ker- > >opaque); > > > - uint32_t *laneid = (uint32_t *) (ker->curbe + offset); > > > - int32_t i; > > > - for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i; > > > - } > > > - > > > /* Write identity for the stack pointer. This is required by the > > > stack > > pointer > > > * computation in the kernel > > > */ > > > -- > > > 1.9.1 > > > > > > _______________________________________________ > > > Beignet mailing list > > > [email protected] > > > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
