Pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Pan, Xiuli > Sent: Wednesday, December 28, 2016 15:55 > To: Guo, Yejun <[email protected]>; [email protected] > Cc: Guo, Yejun <[email protected]> > Subject: Re: [Beignet] [PATCH] enable sends to write SLM for workgroup op > > LGTM. > > -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Guo, Yejun > Sent: Friday, December 23, 2016 5:43 PM > To: [email protected] > Cc: Guo, Yejun <[email protected]> > Subject: [Beignet] [PATCH] enable sends to write SLM for workgroup op > > Signed-off-by: Guo, Yejun <[email protected]> > --- > backend/src/backend/gen8_context.cpp | 12 +++---- > backend/src/backend/gen_context.cpp | 8 ++--- > backend/src/backend/gen_insn_selection.cpp | 50 > +++++++++++++++++++++--------- > backend/src/backend/gen_insn_selection.hpp | 5 ++- > 4 files changed, 49 insertions(+), 26 deletions(-) > > diff --git a/backend/src/backend/gen8_context.cpp > b/backend/src/backend/gen8_context.cpp > index a3045ce..eede52c 100644 > --- a/backend/src/backend/gen8_context.cpp > +++ b/backend/src/backend/gen8_context.cpp > @@ -1738,7 +1738,7 @@ namespace gbe > GenRegister barrierId = > ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid)); > GenRegister localBarrier = ra->genReg(insn.src(5)); > > - uint32_t wg_op = insn.extra.workgroupOp; > + uint32_t wg_op = insn.extra.wgop.workgroupOp; > uint32_t simd = p->curr.execWidth; > int32_t jip0, jip1; > > @@ -1757,8 +1757,8 @@ namespace gbe > /* use of continuous GRF allocation from insn selection */ > GenRegister msg = GenRegister::retype(ra->genReg(insn.dst(2)), > dst.type); > GenRegister msgSlmOff = GenRegister::retype(ra->genReg(insn.src(4)), > GEN_TYPE_UD); > - GenRegister msgAddr = GenRegister::retype(GenRegister::offset(msg, 0), > GEN_TYPE_UD); > - GenRegister msgData = GenRegister::retype(GenRegister::offset(msg, 1), > dst.type); > + GenRegister msgAddr = GenRegister::retype(msg, GEN_TYPE_UD); > + GenRegister msgData = GenRegister::retype(ra->genReg(insn.dst(3)), > + dst.type); > > /* do some calculation within each thread */ > wgOpPerformThread(dst, theVal, threadData, tmp, simd, wg_op, p); @@ - > 1799,7 +1799,7 @@ namespace gbe > p->curr.execWidth = 8; > p->MUL(msgAddr, threadId, GenRegister::immd(0x8)); > p->ADD(msgAddr, msgAddr, msgSlmOff); > - p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false); > + p->UNTYPED_WRITE(msgAddr, msgData, GenRegister::immw(0xFE), 2, > + insn.extra.wgop.splitSend); > } > else > { > @@ -1807,7 +1807,7 @@ namespace gbe > p->MOV(msgData, threadData); > p->MUL(msgAddr, threadId, GenRegister::immd(0x4)); > p->ADD(msgAddr, msgAddr, msgSlmOff); > - p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false); > + p->UNTYPED_WRITE(msgAddr, msgData, GenRegister::immw(0xFE), 1, > + insn.extra.wgop.splitSend); > } > > /* init partialData register, it will hold the final result */ @@ -1945,7 > +1945,7 @@ namespace gbe > const GenRegister theVal = GenRegister::retype(ra->genReg(insn.src(0)), > dst.type); > GenRegister threadData = ra->genReg(insn.src(1)); > > - uint32_t wg_op = insn.extra.workgroupOp; > + uint32_t wg_op = insn.extra.wgop.workgroupOp; > uint32_t simd = p->curr.execWidth; > > /* masked elements should be properly set to init value */ diff --git > a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index c8019e3..5d8861b 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -3252,7 +3252,7 @@ namespace gbe > GenRegister barrierId = > ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid)); > GenRegister localBarrier = ra->genReg(insn.src(5)); > > - uint32_t wg_op = insn.extra.workgroupOp; > + uint32_t wg_op = insn.extra.wgop.workgroupOp; > uint32_t simd = p->curr.execWidth; > int32_t jip0, jip1; > > @@ -3271,8 +3271,8 @@ namespace gbe > /* use of continuous GRF allocation from insn selection */ > GenRegister msg = GenRegister::retype(ra->genReg(insn.dst(2)), > dst.type); > GenRegister msgSlmOff = GenRegister::retype(ra->genReg(insn.src(4)), > GEN_TYPE_UD); > - GenRegister msgAddr = GenRegister::retype(GenRegister::offset(msg, 0), > GEN_TYPE_UD); > - GenRegister msgData = GenRegister::retype(GenRegister::offset(msg, 1), > dst.type); > + GenRegister msgAddr = GenRegister::retype(msg, GEN_TYPE_UD); > + GenRegister msgData = GenRegister::retype(ra->genReg(insn.dst(3)), > + dst.type); > > /* do some calculation within each thread */ > wgOpPerformThread(dst, theVal, threadData, tmp, simd, wg_op, p); @@ - > 3459,7 +3459,7 @@ namespace gbe > const GenRegister theVal = GenRegister::retype(ra->genReg(insn.src(0)), > dst.type); > GenRegister threadData = ra->genReg(insn.src(1)); > > - uint32_t wg_op = insn.extra.workgroupOp; > + uint32_t wg_op = insn.extra.wgop.workgroupOp; > uint32_t simd = p->curr.execWidth; > > /* masked elements should be properly set to init value */ diff --git > a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 128c2bc..bcdba12 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -753,7 +753,7 @@ namespace gbe > GenRegister tmpData1, > GenRegister localThreadID, GenRegister localThreadNUM, > GenRegister tmpData2, GenRegister slmOff, > - vector<GenRegister> msg, uint32_t msgSizeReq, > + vector<GenRegister> msg, > GenRegister localBarrier); > /*! Sub Group Operations */ > void SUBGROUP_OP(uint32_t wg_op, Reg dst, GenRegister src, @@ - > 2255,19 +2255,11 @@ namespace gbe > GenRegister tmpData2, > GenRegister slmOff, > vector<GenRegister> msg, > - uint32_t msgSizeReq, > GenRegister localBarrier) > { > SelectionInstruction *insn = this->appendInsn(SEL_OP_WORKGROUP_OP, > 2 + msg.size(), 6); > - SelectionVector *vector = this->appendVector(); > > - /* allocate continuous GRF registers for READ/WRITE to SLM */ > - GBE_ASSERT(msg.size() >= msgSizeReq); > - vector->regNum = msg.size(); > - vector->offsetID = 0; > - vector->reg = &insn->dst(2); > - vector->isSrc = 0; > - insn->extra.workgroupOp = wg_op; > + insn->extra.wgop.workgroupOp = wg_op; > > insn->dst(0) = dst; > insn->dst(1) = tmpData1; > @@ -2280,6 +2272,29 @@ namespace gbe > insn->src(3) = tmpData2; > insn->src(4) = slmOff; > insn->src(5) = localBarrier; > + > + if (hasSends()) { > + insn->extra.wgop.splitSend = 1; > + SelectionVector *vector = this->appendVector(); > + > + vector->regNum = 1; > + vector->offsetID = 2; > + vector->reg = &insn->dst(2); > + vector->isSrc = 0; > + > + vector = this->appendVector(); > + vector->regNum = msg.size() - 1; > + vector->offsetID = 3; > + vector->reg = &insn->dst(3); > + vector->isSrc = 0; > + } else { > + /* allocate continuous GRF registers for READ/WRITE to SLM */ > + SelectionVector *vector = this->appendVector(); > + vector->regNum = msg.size(); > + vector->offsetID = 2; > + vector->reg = &insn->dst(2); > + vector->isSrc = 0; > + } > } > > void Selection::Opaque::SUBGROUP_OP(uint32_t wg_op, @@ -2290,7 > +2305,7 @@ namespace gbe > { > SelectionInstruction *insn = this->appendInsn(SEL_OP_SUBGROUP_OP, 2, > 2); > > - insn->extra.workgroupOp = wg_op; > + insn->extra.wgop.workgroupOp = wg_op; > > insn->dst(0) = dst; > insn->dst(1) = tmpData1; > @@ -7451,10 +7466,15 @@ extern bool OCL_DEBUGINFO; // first defined by > calling BVAR in program.cpp > GenRegister localBarrier = GenRegister::ud8grf(sel.reg(FAMILY_DWORD)); > > /* Allocate registers for message sending > - * (read/write to shared local memory) */ > + * (read/write to shared local memory), > + * only one data (ud/ul) is needed for thread communication, > + * we will always use SIMD8 to do the read/write > + */ > vector<GenRegister> msg; > - for(uint32_t i = 0; i < 6; i++) > - msg.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32)); > + msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG))); > //address > + msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG))); //data > + if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L) > + msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG))); > + //data > > /* Insert a barrier to make sure all the var we are interested in > have been assigned the final value. */ @@ -7466,7 +7486,7 @@ extern > bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp > > /* Perform workgroup op */ > sel.WORKGROUP_OP(workGroupOp, dst, src, tmpData1, > - localThreadID, localThreadNUM, tmpData2, slmOff, msg, > 6, > + localThreadID, localThreadNUM, tmpData2, slmOff, > + msg, > localBarrier); > > return true; > diff --git a/backend/src/backend/gen_insn_selection.hpp > b/backend/src/backend/gen_insn_selection.hpp > index 01999a2..8846372 100644 > --- a/backend/src/backend/gen_insn_selection.hpp > +++ b/backend/src/backend/gen_insn_selection.hpp > @@ -159,7 +159,10 @@ namespace gbe > uint32_t continueFlag:8; > uint16_t printfSize; > }; > - uint32_t workgroupOp; > + struct { > + uint16_t workgroupOp; > + uint16_t splitSend:1; > + }wgop; > } extra; > /*! Gen opcode */ > uint8_t opcode; > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
