On 四, 2015-01-08 at 13:14 +0800, Zhigang Gong wrote: > On Tue, Jan 06, 2015 at 06:01:54PM +0800, junyan...@inbox.com wrote: > > From: Junyan He <junyan...@linux.intel.com> > > > > The conversion logic is too complicated. > > We split it more clearly for each case. > > Notice: For I64 to I8, the conversion can not be completed > > within one step because of the hardware hstride restriction. > > So we need to convert it to i32 and than convert it to i8. > typo here, should be then. > > > > Signed-off-by: Junyan He <junyan...@linux.intel.com> > > --- > > backend/src/backend/gen8_context.cpp | 8 +- > > backend/src/backend/gen_insn_selection.cpp | 195 > > ++++++++++++++++++++++++----- > > 2 files changed, 168 insertions(+), 35 deletions(-) > > > > diff --git a/backend/src/backend/gen8_context.cpp > > b/backend/src/backend/gen8_context.cpp > > index cffb10d..18a3425 100644 > > --- a/backend/src/backend/gen8_context.cpp > > +++ b/backend/src/backend/gen8_context.cpp > > @@ -55,7 +55,9 @@ namespace gbe > > { > > switch (insn.opcode) { > > case SEL_OP_CONVI64_TO_I: > > - > > + /* Should never come to here, just use the common OPCODE. */ > > + GBE_ASSERT(0); > > + break; > > default: > > GenContext::emitUnaryInstruction(insn); > > } > > @@ -65,7 +67,9 @@ namespace gbe > > { > > switch (insn.opcode) { > > case SEL_OP_CONVI_TO_I64: > > - > > + /* Should never come to here, just use the common OPCODE. */ > > + GBE_ASSERT(0); > > + break; > > default: > > GenContext::emitUnaryWithTempInstruction(insn); > > } > > diff --git a/backend/src/backend/gen_insn_selection.cpp > > b/backend/src/backend/gen_insn_selection.cpp > > index b6a13bf..60f45f7 100644 > > --- a/backend/src/backend/gen_insn_selection.cpp > > +++ b/backend/src/backend/gen_insn_selection.cpp > > @@ -349,9 +349,17 @@ namespace gbe > > const ir::RegisterData ®Data = getRegisterData(reg); > > return regData.isUniform(); > > } > > + INLINE bool isLongReg(const ir::Register ®) const { > > + const ir::RegisterData ®Data = getRegisterData(reg); > > + return regData.family == ir::FAMILY_QWORD; > > + } > > + > > + INLINE GenRegister unpacked_ud(const ir::Register ®) const { > > + return GenRegister::unpacked_ud(reg, isScalarReg(reg)); > > + } > > > > INLINE GenRegister unpacked_uw(const ir::Register ®) const { > > - return GenRegister::unpacked_uw(reg, isScalarReg(reg)); > > + return GenRegister::unpacked_uw(reg, isScalarReg(reg), > > isLongReg(reg)); > > } > > > > INLINE GenRegister unpacked_ub(const ir::Register ®) const { > > @@ -3658,7 +3666,7 @@ namespace gbe > > sel.F32TO16(unpacked, src); > > sel.pop(); > > sel.MOV(dst, unpacked); > > - } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && > > (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) { > > + } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && > > srcFamily == FAMILY_DWORD) {//convert i32 to small int > > GenRegister unpacked; > > if (dstFamily == FAMILY_WORD) { > > const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : > > GEN_TYPE_W; > > @@ -3675,27 +3683,115 @@ namespace gbe > > } else > > unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), > > type); > > } > > - if(srcFamily == FAMILY_QWORD) { > > + > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.MOV(unpacked, src); > > + sel.pop(); > > + > > + if (unpacked.reg() != dst.reg()) > > + sel.MOV(dst, unpacked); > > + } else if (dstFamily == FAMILY_WORD && srcFamily == FAMILY_QWORD) { > > //convert i64 to i16 > > + const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : > > GEN_TYPE_W; > > + GenRegister unpacked; > > + if (!sel.isScalarReg(dst.reg())) { > > + if (sel.hasLongType()) { > > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, > > sel.isScalarReg(insn.getSrc(0)))); > > + } else { > > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, > > sel.isScalarReg(insn.getSrc(0)))); > > + } > > + unpacked = GenRegister::retype(unpacked, type); > > + } else { > > + unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type); > > + } > > + > > + if(!sel.hasLongType()) { > > You already remove (|| srcFamily == FAMILY_QWORD at Line 3658, why still > do the following code which is to convert I64 source operand to I32? > It looks incorrect for me. The following else branch should be put here > unconditional. > I think here we are converting 64bits to 16bits, we first mov 64 bits to 32bits and then mov it to 16bits. I do not modify the origin logic here, but really we can optimize it.
> > GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD)); > > tmp.type = GEN_TYPE_D; > > sel.CONVI64_TO_I(tmp, src); > > sel.MOV(unpacked, tmp); > > } else { > > sel.push(); > > - if (sel.isScalarReg(insn.getSrc(0))) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > - sel.MOV(unpacked, src); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.MOV(unpacked, src); > > sel.pop(); > > } > > - if (unpacked.reg() != dst.reg()) > > + > > + if (unpacked.reg() != dst.reg()) { > > + sel.MOV(dst, unpacked); > > + } > > + } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD) { > > //convert i64 to i8 > > + GenRegister unpacked; > > + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : > > GEN_TYPE_B; > > + if (!sel.isScalarReg(dst.reg())) { > > + if (sel.hasLongType()) { > > + /* When convert i64 to i8, the hstride should be 8, but the > > hstride do not > > + support more than 4, so we need to split it to 2 steps. */ > > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, > > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? > > GEN_TYPE_UW : GEN_TYPE_W); > > + } else { > > + unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, > > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, type); > > + } > > + } else { > > + unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type); > > + } > > + > The logic seems broken for me, as the unpacked register which is > computed above but is overrided > in the following code before use it. I think the unpacked here is used as the dst or temp result, what is the problem? > > + if(!sel.hasLongType()) { > > + GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD)); > > + tmp.type = GEN_TYPE_D; > > + sel.CONVI64_TO_I(tmp, src); > > > + sel.MOV(unpacked, tmp); > > + } else { > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.MOV(unpacked, src); > > + sel.pop(); > > + } > > + > > + if (unpacked.reg() != dst.reg()) { > > sel.MOV(dst, unpacked); > > + } > > } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && > > - (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) > > - sel.CONVI64_TO_I(dst, src); > > - else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || > > srcType == ir::TYPE_S64)) { > > + (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {// > > Convert i64 to i32 > > + if (sel.hasLongType()) { > > + GenRegister unpacked; > > + const uint32_t type = dstType == TYPE_U32 ? GEN_TYPE_UD : > > GEN_TYPE_D; > > + if (!sel.isScalarReg(dst.reg())) { > > + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, dstType == TYPE_U32 ? > > GEN_TYPE_UD : GEN_TYPE_D); > > + } else { > > + unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), > > type); > > + } > > + > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.MOV(unpacked, src); > > + sel.pop(); > > + > > + if (unpacked.reg() != dst.reg()) { > > + sel.MOV(dst, unpacked); > > + } > > + } else { > > + sel.CONVI64_TO_I(dst, src); > > + } > > + } else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || > > srcType == ir::TYPE_S64)) { //i64 to float > > auto dag = sel.regDAG[src.reg()]; > > // FIXME, in the future, we need to do a common I64 lower to I32 > > analysis > > // at llvm IR layer which could cover more cases then just this > > one. > > @@ -3729,37 +3825,70 @@ namespace gbe > > } > > } > > } > > - GenRegister tmp[6]; > > - for(int i=0; i<6; i++) { > > - tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > > - } > > - sel.push(); > > + > > + if (!sel.hasLongType()) { > > + GenRegister tmp[6]; > > + for(int i=0; i<6; i++) { > > + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > > + } > > + sel.push(); > The following sel.curr.xxx between the push()/pop() pair should > have 2 extra space indent > > sel.curr.flag = 0; > > sel.curr.subFlag = 1; > > sel.CONVI64_TO_F(dst, src, tmp); > > - sel.pop(); > > + sel.pop(); > > + } else { > > + GenRegister unpacked; > > + const uint32_t type = GEN_TYPE_F; > > + if (!sel.isScalarReg(dst.reg())) { > > + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, type); > > + } else { > > + unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), > > type); > > + } > > + > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.MOV(unpacked, src); > > + sel.pop(); > > + > > + if (unpacked.reg() != dst.reg()) { > > + sel.MOV(dst, unpacked); > > + } > > + } > > } else if ((dst.isdf() && srcType == ir::TYPE_FLOAT) || > > - (src.isdf() && dstType == ir::TYPE_FLOAT)) { > > + (src.isdf() && dstType == ir::TYPE_FLOAT)) { // float and > > double conversion > > ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD); > > sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64)); > > - } else if (dst.isint64()) { > > + } else if (dst.isint64()) { // promote to i64 > > switch(src.type) { > > case GEN_TYPE_F: > > - { > > - GenRegister tmp[2]; > > - tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > > - tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT); > > - sel.push(); > > - sel.curr.flag = 0; > > - sel.curr.subFlag = 1; > > - sel.CONVF_TO_I64(dst, src, tmp); > > - sel.pop(); > > - break; > > - } > > + { > > + if (!sel.hasLongType()) { > > + GenRegister tmp[2]; > > + tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > > + tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT); > > + sel.push(); > need fix sel.xxx indent between push()/pop() pair. > > + sel.curr.flag = 0; > > + sel.curr.subFlag = 1; > > + sel.CONVF_TO_I64(dst, src, tmp); > > + sel.pop(); > > + } else { > > + sel.MOV(dst, src); > > + } > > + break; > > + } > > case GEN_TYPE_DF: > > NOT_IMPLEMENTED; > > default: > > - sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD))); > > + if (sel.hasLongType()) { > > + sel.MOV(dst, src); > > + } else { > > + sel.CONVI_TO_I64(dst, src, > > sel.selReg(sel.reg(FAMILY_DWORD))); > > + } > > } > > } else > > sel.MOV(dst, src); > > -- > > 1.9.1 > > > > _______________________________________________ > > Beignet mailing list > > Beignet@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet