LGTM, pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > He Junyan > Sent: Wednesday, October 14, 2015 14:49 > To: [email protected] > Subject: Re: [Beignet] [PATCH] Backend: Refine ConvertInstruction logic in > insn_selection > > Ping for review. > > On Tue, Sep 22, 2015 at 06:29:23PM +0800, [email protected] wrote: > > Date: Tue, 22 Sep 2015 18:29:23 +0800 > > From: [email protected] > > To: [email protected] > > Subject: [Beignet] [PATCH] Backend: Refine ConvertInstruction logic in > > insn_selection > > X-Mailer: git-send-email 1.7.9.5 > > > > From: Junyan He <[email protected]> > > > > ConvertInstruction now needs to handle a lot of special cases > > instead of a simple MOV. Judging native long support, half > > support and the register restrictions of the long type makes the situation very > > complicated. The current code logic is too verbose and hard to read. > > We now use subroutine functions to make it clear and readable. > > > > Signed-off-by: Junyan He <[email protected]> > > --- > > backend/src/backend/gen_insn_selection.cpp | 780 > > +++++++++++++++++----------- > > 1 file changed, 475 insertions(+), 305 deletions(-) > > > > diff --git a/backend/src/backend/gen_insn_selection.cpp > > b/backend/src/backend/gen_insn_selection.cpp > > index ab00269..4800f7f 100644 > > --- a/backend/src/backend/gen_insn_selection.cpp > > +++ b/backend/src/backend/gen_insn_selection.cpp > > @@ -4124,148 +4124,132 @@ namespace gbe > > return false; > > } > > > > - INLINE bool emitOne(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + INLINE void convertBetweenHalfFloat(Selection::Opaque &sel, const > > + ir::ConvertInstruction &insn, bool &markChildren) const > > { > > using namespace ir; > > const Type dstType = insn.getDstType(); > > const Type srcType = insn.getSrcType(); > > - const RegisterFamily dstFamily = getFamily(dstType); > > - const RegisterFamily srcFamily = getFamily(srcType); > > const 
GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > const Opcode opcode = insn.getOpcode(); > > - sel.push(); > > - if (sel.isScalarReg(insn.getDst(0)) == true) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > - if(opcode == ir::OP_SAT_CVT) > > - sel.curr.saturate = 1; > > > > - // We need two instructions to make the conversion > > if (opcode == OP_F16TO32) { > > sel.F16TO32(dst, src); > > } else if (opcode == OP_F32TO16) { > > + // We need two instructions to make the conversion > > GenRegister unpacked; > > unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > sel.push(); > > - if (sel.isScalarReg(insn.getSrc(0))) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > - sel.F32TO16(unpacked, src); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.F32TO16(unpacked, src); > > sel.pop(); > > sel.MOV(dst, unpacked); > > - } else if (dstFamily != FAMILY_DWORD && dstFamily != > FAMILY_QWORD && srcFamily == FAMILY_DWORD) {//convert i32 to small > int and half > > - GenRegister unpacked; > > - if (dstFamily == FAMILY_WORD) { > > - uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : > GEN_TYPE_W; > > - > > - /* The special case, when dst is half, float->word->half will lose > accuracy. 
*/ > > - if (dstType == TYPE_HALF) { > > - GBE_ASSERT(sel.hasHalfType()); > > - type = GEN_TYPE_HF; > > - } > > + } else { > > + GBE_ASSERT("Not conversion between float and half\n"); > > + } > > + } > > > > - if (!sel.isScalarReg(dst.reg())) { > > - unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, type); > > - } else > > - unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), > > type); > > - } else { > > - const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : > GEN_TYPE_B; > > - if (!sel.isScalarReg(dst.reg())) { > > - unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, type); > > - } else > > - unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), > > type); > > - } > > + INLINE void convert32bitsToSmall(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + GenRegister unpacked; > > + const RegisterFamily dstFamily = getFamily(dstType); > > > > - sel.push(); > > - if (sel.isScalarReg(insn.getSrc(0))) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > - sel.MOV(unpacked, src); > > - sel.pop(); > > + if (dstFamily == FAMILY_WORD) { > > + uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : > > + GEN_TYPE_W; > > > > - if (unpacked.reg() != dst.reg()) > > - sel.MOV(dst, unpacked); > > - } else if (dstFamily == FAMILY_WORD && srcFamily == FAMILY_QWORD) > { //convert i64 to i16 and half. > > + /* The special case, when dst is half, float->word->half will > > + lose accuracy. 
*/ > > if (dstType == TYPE_HALF) { > > - /* There is no MOV for Long <---> Half. So Long-->Float-->half. > > */ > > - GBE_ASSERT(sel.hasLongType()); > > GBE_ASSERT(sel.hasHalfType()); > > - sel.push(); > > - if (sel.isScalarReg(insn.getSrc(0))) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > + type = GEN_TYPE_HF; > > + } > > > > - GenRegister funpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - funpacked = GenRegister::retype(funpacked, GEN_TYPE_F); > > - sel.MOV(funpacked, src); > > - GenRegister ftmp = sel.selReg(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - ftmp = GenRegister::retype(ftmp, GEN_TYPE_F); > > - sel.MOV(ftmp, funpacked); > > - GenRegister unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, GEN_TYPE_HF); > > - sel.MOV(unpacked, ftmp); > > - sel.pop(); > > - sel.MOV(dst, unpacked); > > - } else { > > - uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : > GEN_TYPE_W; > > + if (!sel.isScalarReg(dst.reg())) { > > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, type); > > + } else > > + unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type); > > + } else { > > + const uint32_t type = dstType == TYPE_U8 ? 
GEN_TYPE_UB : > GEN_TYPE_B; > > + if (!sel.isScalarReg(dst.reg())) { > > + unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, type); > > + } else > > + unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type); > > + } > > > > - GenRegister unpacked; > > - if (!sel.isScalarReg(dst.reg())) { > > - if (sel.hasLongType()) { > > - unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - } else { > > - unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - } > > - unpacked = GenRegister::retype(unpacked, type); > > - } else { > > - unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), > > type); > > - } > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.MOV(unpacked, src); > > + sel.pop(); > > > > - if(!sel.hasLongType()) { > > - GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD)); > > - tmp.type = GEN_TYPE_D; > > - sel.CONVI64_TO_I(tmp, src); > > - sel.MOV(unpacked, tmp); > > - } else { > > - sel.push(); > > - if (sel.isScalarReg(insn.getSrc(0))) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > - sel.MOV(unpacked, src); > > - sel.pop(); > > - } > > + if (unpacked.reg() != dst.reg()) > > + sel.MOV(dst, unpacked); > > + } > > > > - if (unpacked.reg() != dst.reg()) { > > - sel.MOV(dst, unpacked); > > - } > > + INLINE void convertI64To16bits(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + > > + if (dstType == 
TYPE_HALF) { > > + /* There is no MOV for Long <---> Half. So Long-->Float-->half. */ > > + GBE_ASSERT(sel.hasLongType()); > > + GBE_ASSERT(sel.hasHalfType()); > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > } > > - } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD) > { //convert i64 to i8 > > - GenRegister unpacked; > > - const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : > GEN_TYPE_B; > > > > - if (sel.hasLongType()) { // handle the native long logic. > > - if (!sel.isScalarReg(dst.reg())) { > > - /* When convert i64 to i8, the hstride should be 8, but the > > hstride do > not > > - support more than 4, so we need to split it to 2 steps. */ > > + GenRegister funpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + funpacked = GenRegister::retype(funpacked, GEN_TYPE_F); > > + sel.MOV(funpacked, src); > > + GenRegister ftmp = sel.selReg(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + ftmp = GenRegister::retype(ftmp, GEN_TYPE_F); > > + sel.MOV(ftmp, funpacked); > > + GenRegister unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, GEN_TYPE_HF); > > + sel.MOV(unpacked, ftmp); > > + sel.pop(); > > + sel.MOV(dst, unpacked); > > + } else { > > + uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : > > + GEN_TYPE_W; > > + > > + GenRegister unpacked; > > + if (!sel.isScalarReg(dst.reg())) { > > + if (sel.hasLongType()) { > > unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? 
> GEN_TYPE_UW : GEN_TYPE_W); > > } else { > > - unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), > > type); > > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, > > + sel.isScalarReg(insn.getSrc(0)))); > > } > > + unpacked = GenRegister::retype(unpacked, type); > > + } else { > > + unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type); > > + } > > > > + if(!sel.hasLongType()) { > > + GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD)); > > + tmp.type = GEN_TYPE_D; > > + sel.CONVI64_TO_I(tmp, src); > > + sel.MOV(unpacked, tmp); > > + } else { > > sel.push(); > > if (sel.isScalarReg(insn.getSrc(0))) { > > sel.curr.execWidth = 1; > > @@ -4274,229 +4258,263 @@ namespace gbe > > } > > sel.MOV(unpacked, src); > > sel.pop(); > > + } > > > > - if (unpacked.reg() != dst.reg()) { > > - sel.MOV(dst, unpacked); > > - } > > - } else { // Do not have native long > > - if (!sel.isScalarReg(dst.reg())) { > > - unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, type); > > - } else { > > - unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), > > type); > > - } > > + if (unpacked.reg() != dst.reg()) { > > + sel.MOV(dst, unpacked); > > + } > > + } > > + } > > > > - GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD)); > > - tmp.type = GEN_TYPE_D; > > - sel.CONVI64_TO_I(tmp, src); > > - sel.MOV(unpacked, tmp); > > + INLINE void convertI64ToI8(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + GenRegister unpacked; > > + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : > > + GEN_TYPE_B; > > + > > + if (sel.hasLongType()) { // handle the native long logic. 
> > + if (!sel.isScalarReg(dst.reg())) { > > + /* When convert i64 to i8, the hstride should be 8, but the > > hstride do > not > > + support more than 4, so we need to split it to 2 steps. */ > > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? > GEN_TYPE_UW : GEN_TYPE_W); > > + } else { > > + unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type); > > + } > > > > - if (unpacked.reg() != dst.reg()) { > > - sel.MOV(dst, unpacked); > > - } > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > } > > - } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && > > - (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {// > > Convert > i64 to i32 > > - if (sel.hasLongType()) { > > - GenRegister unpacked; > > - const uint32_t type = dstType == TYPE_U32 ? GEN_TYPE_UD : > GEN_TYPE_D; > > - if (!sel.isScalarReg(dst.reg())) { > > - unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, dstType == TYPE_U32 ? 
> GEN_TYPE_UD : GEN_TYPE_D); > > - } else { > > - unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), > > type); > > - } > > + sel.MOV(unpacked, src); > > + sel.pop(); > > > > - sel.push(); > > - if (sel.isScalarReg(insn.getSrc(0))) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > - sel.MOV(unpacked, src); > > - sel.pop(); > > + if (unpacked.reg() != dst.reg()) { > > + sel.MOV(dst, unpacked); > > + } > > + } else { // Do not have native long > > + if (!sel.isScalarReg(dst.reg())) { > > + unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, type); > > + } else { > > + unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type); > > + } > > > > - if (unpacked.reg() != dst.reg()) { > > - sel.MOV(dst, unpacked); > > - } > > + GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD)); > > + tmp.type = GEN_TYPE_D; > > + sel.CONVI64_TO_I(tmp, src); > > + sel.MOV(unpacked, tmp); > > + > > + if (unpacked.reg() != dst.reg()) { > > + sel.MOV(dst, unpacked); > > + } > > + } > > + } > > + > > + INLINE void convertI64ToI32(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + if (sel.hasLongType()) { > > + GenRegister unpacked; > > + const uint32_t type = dstType == TYPE_U32 ? GEN_TYPE_UD : > GEN_TYPE_D; > > + if (!sel.isScalarReg(dst.reg())) { > > + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, dstType == > > + TYPE_U32 ? 
GEN_TYPE_UD : GEN_TYPE_D); > > } else { > > - sel.CONVI64_TO_I(dst, src); > > + unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), > > + type); > > } > > - } else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || > srcType == ir::TYPE_S64)) { //i64 to float > > - auto dag = sel.regDAG[src.reg()]; > > - // FIXME, in the future, we need to do a common I64 lower to I32 > analysis > > - // at llvm IR layer which could cover more cases then just this > > one. > > - SelectionDAG *dag0, *dag1; > > - if (dag && dag->child[0] && dag->child[1]) { > > - if (dag->child[0]->insn.getOpcode() == OP_LOADI) { > > - dag0 = dag->child[1]; > > - dag1 = dag->child[0]; > > - } else { > > - dag0 = dag->child[0]; > > - dag1 = dag->child[1]; > > - } > > - GBE_ASSERT(!(dag->child[0]->insn.getOpcode() == OP_LOADI && > > - dag->child[1]->insn.getOpcode() == OP_LOADI)); > > - if (dag->insn.getOpcode() == OP_AND || > > - dag->insn.getOpcode() == OP_OR || > > - dag->insn.getOpcode() == OP_XOR) { > > - GenRegister src0; > > - GenRegister src1; > > - if (lowerI64Reg(sel, dag0, src0, GEN_TYPE_UD) && > > - lowerI64Reg(sel, dag1, src1, GEN_TYPE_UD)) { > > - switch (dag->insn.getOpcode()) { > > - default: > > - case OP_AND: sel.AND(GenRegister::retype(dst, GEN_TYPE_UD), > src0, src1); break; > > - case OP_OR: sel.OR(GenRegister::retype(dst, GEN_TYPE_UD), > src0, src1); break; > > - case OP_XOR: sel.XOR(GenRegister::retype(dst, GEN_TYPE_UD), > src0, src1); break; > > - } > > - sel.MOV(dst, GenRegister::retype(dst, GEN_TYPE_UD)); > > - markChildren = false; > > - return true; > > + > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.MOV(unpacked, src); > > + sel.pop(); > > + > > + if (unpacked.reg() != dst.reg()) { > > + sel.MOV(dst, unpacked); > > + } > > + } else { > > + sel.CONVI64_TO_I(dst, src); > > + } > > + } > > + > > + INLINE void 
convertI64ToFloat(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + auto dag = sel.regDAG[src.reg()]; > > + > > + // FIXME, in the future, we need to do a common I64 lower to I32 > analysis > > + // at llvm IR layer which could cover more cases then just this one. > > + SelectionDAG *dag0, *dag1; > > + if (dag && dag->child[0] && dag->child[1]) { > > + if (dag->child[0]->insn.getOpcode() == OP_LOADI) { > > + dag0 = dag->child[1]; > > + dag1 = dag->child[0]; > > + } else { > > + dag0 = dag->child[0]; > > + dag1 = dag->child[1]; > > + } > > + GBE_ASSERT(!(dag->child[0]->insn.getOpcode() == OP_LOADI && > > + dag->child[1]->insn.getOpcode() == OP_LOADI)); > > + if (dag->insn.getOpcode() == OP_AND || > > + dag->insn.getOpcode() == OP_OR || > > + dag->insn.getOpcode() == OP_XOR) { > > + GenRegister src0; > > + GenRegister src1; > > + if (lowerI64Reg(sel, dag0, src0, GEN_TYPE_UD) && > > + lowerI64Reg(sel, dag1, src1, GEN_TYPE_UD)) { > > + switch (dag->insn.getOpcode()) { > > + default: > > + case OP_AND: sel.AND(GenRegister::retype(dst, GEN_TYPE_UD), > src0, src1); break; > > + case OP_OR: sel.OR(GenRegister::retype(dst, GEN_TYPE_UD), > src0, src1); break; > > + case OP_XOR: sel.XOR(GenRegister::retype(dst, > > + GEN_TYPE_UD), src0, src1); break; > > } > > + sel.MOV(dst, GenRegister::retype(dst, GEN_TYPE_UD)); > > + markChildren = false; > > + return; > > } > > } > > + } > > > > - if (!sel.hasLongType()) { > > - GenRegister tmp[6]; > > - for(int i=0; i<6; i++) { > > - tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > > - } > > - sel.push(); > > - sel.curr.flag = 0; > > - sel.curr.subFlag = 1; > > - sel.CONVI64_TO_F(dst, src, tmp); > > - sel.pop(); > > - } else { > > - 
GenRegister unpacked; > > - const uint32_t type = GEN_TYPE_F; > > - unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, type); > > + if (!sel.hasLongType()) { > > + GenRegister tmp[6]; > > + for(int i=0; i<6; i++) { > > + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > > + } > > + sel.push(); > > + sel.curr.flag = 0; > > + sel.curr.subFlag = 1; > > + sel.CONVI64_TO_F(dst, src, tmp); > > + sel.pop(); > > + } else { > > + GenRegister unpacked; > > + const uint32_t type = GEN_TYPE_F; > > + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, type); > > > > - sel.push(); > > - if (sel.isScalarReg(insn.getSrc(0))) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > - sel.MOV(unpacked, src); > > - sel.pop(); > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.MOV(unpacked, src); > > + sel.pop(); > > > > - if (unpacked.reg() != dst.reg()) { > > - sel.MOV(dst, unpacked); > > - } > > + if (unpacked.reg() != dst.reg()) { > > + sel.MOV(dst, unpacked); > > } > > - } else if (sel.hasLongType() && sel.hasLongRegRestrict() && > > dstFamily > == FAMILY_QWORD && srcFamily != FAMILY_QWORD) { > > - // Convert i32/i16/i8/float to i64/double if > > hasLongRegRestrict(src and > dst hstride must be aligned to the same qword). 
> > + } > > + } > > + > > + INLINE void convertSmallIntsToI64(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + const RegisterFamily srcFamily = getFamily(srcType); > > + > > + if (sel.hasLongType() && sel.hasLongRegRestrict()) { > > + // Convert i32/i16/i8 to i64 if hasLongRegRestrict(src and dst > > hstride > must be aligned to the same qword). > > GenRegister unpacked; > > GenRegister unpacked_src = src; > > > > sel.push(); > > - if (sel.isScalarReg(insn.getSrc(0))) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > > > - if (srcType == ir::TYPE_FLOAT) { > > - unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, GEN_TYPE_F); > > - } else if(srcFamily == FAMILY_DWORD) { > > - unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, srcType == TYPE_U32 ? > GEN_TYPE_UD : GEN_TYPE_D); > > - } else if(srcFamily == FAMILY_WORD) { > > - unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, srcType == TYPE_U16 ? > GEN_TYPE_UW : GEN_TYPE_W); > > - } else if(srcFamily == FAMILY_BYTE) { > > - GenRegister tmp = sel.selReg(sel.reg(FAMILY_WORD, > sel.isScalarReg(insn.getSrc(0)))); > > - tmp = GenRegister::retype(tmp, srcType == TYPE_U8 ? 
> GEN_TYPE_UW : GEN_TYPE_W); > > - unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > - unpacked = GenRegister::retype(unpacked, srcType == TYPE_U8 ? > GEN_TYPE_UW : GEN_TYPE_W); > > - sel.MOV(tmp, src); > > - unpacked_src = tmp; > > - } else > > - GBE_ASSERT(0); > > + if(srcFamily == FAMILY_DWORD) { > > + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, srcType == TYPE_U32 ? > GEN_TYPE_UD : GEN_TYPE_D); > > + } else if(srcFamily == FAMILY_WORD) { > > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, srcType == TYPE_U16 ? > GEN_TYPE_UW : GEN_TYPE_W); > > + } else if(srcFamily == FAMILY_BYTE) { > > + GenRegister tmp = sel.selReg(sel.reg(FAMILY_WORD, > sel.isScalarReg(insn.getSrc(0)))); > > + tmp = GenRegister::retype(tmp, srcType == TYPE_U8 ? > GEN_TYPE_UW : GEN_TYPE_W); > > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, srcType == TYPE_U8 ? 
> GEN_TYPE_UW : GEN_TYPE_W); > > + sel.MOV(tmp, src); > > + unpacked_src = tmp; > > + } else > > + GBE_ASSERT(0); > > > > - sel.MOV(unpacked, unpacked_src); > > + sel.MOV(unpacked, unpacked_src); > > sel.pop(); > > sel.MOV(dst, unpacked); > > - }else if ((dst.isdf() && srcType == ir::TYPE_FLOAT) || > > - (src.isdf() && dstType == ir::TYPE_FLOAT)) { // float and > > double > conversion > > - ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD); > > - sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64)); > > - } else if (dst.isint64()) { // promote to i64 > > - switch(src.type) { > > - case GEN_TYPE_F: > > - { > > - if (!sel.hasLongType()) { > > - GenRegister tmp[2]; > > - tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > > - tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT); > > - sel.push(); > > - sel.curr.flag = 0; > > - sel.curr.subFlag = 1; > > - sel.CONVF_TO_I64(dst, src, tmp); > > - sel.pop(); > > - } else { > > - sel.MOV(dst, src); > > - } > > - break; > > - } > > - case GEN_TYPE_HF: > > - { > > - GBE_ASSERT(sel.hasLongType()); > > - GBE_ASSERT(sel.hasHalfType()); > > - uint32_t type = dstType == TYPE_U64 ? GEN_TYPE_UD : > GEN_TYPE_D; > > - GenRegister tmp = > GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0))), TYPE_U32), type); > > - sel.push(); > > - if (sel.isScalarReg(insn.getSrc(0))) { > > - sel.curr.execWidth = 1; > > - sel.curr.predicate = GEN_PREDICATE_NONE; > > - sel.curr.noMask = 1; > > - } > > - sel.MOV(tmp, src); > > - sel.pop(); > > - sel.MOV(dst, tmp); > > - break; > > - } > > - case GEN_TYPE_DF: > > - NOT_IMPLEMENTED; > > - default: > > - if (sel.hasLongType()) { > > - sel.MOV(dst, src); > > - } else { > > - sel.CONVI_TO_I64(dst, src, > > sel.selReg(sel.reg(FAMILY_DWORD))); > > - } > > - } > > - } else if (srcType == ir::TYPE_HALF && (dstFamily == FAMILY_BYTE || > dstFamily == FAMILY_WORD)) { > > - // Special case, half -> char/short. 
> > - /* [DevBDW+]: Format conversion to or from HF (Half Float) must be > DWord-aligned and > > - strided by a DWord on the destination. */ > > - GBE_ASSERT(sel.hasHalfType()); > > - GenRegister tmp; > > + } else if (sel.hasLongType()) { > > + sel.MOV(dst, src); > > + } else { > > + sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD))); > > + } > > + } > > + > > + INLINE void convertFToI64(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + > > + if (sel.hasLongType() && sel.hasLongRegRestrict() && srcType == > ir::TYPE_FLOAT) { // typical bsw float->long case > > + // Convert float to i64 if hasLongRegRestrict(src and dst hstride > > must > be aligned to the same qword). > > + GenRegister unpacked; > > + GenRegister unpacked_src = src; > > + > > sel.push(); > > if (sel.isScalarReg(insn.getSrc(0))) { > > sel.curr.execWidth = 1; > > sel.curr.predicate = GEN_PREDICATE_NONE; > > sel.curr.noMask = 1; > > } > > - if (dstFamily == FAMILY_BYTE) { > > - const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : > GEN_TYPE_B; > > - tmp = > GenRegister::retype(sel.unpacked_ub(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))), type); > > - sel.MOV(tmp, src); > > - } else { > > - const uint32_t type = dstType == TYPE_U16 ? 
GEN_TYPE_UW : > GEN_TYPE_W; > > - tmp = > GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))), type); > > - sel.MOV(tmp, src); > > - } > > + > > + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, > sel.isScalarReg(insn.getSrc(0)))); > > + unpacked = GenRegister::retype(unpacked, GEN_TYPE_F); > > + sel.MOV(unpacked, unpacked_src); > > sel.pop(); > > - sel.MOV(dst, tmp); > > - } else if (dstType == ir::TYPE_HALF && (srcFamily == FAMILY_BYTE || > srcFamily == FAMILY_WORD)) { > > - // Special case, char/uchar -> half > > - /* [DevBDW+]: Format conversion to or from HF (Half Float) must be > DWord-aligned and > > - strided by a DWord on the destination. */ > > + sel.MOV(dst, unpacked); > > + } else if (srcType == ir::TYPE_FLOAT) { > > + if (sel.hasLongType()) { // typical bdw float->long case > > + sel.MOV(dst, src); > > + } else { // typical old platform float->long case > > + GenRegister tmp[2]; > > + tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > > + tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT); > > + sel.push(); > > + sel.curr.flag = 0; > > + sel.curr.subFlag = 1; > > + sel.CONVF_TO_I64(dst, src, tmp); > > + sel.pop(); > > + } > > + } else if (srcType == ir::TYPE_HALF) { // TODO: We may consider bsw's > hasLongRegRestrict case here. > > + /* No need to consider old platform. if we support half, we must > > have > native long. */ > > + GBE_ASSERT(sel.hasLongType()); > > GBE_ASSERT(sel.hasHalfType()); > > - GenRegister tmp = > GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))), GEN_TYPE_HF); > > + uint32_t type = dstType == TYPE_U64 ? 
GEN_TYPE_UD : GEN_TYPE_D; > > + GenRegister tmp = > > + GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD, > > + sel.isScalarReg(insn.getSrc(0))), TYPE_U32), type); > > sel.push(); > > if (sel.isScalarReg(insn.getSrc(0))) { > > sel.curr.execWidth = 1; > > @@ -4506,11 +4524,163 @@ namespace gbe > > sel.MOV(tmp, src); > > sel.pop(); > > sel.MOV(dst, tmp); > > - } else > > - sel.MOV(dst, src); > > + } else if (src.type == GEN_TYPE_DF) { > > + //TODO: > > + GBE_ASSERT(0); > > + } else { > > + /* Invalid case. */ > > + GBE_ASSERT(0); > > + } > > + } > > + > > + INLINE void convertBetweenFloatDouble(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + > > + > > + //TODO: > > + ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD); > > + sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64)); > > + } > > + > > + INLINE void convertBetweenHalfDouble(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > > > + //TODO: > > + ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD); > > + sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64)); > > + } > > + > > + INLINE void convertHalfToSmallInts(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister 
src = sel.selReg(insn.getSrc(0), srcType); > > + const RegisterFamily dstFamily = getFamily(dstType); > > + > > + // Special case, half -> char/short. > > + /* [DevBDW+]: Format conversion to or from HF (Half Float) > > must be > DWord-aligned and > > + strided by a DWord on the destination. */ > > + GBE_ASSERT(sel.hasHalfType()); > > + GenRegister tmp; > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + if (dstFamily == FAMILY_BYTE) { > > + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : > GEN_TYPE_B; > > + tmp = > GenRegister::retype(sel.unpacked_ub(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))), type); > > + sel.MOV(tmp, src); > > + } else { > > + const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : > GEN_TYPE_W; > > + tmp = > GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))), type); > > + sel.MOV(tmp, src); > > + } > > sel.pop(); > > + sel.MOV(dst, tmp); > > + } > > > > + INLINE void convertSmallIntsToHalf(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + > > + // Special case, char/uchar -> half > > + /* [DevBDW+]: Format conversion to or from HF (Half Float) must be > DWord-aligned and > > + strided by a DWord on the destination. 
*/ > > + GBE_ASSERT(sel.hasHalfType()); > > + GenRegister tmp = > GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, > sel.isScalarReg(insn.getSrc(0)))), GEN_TYPE_HF); > > + sel.push(); > > + if (sel.isScalarReg(insn.getSrc(0))) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + sel.MOV(tmp, src); > > + sel.pop(); > > + sel.MOV(dst, tmp); > > + } > > + > > + INLINE bool emitOne(Selection::Opaque &sel, const > ir::ConvertInstruction &insn, bool &markChildren) const > > + { > > + using namespace ir; > > + const Type dstType = insn.getDstType(); > > + const Type srcType = insn.getSrcType(); > > + const RegisterFamily dstFamily = getFamily(dstType); > > + const RegisterFamily srcFamily = getFamily(srcType); > > + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > > + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + const Opcode opcode = insn.getOpcode(); > > + sel.push(); > > + if (sel.isScalarReg(insn.getDst(0)) == true) { > > + sel.curr.execWidth = 1; > > + sel.curr.predicate = GEN_PREDICATE_NONE; > > + sel.curr.noMask = 1; > > + } > > + if(opcode == ir::OP_SAT_CVT) > > + sel.curr.saturate = 1; > > + > > + if (opcode == OP_F16TO32 || opcode == OP_F32TO16) { /* Conversion > between float and half. */ > > + convertBetweenHalfFloat(sel, insn, markChildren); > > + } else if (dstFamily != FAMILY_DWORD && dstFamily != > FAMILY_QWORD && srcFamily == FAMILY_DWORD) { > > + //convert i32/float to small int/half > > + convert32bitsToSmall(sel, insn, markChildren); > > + } else if (dstFamily == FAMILY_WORD && srcFamily == > FAMILY_QWORD && srcType != ir::TYPE_DOUBLE) { > > + //convert i64 to i16 and half. 
> > + convertI64To16bits(sel, insn, markChildren); > > + } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD > && srcType != ir::TYPE_DOUBLE) { > > + //convert i64 to i8 > > + convertI64ToI8(sel, insn, markChildren); > > + } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && > > + (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {// > > Convert i64 > to i32 > > + convertI64ToI32(sel, insn, markChildren); > > + } else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || > srcType == ir::TYPE_S64)) { > > + convertI64ToFloat(sel, insn, markChildren); > > + } else if (dstType == ir::TYPE_DOUBLE && (srcType == ir::TYPE_U64 || > srcType == ir::TYPE_S64)) { > > + // TODO: long -> double > > + GBE_ASSERT(0); > > + } else if ((dstType == ir::TYPE_U64 || dstType == ir::TYPE_S64) > > + && (srcFamily != FAMILY_QWORD && srcType != ir::TYPE_FLOAT && > srcType != ir::TYPE_HALF)) { > > + convertSmallIntsToI64(sel, insn, markChildren); > > + } else if ((dstType == ir::TYPE_U64 || dstType == ir::TYPE_S64) > > + && (srcType == ir::TYPE_FLOAT || srcType == ir::TYPE_HALF || > srcType == ir::TYPE_DOUBLE)) { > > + convertFToI64(sel, insn, markChildren); > > + } else if ((srcType == ir::TYPE_FLOAT && dstType == ir::TYPE_DOUBLE) > || > > + (dstType == ir::TYPE_FLOAT && srcType == ir::TYPE_DOUBLE)) { > > + // float and double conversion > > + convertBetweenFloatDouble(sel, insn, markChildren); > > + } else if ((srcType == ir::TYPE_HALF && dstType == ir::TYPE_DOUBLE) > > || > > + (dstType == ir::TYPE_HALF && srcType == ir::TYPE_DOUBLE)) { > > + // float and double conversion > > + convertBetweenHalfDouble(sel, insn, markChildren); > > + } else if (srcType == ir::TYPE_HALF && (dstFamily == FAMILY_BYTE || > dstFamily == FAMILY_WORD)) { > > + // Convert half to small int > > + convertHalfToSmallInts(sel, insn, markChildren); > > + } else if (dstType == ir::TYPE_HALF && (srcFamily == FAMILY_BYTE || > srcFamily == FAMILY_WORD)) { > > + // Convert small int 
to half > > + convertSmallIntsToHalf(sel, insn, markChildren); > > + } else { > > + /* All special cases has been handled, just MOV. */ > > + sel.MOV(dst, src); > > + } > > + > > + sel.pop(); > > return true; > > } > > DECL_CTOR(ConvertInstruction, 1, 1); > > -- > > 1.7.9.5 > > > > > > > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet > > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
