On Wed, Aug 7, 2013 at 8:26 AM, Xing, Homer <[email protected]> wrote:
> Bspec says, nibControl + quarterControl can be used when the real > execSize is four. For example, when execSize is eight and stride is two, > the real execSize is four. > I really hope so and in practice, it works like above. But I just can't find the statement in the spec. I only found the following statement for IVB: [DevIVB]: NibCtrl is only allowed for SIMD4 instructions with (DF) double precision source and/or destination. Could you tell me where can I find the description which support the same behavior with a non DW destination with 2-horizontal stride? That will be helpful as I also used this "undocumented" feature in my patch when I implemented 64 bit data reading. The only difference is I disabled the predication there. > **** > > ** ** > > Concentrating different function calls into one place is difficult. But I > can change the four times repeated code inside each “case SEL_OP_XX” into a > “for-loop”. > That's good, let's reduce part of the duplication. And fix the rest in the future. > **** > > ** ** > > *From:* zhigang gong [mailto:[email protected]] > *Sent:* Tuesday, August 6, 2013 4:38 PM > *To:* Xing, Homer > *Cc:* [email protected] > *Subject:* Re: [Beignet] [PATCH 2/3] support 64bit-integer AND(&), OR(|), > XOR(^) arithmetic**** > > ** ** > > Sorry that, the mutt crashed and sent out an incomplete email. Here is the > complete version:**** > > ** ** > > Homer,**** > > ** ** > > The logical of handling Long/ULong for many operations are very similar to > each other. If you can concentrate**** > > all of them into one place, then it will avoid to write duplicate code and > it will reduce the maintenance effort.**** > > ** ** > > And even in the same function, the logical seems repeat four times to > handle each 4 elements. The different**** > > is the register's offset and the quarterControl and nibCotnrol.**** > > ** ** > > Another interesting issue is which I ignored in your first patch which is > about the nibControl usage.**** > > It seems you are using quarterControl + nibControl to handle predication > for a DW type with a **** > > 2-element-horizontal. I haven't found anything in the Gen spec which > support this type of usage.**** > > From the spec, the only use case for nibControl is when the destination is > a DF type. Right?**** > > Any comments here? **** > > ** ** > > On Tue, Aug 6, 2013 at 4:16 PM, Zhigang Gong <[email protected]> > wrote:**** > > Homer, > > The logical of handling Long/ULong for many operations are > very similar to each other. > > handle**** > > On Tue, Aug 06, 2013 at 04:01:30PM +0800, Homer Hsing wrote: > > > > Signed-off-by: Homer Hsing <[email protected]> > > --- > > backend/src/backend/gen_context.cpp | 102 > +++++++++++++++++++++++++++++ > > backend/src/backend/gen_insn_selection.cpp | 24 ++++++- > > backend/src/backend/gen_insn_selection.hxx | 3 + > > backend/src/ir/instruction.cpp | 1 + > > 4 files changed, 127 insertions(+), 3 deletions(-) > > > > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > > index 69dab85..bbe16d0 100644 > > --- a/backend/src/backend/gen_context.cpp > > +++ b/backend/src/backend/gen_context.cpp > > @@ -162,6 +162,108 @@ namespace gbe > > case SEL_OP_AND: p->AND(dst, src0, src1); break; > > case SEL_OP_OR: p->OR (dst, src0, src1); break; > > case SEL_OP_XOR: p->XOR(dst, src0, src1); break; > > + case SEL_OP_I64AND: > > + { > > + GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL), > > + xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL), > > + xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL); > > + int execWidth = p->curr.execWidth; > > + p->push(); > > + p->curr.execWidth = 8; > > + p->AND(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->AND(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + p->curr.nibControl = 1; > > + xdst = GenRegister::suboffset(xdst, 4), > > + xsrc0 = GenRegister::suboffset(xsrc0, 4), > > + xsrc1 = GenRegister::suboffset(xsrc1, 4); > > + p->AND(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->AND(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + if (execWidth == 16) { > > + p->curr.quarterControl = 1; > > + p->curr.nibControl = 0; > > + xdst = GenRegister::suboffset(xdst, 4), > > + xsrc0 = GenRegister::suboffset(xsrc0, 4), > > + xsrc1 = GenRegister::suboffset(xsrc1, 4); > > + p->AND(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->AND(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + p->curr.nibControl = 1; > > + xdst = GenRegister::suboffset(xdst, 4), > > + xsrc0 = GenRegister::suboffset(xsrc0, 4), > > + xsrc1 = GenRegister::suboffset(xsrc1, 4); > > + p->AND(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->AND(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + } > > + p->pop(); > > + } > > + break; > > + case SEL_OP_I64OR: > > + { > > + GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL), > > + xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL), > > + xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL); > > + int execWidth = p->curr.execWidth; > > + p->push(); > > + p->curr.execWidth = 8; > > + p->OR(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->OR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + p->curr.nibControl = 1; > > + xdst = GenRegister::suboffset(xdst, 4), > > + xsrc0 = GenRegister::suboffset(xsrc0, 4), > > + xsrc1 = GenRegister::suboffset(xsrc1, 4); > > + p->OR(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->OR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + if (execWidth == 16) { > > + p->curr.quarterControl = 1; > > + p->curr.nibControl = 0; > > + xdst = GenRegister::suboffset(xdst, 4), > > + xsrc0 = GenRegister::suboffset(xsrc0, 4), > > + xsrc1 = GenRegister::suboffset(xsrc1, 4); > > + p->OR(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->OR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + p->curr.nibControl = 1; > > + xdst = GenRegister::suboffset(xdst, 4), > > + xsrc0 = GenRegister::suboffset(xsrc0, 4), > > + xsrc1 = GenRegister::suboffset(xsrc1, 4); > > + p->OR(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->OR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + } > > + p->pop(); > > + } > > + break; > > + case SEL_OP_I64XOR: > > + { > > + GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL), > > + xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL), > > + xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL); > > + int execWidth = p->curr.execWidth; > > + p->push(); > > + p->curr.execWidth = 8; > > + p->XOR(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->XOR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + p->curr.nibControl = 1; > > + xdst = GenRegister::suboffset(xdst, 4), > > + xsrc0 = GenRegister::suboffset(xsrc0, 4), > > + xsrc1 = GenRegister::suboffset(xsrc1, 4); > > + p->XOR(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->XOR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + if (execWidth == 16) { > > + p->curr.quarterControl = 1; > > + p->curr.nibControl = 0; > > + xdst = GenRegister::suboffset(xdst, 4), > > + xsrc0 = GenRegister::suboffset(xsrc0, 4), > > + xsrc1 = GenRegister::suboffset(xsrc1, 4); > > + p->XOR(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->XOR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + p->curr.nibControl = 1; > > + xdst = GenRegister::suboffset(xdst, 4), > > + xsrc0 = GenRegister::suboffset(xsrc0, 4), > > + xsrc1 = GenRegister::suboffset(xsrc1, 4); > > + p->XOR(xdst.bottom_half(), xsrc0.bottom_half(), > xsrc1.bottom_half()); > > + p->XOR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half()); > > + } > > + p->pop(); > > + } > > + break; > > case SEL_OP_SHR: p->SHR(dst, src0, src1); break; > > case SEL_OP_SHL: p->SHL(dst, src0, src1); break; > > case SEL_OP_RSR: p->RSR(dst, src0, src1); break; > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > > index 66cfa31..7e9402d 100644 > > --- a/backend/src/backend/gen_insn_selection.cpp > > +++ b/backend/src/backend/gen_insn_selection.cpp > > @@ -423,6 +423,9 @@ namespace gbe > > ALU2(AND) > > ALU2(OR) > > ALU2(XOR) > > + ALU2(I64AND) > > + ALU2(I64OR) > > + ALU2(I64XOR) > > ALU2(SHR) > > ALU2(SHL) > > ALU2(RSR) > > @@ -1434,9 +1437,24 @@ namespace gbe > > sel.ADD(dst, src0, src1); > > sel.pop(); > > break; > > - case OP_XOR: sel.XOR(dst, src0, src1); break; > > - case OP_OR: sel.OR(dst, src0, src1); break; > > - case OP_AND: sel.AND(dst, src0, src1); break; > > + case OP_XOR: > > + if (type == Type::TYPE_U64 || type == Type::TYPE_S64) > > + sel.I64XOR(dst, src0, src1); > > + else > > + sel.XOR(dst, src0, src1); > > + break; > > + case OP_OR: > > + if (type == Type::TYPE_U64 || type == Type::TYPE_S64) > > + sel.I64OR(dst, src0, src1); > > + else > > + sel.OR(dst, src0, src1); > > + break; > > + case OP_AND: > > + if (type == Type::TYPE_U64 || type == Type::TYPE_S64) > > + sel.I64AND(dst, src0, src1); > > + else > > + sel.AND(dst, src0, src1); > > + break; > > case OP_SUB: > > if (type == Type::TYPE_U64 || type == Type::TYPE_S64) { > > GenRegister t = > sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64); > > diff --git a/backend/src/backend/gen_insn_selection.hxx > b/backend/src/backend/gen_insn_selection.hxx > > index 8eeb19f..7664c8f 100644 > > --- a/backend/src/backend/gen_insn_selection.hxx > > +++ b/backend/src/backend/gen_insn_selection.hxx > > @@ -14,6 +14,9 @@ DECL_SELECTION_IR(SEL, BinaryInstruction) > > DECL_SELECTION_IR(AND, BinaryInstruction) > > DECL_SELECTION_IR(OR, BinaryInstruction) > > DECL_SELECTION_IR(XOR, BinaryInstruction) > > +DECL_SELECTION_IR(I64AND, BinaryInstruction) > > +DECL_SELECTION_IR(I64OR, BinaryInstruction) > > +DECL_SELECTION_IR(I64XOR, BinaryInstruction) > > DECL_SELECTION_IR(SHR, BinaryInstruction) > > DECL_SELECTION_IR(SHL, BinaryInstruction) > > DECL_SELECTION_IR(RSR, BinaryInstruction) > > diff --git a/backend/src/ir/instruction.cpp > b/backend/src/ir/instruction.cpp > > index 2589848..f58757b 100644 > > --- a/backend/src/ir/instruction.cpp > > +++ b/backend/src/ir/instruction.cpp > > @@ -672,6 +672,7 @@ namespace ir { > > static const Type logicalType[] = {TYPE_S8, TYPE_U8, > > TYPE_S16, TYPE_U16, > > TYPE_S32, TYPE_U32, > > + TYPE_S64, TYPE_U64, > > TYPE_BOOL}; > > static const uint32_t logicalTypeNum = ARRAY_ELEM_NUM(logicalType); > > > > -- > > 1.8.1.2 > > > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet**** > > ** ** >
_______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
