On Fri, Mar 06, 2015 at 03:24:00PM +0800, junyan...@inbox.com wrote: > From: Junyan He <junyan...@linux.intel.com> > > The swap for short will be like: > mov(1) a0<1>:UD 0xe600e61UD { align1 WE_all }; > mov(1) a0.1<1>:UD 0xe620e63UD { align1 WE_all }; > mov(1) a0.2<1>:UD 0xe640e65UD { align1 WE_all }; > mov(1) a0.3<1>:UD 0xe660e67UD { align1 WE_all }; > mov(8) g114<1>:UB g[a0]<VxH,1,0>:UB { align1 WE_all 1Q }; > mov(8) g114.8<1>:UB g[a0 8]<VxH,1,0>:UB { align1 WE_all 1Q }; > mov(8) g114.16<1>:UB g[a0 16]<VxH,1,0>:UB { align1 WE_all 1Q }; > mov(8) g114.24<1>:UB g[a0 24]<VxH,1,0>:UB { align1 WE_all 1Q }; > mov(16) g113<1>:UW g114<8,8,1>:UW { align1 WE_normal 1H }; > > Signed-off-by: Junyan He <junyan...@linux.intel.com> > --- > backend/src/backend/gen_context.cpp | 112 > ++++++++++++++++++++++++++++ > backend/src/backend/gen_insn_selection.cpp | 9 +++ > backend/src/backend/gen_insn_selection.hxx | 1 + > 3 files changed, 122 insertions(+) > > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 6856510..46b4a06 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -297,6 +297,118 @@ namespace gbe > p->MOV(dst.top_half(this->simdWidth), GenRegister::immud(0)); > break; > } > + case SEL_OP_BSWAP: { > + uint32_t simd = p->curr.execWidth; > + GBE_ASSERT(simd == 8 || simd == 16 || simd == 1); > + uint16_t new_a0[16]; > + memset(new_a0, 0, sizeof(new_a0)); > + > + GBE_ASSERT(src.type == dst.type); > + uint32_t start_addr = src.nr*32 + src.subnr; > + > + if (simd == 1) { > + GBE_ASSERT(src.hstride == GEN_HORIZONTAL_STRIDE_0 > + && dst.hstride == GEN_HORIZONTAL_STRIDE_0); > + if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D) { > + GBE_ASSERT(start_addr >= 0); > + new_a0[0] = start_addr + 3; > + new_a0[1] = start_addr + 2; > + new_a0[2] = start_addr + 1; > + new_a0[3] = start_addr; > + this->setA0Content(new_a0, 0, 4); > + > + p->push(); > + p->curr.execWidth = 4; > + 
p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + GenRegister ind_src = > GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > + a0[0], new_a0[0] - a0[0]); > + GenRegister dst_ = dst; > + dst_.type = GEN_TYPE_UB; > + dst_.hstride = GEN_HORIZONTAL_STRIDE_1; > + dst_.width = GEN_WIDTH_4; > + dst_.vstride = GEN_VERTICAL_STRIDE_4; > + p->MOV(dst_, ind_src); > + p->pop(); > + } else if (src.type == GEN_TYPE_UW || src.type == GEN_TYPE_W) { > + p->MOV(GenRegister::retype(dst, GEN_TYPE_UB), > + GenRegister::retype(GenRegister::offset(src, 0, 1), > GEN_TYPE_UB)); > + p->MOV(GenRegister::retype(GenRegister::offset(dst, 0, 1), > GEN_TYPE_UB), > + GenRegister::retype(src, GEN_TYPE_UB)); > + } else { > + GBE_ASSERT(0); > + } > + } else { > + if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D) { > + GBE_ASSERT(src.subnr == 0); The above assertion is not correct, because a valid simd8 or simd16 BSWAP instruction may have a uniform source register. We can't assume that the source register is never a uniform value.
> + GBE_ASSERT(dst.subnr == 0); > + GBE_ASSERT(tmp.subnr == 0); > + GBE_ASSERT(start_addr >= 0); > + new_a0[0] = start_addr + 3; > + new_a0[1] = start_addr + 2; > + new_a0[2] = start_addr + 1; > + new_a0[3] = start_addr; > + new_a0[4] = start_addr + 7; > + new_a0[5] = start_addr + 6; > + new_a0[6] = start_addr + 5; > + new_a0[7] = start_addr + 4; > + this->setA0Content(new_a0, 56); > + > + p->push(); > + p->curr.execWidth = 8; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + GenRegister ind_src = > GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > + a0[0], new_a0[0] - a0[0]); > + p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); > + for (int i = 1; i < 4; i++) { > + ind_src.addr_imm += 8; > + p->MOV(GenRegister::offset(GenRegister::retype(tmp, > GEN_TYPE_UB), 0, 8*i), ind_src); > + } > + if (simd == 16) { > + for (int i = 0; i < 4; i++) { > + ind_src.addr_imm += 8; > + p->MOV(GenRegister::offset(GenRegister::retype(tmp, > GEN_TYPE_UB), 1, 8*i), ind_src); > + } > + } > + p->pop(); > + > + p->MOV(dst, tmp); > + } else if (src.type == GEN_TYPE_UW || src.type == GEN_TYPE_W) { > + GBE_ASSERT(src.subnr == 0 || src.subnr == 16); > + GBE_ASSERT(dst.subnr == 0 || dst.subnr == 16); > + GBE_ASSERT(tmp.subnr == 0 || tmp.subnr == 16); > + GBE_ASSERT(start_addr >= 0); > + new_a0[0] = start_addr + 1; > + new_a0[1] = start_addr; > + new_a0[2] = start_addr + 3; > + new_a0[3] = start_addr + 2; > + new_a0[4] = start_addr + 5; > + new_a0[5] = start_addr + 4; > + new_a0[6] = start_addr + 7; > + new_a0[7] = start_addr + 6; > + this->setA0Content(new_a0, 56); > + > + p->push(); > + p->curr.execWidth = 8; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + GenRegister ind_src = > GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > + a0[0], new_a0[0] - a0[0]); > + p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); > + for (int i = 1; i < (simd == 8 ? 
2 : 4); i++) { > + ind_src.addr_imm += 8; > + p->MOV(GenRegister::offset(GenRegister::retype(tmp, > GEN_TYPE_UB), 0, 8*i), ind_src); > + } > + p->pop(); > + > + p->MOV(dst, tmp); > + } else { > + GBE_ASSERT(0); > + } > + } > + } > + break; > default: > NOT_IMPLEMENTED; > } > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index d100f80..2b166b1 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -498,6 +498,7 @@ namespace gbe > ALU1(RNDE) > ALU1(F16TO32) > ALU1(F32TO16) > + ALU1WithTemp(BSWAP) > ALU2(SEL) > ALU2(SEL_INT64) > ALU1(NOT) > @@ -2121,6 +2122,14 @@ namespace gbe > case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break; > case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break; > case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break; > + case ir::OP_BSWAP: > + { > + ir::Register tmp = sel.reg(getFamily(insnType)); > + const GenRegister src_ = GenRegister::retype(src, > getGenType(insnType)); > + const GenRegister dst_ = GenRegister::retype(dst, > getGenType(insnType)); > + sel.BSWAP(dst_, src_, sel.selReg(tmp, insnType)); > + break; > + } > case ir::OP_SIMD_ANY: > { > const GenRegister constZero = GenRegister::immuw(0);; > diff --git a/backend/src/backend/gen_insn_selection.hxx > b/backend/src/backend/gen_insn_selection.hxx > index be1f7ec..09f5aaf 100644 > --- a/backend/src/backend/gen_insn_selection.hxx > +++ b/backend/src/backend/gen_insn_selection.hxx > @@ -1,5 +1,6 @@ > DECL_SELECTION_IR(LABEL, LabelInstruction) > DECL_SELECTION_IR(MOV, UnaryInstruction) > +DECL_SELECTION_IR(BSWAP, UnaryWithTempInstruction) > DECL_SELECTION_IR(MOV_DF, UnaryWithTempInstruction) > DECL_SELECTION_IR(LOAD_DF_IMM, UnaryWithTempInstruction) > DECL_SELECTION_IR(LOAD_INT64_IMM, UnaryInstruction) > -- > 1.7.9.5 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > 
http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet