This version LGTM; I will push it later. Thanks.
On Fri, Jan 23, 2015 at 04:53:23PM +0800, [email protected] wrote: > From: Junyan He <[email protected]> > > On IVB and HSW, When dst spans two registers, the source MUST > span two registers. So the following instructions: > mov (16) r104.0<2>:uw r126.0<8;8,1>:uw { Align1, H1 } > mov (16) r104.1<2>:uw r111.0<8;8,1>:uw { Align1, H1 } > mov (16) r106.0<2>:uw r110.0<8;8,1>:uw { Align1, H1 } > mov (16) r106.1<2>:uw r109.0<8;8,1>:uw { Align1, H1 } > are illegal. > Add the check to split instruction into 2 SIMD8 instructions here. > > TODO: > These instructions are allowed on BDW, need to improve. > > Signed-off-by: Junyan He <[email protected]> > --- > backend/src/backend/gen_encoder.cpp | 41 > +++++++++++++++++++++++++++++++++++ > 1 file changed, 41 insertions(+) > > diff --git a/backend/src/backend/gen_encoder.cpp > b/backend/src/backend/gen_encoder.cpp > index 9058e2e..4d45811 100644 > --- a/backend/src/backend/gen_encoder.cpp > +++ b/backend/src/backend/gen_encoder.cpp > @@ -87,6 +87,39 @@ namespace gbe > return false; > } > > + INLINE bool isSrcDstDiffSpan(GenRegister dst, GenRegister src) { > + if (src.hstride == GEN_HORIZONTAL_STRIDE_0) return false; > + > + uint32_t typeSz = typeSize(dst.type); > + uint32_t horizontal = stride(dst.hstride); > + uint32_t spans = (dst.subnr / (horizontal * typeSz)) * (horizontal * > typeSz) + horizontal * typeSz * 16; > + uint32_t dstSpan = spans / GEN_REG_SIZE; > + dstSpan = dstSpan + (spans % GEN_REG_SIZE == 0 ? 0 : 1); > + if (dstSpan < 2) return false; > + > + typeSz = typeSize(src.type); > + horizontal = stride(src.hstride); > + spans = (src.subnr / (horizontal * typeSz)) * (horizontal * typeSz) + > horizontal * typeSz * 16; > + uint32_t srcSpan = (horizontal * typeSz * 16) / GEN_REG_SIZE; > + srcSpan = srcSpan + (spans % GEN_REG_SIZE == 0 ? 0 : 1); > + > + GBE_ASSERT(srcSpan <= 2); > + GBE_ASSERT(dstSpan == 2); > + > + if (srcSpan == dstSpan) return false; > + > + /* Special case, dst is DW and src is w. 
> + the case: > + mov (16) r10.0<1>:d r12<8;8,1>:w > + is allowed. */ > + if ((dst.type == GEN_TYPE_UD || dst.type == GEN_TYPE_D) > + && (src.type == GEN_TYPE_UW || src.type == GEN_TYPE_W) > + && dstSpan == 2 && srcSpan == 1 > + && dst.subnr == 0 && src.subnr == 0) return false; > + > + return true; > + } > + > INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister > src) { > if (p->curr.execWidth != 16) return false; > if (isVectorOfLongs(dst) == true) return true; > @@ -97,6 +130,8 @@ namespace gbe > if (isCrossMoreThan2(src) == true) return true; > if (isVectorOfLongs(src) == true) return true; > > + if (isSrcDstDiffSpan(dst, src) == true) return true; > + > if (isVectorOfBytes(dst) == true && > ((isVectorOfBytes(src) == true && src.hstride == dst.hstride) > || src.hstride == GEN_HORIZONTAL_STRIDE_0)) > @@ -120,6 +155,9 @@ namespace gbe > if (isCrossMoreThan2(src0) == true) return true; > if (isCrossMoreThan2(src1) == true) return true; > > + if (isSrcDstDiffSpan(dst, src0) == true) return true; > + if (isSrcDstDiffSpan(dst, src1) == true) return true; > + > if (isVectorOfBytes(dst) == true && > ((isVectorOfBytes(src0) == true && src0.hstride == dst.hstride) || > src0.hstride == GEN_HORIZONTAL_STRIDE_0) && > @@ -149,6 +187,9 @@ namespace gbe > if (isCrossMoreThan2(src0) == true) return true; > if (isCrossMoreThan2(src1) == true) return true; > > + if (isSrcDstDiffSpan(dst, src0) == true) return true; > + if (isSrcDstDiffSpan(dst, src1) == true) return true; > + > if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == > GEN_TYPE_F) > return true; > if (src1.type == GEN_TYPE_D || src1.type == GEN_TYPE_UD || src1.type == > GEN_TYPE_F) > -- > 1.7.9.5 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] 
http://lists.freedesktop.org/mailman/listinfo/beignet
