Yes, simd16 + hstrid 4 is not covered, and just get a worse case for vstrid 128, 64bit element is not enough too. Will we allocate such register? If yes, I need to wrap it in a structure instead of a uint32/uint64.
-----Original Message----- From: Yang, Rong R Sent: Monday, October 19, 2015 4:38 PM To: Guo, Yejun; [email protected] Cc: Guo, Yejun Subject: RE: [Beignet] [PATCH] fix a long relative regreesion issue on BSW caused by local copy propagation One comment about CalculateElements. > -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf > Of Guo Yejun > Sent: Thursday, October 15, 2015 9:48 > To: [email protected] > Cc: Guo, Yejun > Subject: [Beignet] [PATCH] fix a long relative regreesion issue on BSW > caused by local copy propagation > > on bsw, for long data type, src and dst hstride must be aligned to the > same qword, so disable the copy propagation for following IRs. > MOV(8) %62<2>:UD : %34<8,8,1>:UD > MOV(8) %42<1>:L : %62<8,4,2>:UD > > and also fix a typo issue in function CalculateElements > > Signed-off-by: Guo Yejun <[email protected]> > --- > backend/src/backend/gen_insn_selection.cpp | 2 +- > backend/src/backend/gen_insn_selection.hpp | 2 ++ > backend/src/backend/gen_insn_selection_optimize.cpp | 8 +++++++- > 3 files changed, 10 insertions(+), 2 deletions(-) > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 9714e3e..5125eb3 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -2140,7 +2140,7 @@ namespace gbe > this->opaque->setLongRegRestrict(true); > this->opaque->setSlowByteGather(true); > this->opaque->setHasHalfType(true); > - opt_features = SIOF_OP_AND_LOGICAL_SRCMOD; > + opt_features = SIOF_OP_AND_LOGICAL_SRCMOD | > + SIOF_OP_MOV_LONG_REG_RESTRICT; > } > > Selection9::Selection9(GenContext &ctx) : Selection(ctx) { diff > --git a/backend/src/backend/gen_insn_selection.hpp > b/backend/src/backend/gen_insn_selection.hpp > index f313786..4efb80b 100644 > --- a/backend/src/backend/gen_insn_selection.hpp > +++ b/backend/src/backend/gen_insn_selection.hpp > @@ -230,6 +230,8 @@ namespace gbe > //for OP_AND, on BDW+, SrcMod value indicates a logical source modifier > // on PRE-BDW, SrcMod value indicates a numeric source > modifier > SIOF_OP_AND_LOGICAL_SRCMOD = 1 << 0, > + //for OP_MOV, on BSW, for long data type, src and dst hstride > + must be > aligned to the same qword > + SIOF_OP_MOV_LONG_REG_RESTRICT = 1 << 1, > }; > > /*! Owns the selection engine */ > diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp > b/backend/src/backend/gen_insn_selection_optimize.cpp > index 939dd18..fffc8b0 100644 > --- a/backend/src/backend/gen_insn_selection_optimize.cpp > +++ b/backend/src/backend/gen_insn_selection_optimize.cpp > @@ -30,7 +30,7 @@ namespace gbe > elements |= (1 << offsetInType); Is 32bits for elements enough? For example simd16 and hstride is 4. > offsetInByte += hstride * elementSize; > } > - offsetInByte += vstride * elementSize; > + base += vstride * elementSize; > } > return elements; > } > @@ -174,6 +174,12 @@ namespace gbe > if (insn.opcode == SEL_OP_AND && (info->replacement.absolute || > info- > >replacement.negation)) > return false; > > + if (features & SIOF_OP_MOV_LONG_REG_RESTRICT && insn.opcode == > SEL_OP_MOV) { > + const GenRegister& dst = insn.dst(0); > + if (dst.isint64() && !info->replacement.isint64() && > + info->elements != > CalculateElements(info->replacement, insn.state.execWidth)) > + return false; > + } > + > if (info->replacementOverwritten) > return false; > > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
