The patch LGTM
> -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > junyan...@inbox.com > Sent: Tuesday, April 14, 2015 4:17 PM > To: beignet@lists.freedesktop.org > Cc: Junyan He > Subject: [Beignet] [PATCH] Kill the A0 cache in GenContext. > > From: Junyan He <junyan...@linux.intel.com> > > The a0 value cache in GenContext can just hold the value at compile time, > which may be different from the true offset value at run time when the code > generates the backward jump. So just kill the cache of a0 and we will use > load vector instruction to optimize it later. > > Signed-off-by: Junyan He <junyan...@linux.intel.com> > --- > backend/src/backend/gen8_context.cpp | 54 > ++++++++-------------------------- > backend/src/backend/gen_context.cpp | 51 > +++++++------------------------- > backend/src/backend/gen_context.hpp | 1 - > 3 files changed, 24 insertions(+), 82 deletions(-) > > diff --git a/backend/src/backend/gen8_context.cpp > b/backend/src/backend/gen8_context.cpp > index 920eb3e..2cdb248 100644 > --- a/backend/src/backend/gen8_context.cpp > +++ b/backend/src/backend/gen8_context.cpp > @@ -98,8 +98,7 @@ namespace gbe > p->curr.execWidth = 4; > p->curr.predicate = GEN_PREDICATE_NONE; > p->curr.noMask = 1; > - GenRegister ind_src = > GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > - a0[0], new_a0[0] - a0[0]); > + GenRegister ind_src = > + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > + new_a0[0], 0); > GenRegister dst_ = dst; > dst_.type = GEN_TYPE_UB; > dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -159,8 > +158,7 @@ namespace gbe > p->curr.execWidth = 16; > p->curr.predicate = GEN_PREDICATE_NONE; > p->curr.noMask = 1; > - GenRegister ind_src = > GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > - a0[0], new_a0[0] - a0[0]); > + GenRegister ind_src = > + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > + new_a0[0], 0); > 
p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), > ind_src); > ind_src.addr_imm += 16; > p->MOV(GenRegister::offset(GenRegister::retype(tmp, > GEN_TYPE_UB), 0, 16), ind_src); @@ -218,8 +216,7 @@ namespace gbe > p->curr.execWidth = 16; > p->curr.predicate = GEN_PREDICATE_NONE; > p->curr.noMask = 1; > - GenRegister ind_src = > GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > - a0[0], new_a0[0] - a0[0]); > + GenRegister ind_src = > + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > + new_a0[0], 0); > p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), > ind_src); > if (simd == 16) { > ind_src.addr_imm += 16; @@ -862,46 +859,21 @@ > namespace gbe > } > > void Gen8Context::setA0Content(uint16_t new_a0[16], uint16_t > max_offset, int sz) { > - int16_t diff = new_a0[0] - this->a0[0]; > if (sz == 0) > sz = 16; > GBE_ASSERT(sz%4 == 0); > GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096); > - bool need_reset = false; > - for (int i = 1; i < sz; i++) { > - GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096); > - int16_t d = new_a0[i] - this->a0[i]; > - if (diff != d) { > - need_reset = true; > - break; > - } > - } > > - GBE_ASSERT(this->a0[0] + diff < 4096 && this->a0[0] + diff >= 0); > - if (!need_reset && diff >= -512 && diff + max_offset <= 511) { > - return; > - } else if (!need_reset && sz == 16) { > - p->push(); > - p->curr.execWidth = 16; > - p->curr.predicate = GEN_PREDICATE_NONE; > - p->curr.noMask = 1; > - p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), > - GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), > GenRegister::immw(diff)); > - p->pop(); > - } else { > - p->push(); > - p->curr.execWidth = 1; > - p->curr.predicate = GEN_PREDICATE_NONE; > - p->curr.noMask = 1; > - for (int i = 0; i < sz/4; i++) { > - uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]); > - addr = addr << 32; > - addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]); > - p->MOV(GenRegister::retype(GenRegister::addr1(i*4), > GEN_TYPE_UL), 
GenRegister::immuint64(addr)); > - } > - p->pop(); > + p->push(); > + p->curr.execWidth = 1; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + for (int i = 0; i < sz/4; i++) { > + uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]); > + addr = addr << 32; > + addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]); > + p->MOV(GenRegister::retype(GenRegister::addr1(i*4), > GEN_TYPE_UL), > + GenRegister::immuint64(addr)); > } > - memcpy(this->a0, new_a0, sizeof(uint16_t)*sz); > + p->pop(); > } > - > } > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 094e6b4..684ecaf 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -51,7 +51,6 @@ namespace gbe > this->ra = NULL; > this->ifEndifFix = false; > this->regSpillTick = 0; > - memset(a0, 0, sizeof(a0)); > } > > GenContext::~GenContext(void) { > @@ -340,8 +339,7 @@ namespace gbe > p->curr.execWidth = 4; > p->curr.predicate = GEN_PREDICATE_NONE; > p->curr.noMask = 1; > - GenRegister ind_src = > GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > - a0[0], new_a0[0] - a0[0]); > + GenRegister ind_src = > + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > + new_a0[0], 0); > GenRegister dst_ = dst; > dst_.type = GEN_TYPE_UB; > dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -385,8 > +383,7 @@ namespace gbe > p->curr.execWidth = 8; > p->curr.predicate = GEN_PREDICATE_NONE; > p->curr.noMask = 1; > - GenRegister ind_src = > GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > - a0[0], new_a0[0] - a0[0]); > + GenRegister ind_src = > + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > + new_a0[0], 0); > p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); > for (int i = 1; i < 4; i++) { > ind_src.addr_imm += 8; > @@ -430,8 +427,7 @@ namespace gbe > p->curr.execWidth = 8; > p->curr.predicate = GEN_PREDICATE_NONE; > p->curr.noMask = 1; > - 
GenRegister ind_src = > GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > - a0[0], new_a0[0] - a0[0]); > + GenRegister ind_src = > + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), > + new_a0[0], 0); > p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); > for (int i = 1; i < (simd == 8 ? 2 : 4); i++) { > ind_src.addr_imm += 8; > @@ -1951,45 +1947,20 @@ namespace gbe > } > > void GenContext::setA0Content(uint16_t new_a0[16], uint16_t > max_offset, int sz) { > - int16_t diff = new_a0[0] - this->a0[0]; > - > if (sz == 0) > sz = 8; > GBE_ASSERT(sz%4 == 0); > GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096); > - bool need_reset = false; > - for (int i = 1; i < sz; i++) { > - GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096); > - int16_t d = new_a0[i] - this->a0[i]; > - if (diff != d) { > - need_reset = true; > - break; > - } > - } > > - GBE_ASSERT(a0[0] + diff < 4096 && a0[0] + diff >= 0); > - if (!need_reset && diff >= -512 && diff + max_offset <= 511) { > - return; > - } else if (!need_reset && sz == 8) { > - p->push(); > - p->curr.execWidth = 8; > - p->curr.predicate = GEN_PREDICATE_NONE; > - p->curr.noMask = 1; > - p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), > - GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), > GenRegister::immw(diff)); > - p->pop(); > - } else { > - p->push(); > - p->curr.execWidth = 1; > - p->curr.predicate = GEN_PREDICATE_NONE; > - p->curr.noMask = 1; > - for (int i = 0; i < sz/2; i++) { > - p->MOV(GenRegister::retype(GenRegister::addr1(i*2), > GEN_TYPE_UD), > - GenRegister::immud(new_a0[i*2 + 1] << 16 | new_a0[i*2])); > - } > - p->pop(); > + p->push(); > + p->curr.execWidth = 1; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + for (int i = 0; i < sz/2; i++) { > + p->MOV(GenRegister::retype(GenRegister::addr1(i*2), > GEN_TYPE_UD), > + GenRegister::immud(new_a0[i*2 + 1] << 16 | > new_a0[i*2])); > } > - memcpy(this->a0, new_a0, sizeof(uint16_t)*sz); > + p->pop(); 
> } > > BVAR(OCL_OUTPUT_REG_ALLOC, false); > diff --git a/backend/src/backend/gen_context.hpp > b/backend/src/backend/gen_context.hpp > index 6ca88db..560248a 100644 > --- a/backend/src/backend/gen_context.hpp > +++ b/backend/src/backend/gen_context.hpp > @@ -208,7 +208,6 @@ namespace gbe > /*! allocate a new curbe register and insert to curbe pool. */ > void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t > subValue = 0); > > - uint16_t a0[16]; > virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = > 0, int sz = 0); > > private: > -- > 1.7.9.5 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet