This patchset LGTM. But a typo in subject "unalgined" should be "unaligned". And I think it is better to keep the piece of code still in getEffectByteData() now.
-----Original Message----- From: Beignet [mailto:[email protected]] On Behalf Of Zhigang Gong Sent: Thursday, August 28, 2014 10:46 AM To: [email protected] Cc: Gong, Zhigang Subject: [Beignet] [PATCH 2/2] GBE: refine the unalgined data gathering. Save some unecessary duplicate instructions. Signed-off-by: Zhigang Gong <[email protected]> --- backend/src/backend/gen_insn_selection.cpp | 43 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1258e54..4be6372 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2888,7 +2888,9 @@ namespace gbe vector<GenRegister> &effectData, vector<GenRegister> &tmp, uint32_t effectDataNum, - GenRegister addr, + GenRegister shiftL, + GenRegister shiftH, + ir::Register alignedFlag, uint32_t simdWidth) const { using namespace ir; @@ -2899,25 +2901,14 @@ namespace gbe for(uint32_t i = 0; i < effectDataNum; i++) { GenRegister tmpH = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD)); GenRegister tmpL = effectData[i]; - GenRegister shift = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD)); - Register shift1Reg = sel.reg(FAMILY_DWORD); - GenRegister shift1 = GenRegister::udxgrf(simdWidth, shift1Reg); - GenRegister factor = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD)); - sel.AND(shift, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(0x3)); - sel.SHL(shift, shift, GenRegister::immud(0x3)); - sel.SHR(tmpL, tmp[i], shift); - sel.ADD(shift1, GenRegister::negate(shift), GenRegister::immud(32)); + sel.SHR(tmpL, tmp[i], shiftL); sel.push(); - // Only need to consider the tmpH when the shift is not 32. - Register flag = sel.reg(FAMILY_BOOL); - sel.curr.physicalFlag = 0; - sel.curr.modFlag = 1; - sel.curr.predicate = GEN_PREDICATE_NONE; - sel.curr.flagIndex = (uint16_t)flag; - sel.CMP(GEN_CONDITIONAL_NEQ, GenRegister::unpacked_uw(shift1Reg), GenRegister::immuw(32), factor); + // Only need to consider the tmpH when the addr is not aligned. sel.curr.modFlag = 0; + sel.curr.physicalFlag = 0; + sel.curr.flagIndex = (uint16_t)alignedFlag; sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.SHL(tmpH, tmp[i + 1], shift1); + sel.SHL(tmpH, tmp[i + 1], shiftH); sel.OR(effectData[i], tmpL, tmpH); sel.pop(); } @@ -2978,7 +2969,23 @@ namespace gbe for(uint32_t i = 0; i < effectDataNum; i++) effectData[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD)); - getEffectByteData(sel, effectData, tmp, effectDataNum, address, simdWidth); + Register alignedFlag = sel.reg(FAMILY_BOOL); + GenRegister shiftL = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD)); + Register shiftHReg = sel.reg(FAMILY_DWORD); + GenRegister shiftH = GenRegister::udxgrf(simdWidth, shiftHReg); + sel.push(); + if (simdWidth == 1) + sel.curr.noMask = 1; + sel.AND(shiftL, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(0x3)); + sel.SHL(shiftL, shiftL, GenRegister::immud(0x3)); + sel.ADD(shiftH, GenRegister::negate(shiftL), GenRegister::immud(32)); + sel.curr.physicalFlag = 0; + sel.curr.modFlag = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.flagIndex = (uint16_t)alignedFlag; + sel.CMP(GEN_CONDITIONAL_NEQ, GenRegister::unpacked_uw(shiftHReg), GenRegister::immuw(32)); + sel.pop(); + getEffectByteData(sel, effectData, tmp, effectDataNum, shiftL, shiftH, alignedFlag, simdWidth); for(uint32_t i = 0; i < effectDataNum; i++) { unsigned int elemNum = (valueNum - i * (4 / typeSize)) > 4/typeSize ? -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
