Ping for review.
On Mon, May 19, 2014 at 12:33:06PM +0800, Zhigang Gong wrote: > This patch fixes the following two things. > 1. Use a temporary register as dst register for the CMP > instruction in the middle of a block. > 2. fix the switch flag for the CMP instruction at the begining > of each block. As the compact instruction handling will handle > the cmp instruction directly, and will ignore the switch > flag which is incorrect. > > This patch could get about 1-2% performance gain for luxmark. > > Signed-off-by: Zhigang Gong <[email protected]> > --- > backend/src/backend/gen_encoder.cpp | 9 +++- > backend/src/backend/gen_insn_selection.cpp | 84 > +++++++++++++++++------------- > backend/src/backend/gen_insn_selection.hpp | 4 +- > backend/src/backend/gen_reg_allocation.cpp | 33 ++++++++---- > 4 files changed, 79 insertions(+), 51 deletions(-) > > diff --git a/backend/src/backend/gen_encoder.cpp > b/backend/src/backend/gen_encoder.cpp > index aa30b05..ca587dd 100644 > --- a/backend/src/backend/gen_encoder.cpp > +++ b/backend/src/backend/gen_encoder.cpp > @@ -1148,13 +1148,14 @@ namespace gbe > > void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister > src1, GenRegister dst) { > if (needToSplitCmp(this, src0, src1) == false) { > - if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, > false)) { > + if(!GenRegister::isNull(dst) && compactAlu2(this, GEN_OPCODE_CMP, dst, > src0, src1, conditional, false)) { > return; > } > GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP); > this->setHeader(insn); > insn->header.destreg_or_condmod = conditional; > - insn->header.thread_control = GEN_THREAD_SWITCH; > + if (GenRegister::isNull(dst)) > + insn->header.thread_control = GEN_THREAD_SWITCH; > this->setDst(insn, dst); > this->setSrc0(insn, src0); > this->setSrc1(insn, src1); > @@ -1164,6 +1165,8 @@ namespace gbe > // Instruction for the first quarter > insnQ1 = this->next(GEN_OPCODE_CMP); > this->setHeader(insnQ1); > + if (GenRegister::isNull(dst)) > + insnQ1->header.thread_control = GEN_THREAD_SWITCH; > insnQ1->header.quarter_control = GEN_COMPRESSION_Q1; > insnQ1->header.execution_size = GEN_WIDTH_8; > insnQ1->header.destreg_or_condmod = conditional; > @@ -1174,6 +1177,8 @@ namespace gbe > // Instruction for the second quarter > insnQ2 = this->next(GEN_OPCODE_CMP); > this->setHeader(insnQ2); > + if (GenRegister::isNull(dst)) > + insnQ2->header.thread_control = GEN_THREAD_SWITCH; > insnQ2->header.quarter_control = GEN_COMPRESSION_Q2; > insnQ2->header.execution_size = GEN_WIDTH_8; > insnQ2->header.destreg_or_condmod = conditional; > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 0cb633f..ed69bd0 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -311,9 +311,9 @@ namespace gbe > /*! Implement public class */ > INLINE uint32_t getVectorNum(void) const { return this->vectorNum; } > /*! Implement public class */ > - INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t > regID); > + INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t > regID, ir::Type type, bool needMov); > /*! Implement public class */ > - INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t > regID); > + INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t > regID, ir::Type type, bool needMov); > /*! spill a register (insert spill/unspill instructions) */ > INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t > registerPool); > /*! indicate whether a register is a scalar/uniform register. */ > @@ -843,48 +843,56 @@ namespace gbe > return true; > } > > - ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, > uint32_t regID) { > + ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, > uint32_t regID, ir::Type type, bool needMov) { > SelectionBlock *block = insn->parent; > const uint32_t simdWidth = insn->state.execWidth; > ir::Register tmp; > + GenRegister gr; > > // This will append the temporary register in the instruction block > this->block = block; > - tmp = this->reg(ir::FAMILY_DWORD); > - > - // Generate the MOV instruction and replace the register in the > instruction > - SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); > - mov->src(0) = GenRegister::retype(insn->src(regID), GEN_TYPE_F); > - mov->state = GenInstructionState(simdWidth); > - if (this->isScalarReg(insn->src(regID).reg())) > - mov->state.noMask = 1; > - insn->src(regID) = mov->dst(0) = GenRegister::fxgrf(simdWidth, tmp); > - insn->prepend(*mov); > + tmp = this->reg(ir::getFamily(type), simdWidth == 1); > + gr = this->selReg(tmp, type); > + if (needMov) { > + // Generate the MOV instruction and replace the register in the > instruction > + SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); > + mov->src(0) = GenRegister::retype(insn->src(regID), gr.type); > + mov->state = GenInstructionState(simdWidth); > + if (this->isScalarReg(insn->src(regID).reg())) > + mov->state.noMask = 1; > + mov->dst(0) = gr; > + insn->prepend(*mov); > + } > + insn->src(regID) = gr; > > return tmp; > } > > - ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, > uint32_t regID) { > + ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, > uint32_t regID, ir::Type type, bool needMov) { > SelectionBlock *block = insn->parent; > - uint32_t simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : > insn->state.execWidth; > + uint32_t simdWidth; > + if (!GenRegister::isNull(insn->dst(regID))) > + simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : > insn->state.execWidth; > + else { > + GBE_ASSERT(needMov == false); > + simdWidth = insn->state.execWidth; > + } > ir::Register tmp; > - ir::RegisterFamily f = file.get(insn->dst(regID).reg()).family; > - int genType = f == ir::FAMILY_QWORD ? GEN_TYPE_DF : GEN_TYPE_F; > GenRegister gr; > - > - // This will append the temporary register in the instruction block > this->block = block; > - tmp = this->reg(f); > - > + tmp = this->reg(ir::getFamily(type)); > + gr = this->selReg(tmp, type); > + if (needMov) { > // Generate the MOV instruction and replace the register in the > instruction > - SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); > - mov->dst(0) = GenRegister::retype(insn->dst(regID), genType); > - mov->state = GenInstructionState(simdWidth); > - if (simdWidth == 1) > - mov->state.noMask = 1; > - gr = f == ir::FAMILY_QWORD ? GenRegister::dfxgrf(simdWidth, tmp) : > GenRegister::fxgrf(simdWidth, tmp); > - insn->dst(regID) = mov->src(0) = gr; > - insn->append(*mov); > + SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); > + mov->dst(0) = GenRegister::retype(insn->dst(regID), gr.type); > + mov->state = GenInstructionState(simdWidth); > + if (simdWidth == 1) > + mov->state.noMask = 1; > + mov->src(0) = gr; > + insn->append(*mov); > + } > + insn->dst(regID) = gr; > return tmp; > } > > @@ -1625,12 +1633,12 @@ namespace gbe > return this->opaque->getRegisterData(reg); > } > > - ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t > regID) { > - return this->opaque->replaceSrc(insn, regID); > + ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t > regID, ir::Type type, bool needMov) { > + return this->opaque->replaceSrc(insn, regID, type, needMov); > } > > - ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t > regID) { > - return this->opaque->replaceDst(insn, regID); > + ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t > regID, ir::Type type, bool needMov) { > + return this->opaque->replaceDst(insn, regID, type, needMov); > } > bool Selection::spillRegs(const SpilledRegs &spilledRegs, uint32_t > registerPool) { > return this->opaque->spillRegs(spilledRegs, registerPool); > @@ -2895,7 +2903,7 @@ namespace gbe > type == TYPE_DOUBLE || type == TYPE_FLOAT || > type == TYPE_U32 || type == TYPE_S32 /*|| > (!needStoreBool)*/) > - tmpDst = GenRegister::nullud(); > + tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_F); > else > tmpDst = sel.selReg(dst, TYPE_BOOL); > > @@ -2952,7 +2960,7 @@ namespace gbe > // the dst to null register. And let the flag reg allocation > // function to generate the flag grf on demand correctly latter. > sel.curr.flagGen = needStoreBool; > - tmpDst = GenRegister::nullud(); > + tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW); > } > sel.CMP(getGenCompare(opcode), src0, src1, tmpDst); > } > @@ -3280,7 +3288,8 @@ namespace gbe > sel.push(); > sel.curr.noMask = 1; > sel.curr.predicate = GEN_PREDICATE_NONE; > - sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), > src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); > + sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), > src1, > + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); > sel.pop(); > > if (sel.block->hasBarrier) { > @@ -3293,7 +3302,8 @@ namespace gbe > sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), > GenRegister::immuw(GEN_MAX_LABEL)); > sel.curr.predicate = GEN_PREDICATE_NONE; > sel.curr.noMask = 1; > - sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, > GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL)); > + sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, > GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL), > + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); > if (simdWidth == 8) > sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; > else if (simdWidth == 16) > diff --git a/backend/src/backend/gen_insn_selection.hpp > b/backend/src/backend/gen_insn_selection.hpp > index df0a10e..1f48b23 100644 > --- a/backend/src/backend/gen_insn_selection.hpp > +++ b/backend/src/backend/gen_insn_selection.hpp > @@ -220,9 +220,9 @@ namespace gbe > /*! Get the data for the given register */ > ir::RegisterData getRegisterData(ir::Register reg) const; > /*! Replace a source by the returned temporary register */ > - ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID); > + ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, > ir::Type type = ir::TYPE_FLOAT, bool needMov = true); > /*! Replace a destination to the returned temporary register */ > - ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID); > + ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, > ir::Type type = ir::TYPE_FLOAT, bool needMov = true); > /*! spill a register (insert spill/unspill instructions) */ > bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool); > /*! Indicate if a register is scalar or not */ > diff --git a/backend/src/backend/gen_reg_allocation.cpp > b/backend/src/backend/gen_reg_allocation.cpp > index 880a267..8349e9a 100644 > --- a/backend/src/backend/gen_reg_allocation.cpp > +++ b/backend/src/backend/gen_reg_allocation.cpp > @@ -188,6 +188,21 @@ namespace gbe > INLINE bool spillReg(ir::Register reg, bool isAllocated = false); > INLINE bool vectorCanSpill(SelectionVector *vector); > INLINE void allocateScratchForSpilled(); > + > + /*! replace specified source/dst register with temporary register and > update interval */ > + INLINE ir::Register replaceReg(Selection &sel, SelectionInstruction > *insn, > + uint32_t regID, bool isSrc, > + ir::Type type = ir::TYPE_FLOAT, bool > needMov = true) { > + ir::Register reg; > + if (isSrc) > + reg = sel.replaceSrc(insn, regID, type, needMov); > + else > + reg = sel.replaceDst(insn, regID, type, needMov); > + intervals.push_back(reg); > + intervals[reg].minID = insn->ID; > + intervals[reg].maxID = insn->ID; > + return reg; > + } > /*! Use custom allocator */ > GBE_CLASS(Opaque); > }; > @@ -301,15 +316,9 @@ namespace gbe > // the MOVs > else { > ir::Register tmp; > - if (vector->isSrc) > - tmp = selection.replaceSrc(vector->insn, regID); > - else > - tmp = selection.replaceDst(vector->insn, regID); > + tmp = this->replaceReg(selection, vector->insn, regID, > vector->isSrc); > const VectorLocation location = std::make_pair(vector, regID); > this->vectorMap.insert(std::make_pair(tmp, location)); > - intervals.push_back(tmp); > - intervals[tmp].minID = vector->insn->ID; > - intervals[tmp].maxID = vector->insn->ID; > } > } > } > @@ -590,12 +599,16 @@ namespace gbe > if (insn.state.predicate != GEN_PREDICATE_NONE) > validateFlag(selection, insn); > } > - > // This is a CMP for a pure flag booleans, we don't need to write > result to > // the grf. And latter, we will not allocate grf for it. > if (insn.opcode == SEL_OP_CMP && > - flagBooleans.contains((ir::Register)(insn.dst(0).value.reg))) > - insn.dst(0) = GenRegister::null(); > + (flagBooleans.contains(insn.dst(0).reg()) || > + GenRegister::isNull(insn.dst(0)))) { > + // set a temporary register to avoid switch in this block. > + bool isSrc = false; > + bool needMov = false; > + this->replaceReg(selection, &insn, 0, isSrc, ir::TYPE_FLOAT, > needMov); > + } > // If the instruction requires to generate (CMP for > long/int/float..) > // the flag value to the register, and it's not a pure flag > boolean, > // we need to use SEL instruction to generate the flag value to > the UW8 > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
