This patch fixes the following two things. 1. Use a temporary register as dst register for the CMP instruction in the middle of a block. 2. fix the switch flag for the CMP instruction at the begining of each block. As the compact instruction handling will handle the cmp instruction directly, and will ignore the switch flag which is incorrect.
This patch could get about 1-2% performance gain for luxmark. Signed-off-by: Zhigang Gong <[email protected]> --- backend/src/backend/gen_encoder.cpp | 9 +++- backend/src/backend/gen_insn_selection.cpp | 84 +++++++++++++++++------------- backend/src/backend/gen_insn_selection.hpp | 4 +- backend/src/backend/gen_reg_allocation.cpp | 33 ++++++++---- 4 files changed, 79 insertions(+), 51 deletions(-) diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index aa30b05..ca587dd 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1148,13 +1148,14 @@ namespace gbe void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) { if (needToSplitCmp(this, src0, src1) == false) { - if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) { + if(!GenRegister::isNull(dst) && compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) { return; } GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP); this->setHeader(insn); insn->header.destreg_or_condmod = conditional; - insn->header.thread_control = GEN_THREAD_SWITCH; + if (GenRegister::isNull(dst)) + insn->header.thread_control = GEN_THREAD_SWITCH; this->setDst(insn, dst); this->setSrc0(insn, src0); this->setSrc1(insn, src1); @@ -1164,6 +1165,8 @@ namespace gbe // Instruction for the first quarter insnQ1 = this->next(GEN_OPCODE_CMP); this->setHeader(insnQ1); + if (GenRegister::isNull(dst)) + insnQ1->header.thread_control = GEN_THREAD_SWITCH; insnQ1->header.quarter_control = GEN_COMPRESSION_Q1; insnQ1->header.execution_size = GEN_WIDTH_8; insnQ1->header.destreg_or_condmod = conditional; @@ -1174,6 +1177,8 @@ namespace gbe // Instruction for the second quarter insnQ2 = this->next(GEN_OPCODE_CMP); this->setHeader(insnQ2); + if (GenRegister::isNull(dst)) + insnQ2->header.thread_control = GEN_THREAD_SWITCH; insnQ2->header.quarter_control = GEN_COMPRESSION_Q2; insnQ2->header.execution_size = GEN_WIDTH_8; insnQ2->header.destreg_or_condmod = conditional; diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 0cb633f..ed69bd0 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -311,9 +311,9 @@ namespace gbe /*! Implement public class */ INLINE uint32_t getVectorNum(void) const { return this->vectorNum; } /*! Implement public class */ - INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID); + INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov); /*! Implement public class */ - INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID); + INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov); /*! spill a register (insert spill/unspill instructions) */ INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool); /*! indicate whether a register is a scalar/uniform register. */ @@ -843,48 +843,56 @@ namespace gbe return true; } - ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID) { + ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) { SelectionBlock *block = insn->parent; const uint32_t simdWidth = insn->state.execWidth; ir::Register tmp; + GenRegister gr; // This will append the temporary register in the instruction block this->block = block; - tmp = this->reg(ir::FAMILY_DWORD); - - // Generate the MOV instruction and replace the register in the instruction - SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); - mov->src(0) = GenRegister::retype(insn->src(regID), GEN_TYPE_F); - mov->state = GenInstructionState(simdWidth); - if (this->isScalarReg(insn->src(regID).reg())) - mov->state.noMask = 1; - insn->src(regID) = mov->dst(0) = GenRegister::fxgrf(simdWidth, tmp); - insn->prepend(*mov); + tmp = this->reg(ir::getFamily(type), simdWidth == 1); + gr = this->selReg(tmp, type); + if (needMov) { + // Generate the MOV instruction and replace the register in the instruction + SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); + mov->src(0) = GenRegister::retype(insn->src(regID), gr.type); + mov->state = GenInstructionState(simdWidth); + if (this->isScalarReg(insn->src(regID).reg())) + mov->state.noMask = 1; + mov->dst(0) = gr; + insn->prepend(*mov); + } + insn->src(regID) = gr; return tmp; } - ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID) { + ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) { SelectionBlock *block = insn->parent; - uint32_t simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : insn->state.execWidth; + uint32_t simdWidth; + if (!GenRegister::isNull(insn->dst(regID))) + simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : insn->state.execWidth; + else { + GBE_ASSERT(needMov == false); + simdWidth = insn->state.execWidth; + } ir::Register tmp; - ir::RegisterFamily f = file.get(insn->dst(regID).reg()).family; - int genType = f == ir::FAMILY_QWORD ? GEN_TYPE_DF : GEN_TYPE_F; GenRegister gr; - - // This will append the temporary register in the instruction block this->block = block; - tmp = this->reg(f); - + tmp = this->reg(ir::getFamily(type)); + gr = this->selReg(tmp, type); + if (needMov) { // Generate the MOV instruction and replace the register in the instruction - SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); - mov->dst(0) = GenRegister::retype(insn->dst(regID), genType); - mov->state = GenInstructionState(simdWidth); - if (simdWidth == 1) - mov->state.noMask = 1; - gr = f == ir::FAMILY_QWORD ? GenRegister::dfxgrf(simdWidth, tmp) : GenRegister::fxgrf(simdWidth, tmp); - insn->dst(regID) = mov->src(0) = gr; - insn->append(*mov); + SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); + mov->dst(0) = GenRegister::retype(insn->dst(regID), gr.type); + mov->state = GenInstructionState(simdWidth); + if (simdWidth == 1) + mov->state.noMask = 1; + mov->src(0) = gr; + insn->append(*mov); + } + insn->dst(regID) = gr; return tmp; } @@ -1625,12 +1633,12 @@ namespace gbe return this->opaque->getRegisterData(reg); } - ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID) { - return this->opaque->replaceSrc(insn, regID); + ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) { + return this->opaque->replaceSrc(insn, regID, type, needMov); } - ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID) { - return this->opaque->replaceDst(insn, regID); + ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) { + return this->opaque->replaceDst(insn, regID, type, needMov); } bool Selection::spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool) { return this->opaque->spillRegs(spilledRegs, registerPool); @@ -2895,7 +2903,7 @@ namespace gbe type == TYPE_DOUBLE || type == TYPE_FLOAT || type == TYPE_U32 || type == TYPE_S32 /*|| (!needStoreBool)*/) - tmpDst = GenRegister::nullud(); + tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_F); else tmpDst = sel.selReg(dst, TYPE_BOOL); @@ -2952,7 +2960,7 @@ namespace gbe // the dst to null register. And let the flag reg allocation // function to generate the flag grf on demand correctly latter. sel.curr.flagGen = needStoreBool; - tmpDst = GenRegister::nullud(); + tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW); } sel.CMP(getGenCompare(opcode), src0, src1, tmpDst); } @@ -3280,7 +3288,8 @@ namespace gbe sel.push(); sel.curr.noMask = 1; sel.curr.predicate = GEN_PREDICATE_NONE; - sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); + sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); sel.pop(); if (sel.block->hasBarrier) { @@ -3293,7 +3302,8 @@ namespace gbe sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL)); sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.noMask = 1; - sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL)); + sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL), + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); if (simdWidth == 8) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; else if (simdWidth == 16) diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index df0a10e..1f48b23 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -220,9 +220,9 @@ namespace gbe /*! Get the data for the given register */ ir::RegisterData getRegisterData(ir::Register reg) const; /*! Replace a source by the returned temporary register */ - ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID); + ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true); /*! Replace a destination to the returned temporary register */ - ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID); + ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true); /*! spill a register (insert spill/unspill instructions) */ bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool); /*! Indicate if a register is scalar or not */ diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 880a267..8349e9a 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -188,6 +188,21 @@ namespace gbe INLINE bool spillReg(ir::Register reg, bool isAllocated = false); INLINE bool vectorCanSpill(SelectionVector *vector); INLINE void allocateScratchForSpilled(); + + /*! replace specified source/dst register with temporary register and update interval */ + INLINE ir::Register replaceReg(Selection &sel, SelectionInstruction *insn, + uint32_t regID, bool isSrc, + ir::Type type = ir::TYPE_FLOAT, bool needMov = true) { + ir::Register reg; + if (isSrc) + reg = sel.replaceSrc(insn, regID, type, needMov); + else + reg = sel.replaceDst(insn, regID, type, needMov); + intervals.push_back(reg); + intervals[reg].minID = insn->ID; + intervals[reg].maxID = insn->ID; + return reg; + } /*! Use custom allocator */ GBE_CLASS(Opaque); }; @@ -301,15 +316,9 @@ namespace gbe // the MOVs else { ir::Register tmp; - if (vector->isSrc) - tmp = selection.replaceSrc(vector->insn, regID); - else - tmp = selection.replaceDst(vector->insn, regID); + tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc); const VectorLocation location = std::make_pair(vector, regID); this->vectorMap.insert(std::make_pair(tmp, location)); - intervals.push_back(tmp); - intervals[tmp].minID = vector->insn->ID; - intervals[tmp].maxID = vector->insn->ID; } } } @@ -590,12 +599,16 @@ namespace gbe if (insn.state.predicate != GEN_PREDICATE_NONE) validateFlag(selection, insn); } - // This is a CMP for a pure flag booleans, we don't need to write result to // the grf. And latter, we will not allocate grf for it. if (insn.opcode == SEL_OP_CMP && - flagBooleans.contains((ir::Register)(insn.dst(0).value.reg))) - insn.dst(0) = GenRegister::null(); + (flagBooleans.contains(insn.dst(0).reg()) || + GenRegister::isNull(insn.dst(0)))) { + // set a temporary register to avoid switch in this block. + bool isSrc = false; + bool needMov = false; + this->replaceReg(selection, &insn, 0, isSrc, ir::TYPE_FLOAT, needMov); + } // If the instruction requires to generate (CMP for long/int/float..) // the flag value to the register, and it's not a pure flag boolean, // we need to use SEL instruction to generate the flag value to the UW8 -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
