From: Pan Xiuli <xiuli....@intel.com> Bool values can simply be kept in flag registers, but some operations need a GRF register to be involved. So we add two kinds of helper registers, BOOL_BIT and BOOL_UW, to handle live-out bool values and bool operations.
Signed-off-by: Pan Xiuli <xiuli....@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 43 +++- .../src/backend/gen_insn_selection_optimize.cpp | 4 + backend/src/backend/gen_reg_allocation.cpp | 151 +++++------- backend/src/backend/gen_register.hpp | 2 +- backend/src/ir/context.hpp | 2 +- backend/src/ir/function.hpp | 3 + backend/src/ir/instruction.cpp | 11 +- backend/src/ir/instruction.hpp | 2 + backend/src/ir/register.cpp | 2 + backend/src/ir/register.hpp | 6 +- backend/src/llvm/llvm_gen_backend.cpp | 257 +++++++++++++++++++++ 11 files changed, 370 insertions(+), 113 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 41ef7b8..aefae5e 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -1256,6 +1256,8 @@ namespace gbe SEL_REG(ul16grf, ul8grf, ul1grf); } break; + case FAMILY_BOOL_BIT: SEL_REG(uw1grf, uw1grf, uw1grf); break; + case FAMILY_BOOL_UW: SEL_REG(uw16grf, uw8grf, uw1grf); break; default: NOT_SUPPORTED; } GBE_ASSERT(false); @@ -3105,13 +3107,25 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp { sel.push(); auto dag = sel.regDAG[insn.getDst(0)]; - if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL && - dag->isUsed) { - sel.curr.physicalFlag = 0; - sel.curr.flagIndex = insn.getDst(0).value(); - sel.curr.modFlag = 1; - } - sel.MOV(dst, src); + // BOOL now is flag register, we need handle these situtaion carefully + if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL) { + sel.curr.execWidth = 1; + sel.curr.noMask = 1; + sel.curr.physicalFlag = 0; + sel.curr.flagIndex = insn.getDst(0).value(); + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.modFlag = 1; + } + else if (sel.getRegisterFamily(insn.getSrc(0)) == ir::FAMILY_BOOL || + sel.getRegisterFamily(insn.getSrc(0)) == ir::FAMILY_BOOL_BIT) + { + sel.curr.noMask = 1; + sel.curr.execWidth = 1; + sel.curr.predicate = 
GEN_PREDICATE_NONE; + sel.curr.modFlag = 1; + } + + sel.MOV(dst, src); sel.pop(); } break; @@ -3384,8 +3398,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp bool inverse = false; sel.getSrcGenRegImm(dag, src0, src1, type, inverse); // Output the binary instruction - if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL && - dag.isUsed) { + if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL ) { GBE_ASSERT(insn.getOpcode() == OP_AND || insn.getOpcode() == OP_OR || insn.getOpcode() == OP_XOR); @@ -3394,6 +3407,16 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.curr.modFlag = 1; } + if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL_BIT ) { + GBE_ASSERT(insn.getOpcode() == OP_AND || + insn.getOpcode() == OP_OR || + insn.getOpcode() == OP_XOR); + sel.curr.execWidth = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.noMask = 1; + sel.curr.modFlag = 1; + } + switch (opcode) { case OP_ADD: if ((type == Type::TYPE_U64 || type == Type::TYPE_S64) && !sel.hasLongType()) { @@ -5393,8 +5416,6 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp const ir::Liveness &liveness = sel.ctx.getLiveness(); const ir::Liveness::LiveOut &liveOut = liveness.getLiveOut(curr); bool needStoreBool = false; - if (liveOut.contains(dst) || dag.computeBool) - needStoreBool = true; // why we set the tmpDst to null? 
// because for the listed type compare instruction could not diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp index d2e0fb9..d60ed41 100644 --- a/backend/src/backend/gen_insn_selection_optimize.cpp +++ b/backend/src/backend/gen_insn_selection_optimize.cpp @@ -162,6 +162,10 @@ namespace gbe assert(insn.opcode == SEL_OP_MOV); const GenRegister& src = insn.src(0); const GenRegister& dst = insn.dst(0); + + if ( dst.file == GEN_GENERAL_REGISTER_FILE && ctx.sel->getRegisterFamily(dst.reg()) == ir::FAMILY_BOOL) + return; + if (src.type != dst.type || src.file != dst.file) return; diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 9183a24..193e75c 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -87,14 +87,16 @@ namespace gbe INLINE void getRegAttrib(ir::Register reg, uint32_t ®Size, ir::RegisterFamily *regFamily = NULL) const { // Note that byte vector registers use two bytes per byte (and can be // interleaved) - static const size_t familyVectorSize[] = {2,2,2,4,8,16,32}; - static const size_t familyScalarSize[] = {2,2,2,4,8,16,32}; + static const size_t familyVectorSize[] = {2,2,2,4,8,16,32,4,2,2}; + static const size_t familyScalarSize[] = {2,2,2,4,8,16,32,4,2,2}; using namespace ir; const bool isScalar = ctx.sel->isScalarReg(reg); const RegisterData regData = ctx.sel->getRegisterData(reg); const RegisterFamily family = regData.family; if (family == ir::FAMILY_REG) regSize = 32; + else if (family == ir::FAMILY_BOOL_BIT || family == ir::FAMILY_BOOL) + regSize = 2; else { const uint32_t typeSize = isScalar ? familyScalarSize[family] : familyVectorSize[family]; regSize = isScalar ? 
typeSize : ctx.getSimdWidth() * typeSize; @@ -480,8 +482,8 @@ namespace gbe insn.opcode == SEL_OP_XOR)) #define IS_SCALAR_FLAG(insn) selection.isScalarReg(ir::Register(insn.state.flagIndex)) - #define GET_FLAG_REG(insn) GenRegister::uwxgrf(IS_SCALAR_FLAG(insn) ? 1 : 8,\ - ir::Register(insn.state.flagIndex)); + + #define GET_FLAG_REG(insn) GenRegister::uw1grf(ir::Register(insn.state.flagIndex)); #define IS_TEMP_FLAG(insn) (insn.state.flag == 0 && insn.state.subFlag == 1) #define NEED_DST_GRF_TYPE_FIX(ty) \ (ty == GEN_TYPE_F || \ @@ -489,39 +491,6 @@ namespace gbe ty == GEN_TYPE_DF || \ ty == GEN_TYPE_UL || \ ty == GEN_TYPE_L) - // Flag is a virtual flag, this function is to validate the virtual flag - // to a physical flag. It is used to validate both temporary flag and the - // non-temporary flag registers. - // We track the last temporary validate register, if it's the same as - // current, we can avoid the revalidation. - void GenRegAllocator::Opaque::validateFlag(Selection &selection, - SelectionInstruction &insn) { - GBE_ASSERT(insn.state.physicalFlag == 1); - if (!IS_TEMP_FLAG(insn) && validatedFlags.find(insn.state.flagIndex) != validatedFlags.end()) - return; - else if (IS_TEMP_FLAG(insn) && validTempFlagReg == insn.state.flagIndex) - return; - SelectionInstruction *cmp0 = selection.create(SEL_OP_CMP, 1, 2); - cmp0->state = GenInstructionState(ctx.getSimdWidth()); - cmp0->state.flag = insn.state.flag; - cmp0->state.subFlag = insn.state.subFlag; - if (IS_SCALAR_FLAG(insn)) - cmp0->state.noMask = 1; - cmp0->src(0) = GET_FLAG_REG(insn); - cmp0->src(1) = GenRegister::immuw(0); - cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW); - cmp0->extra.function = GEN_CONDITIONAL_NEQ; - insn.prepend(*cmp0); - if (!IS_TEMP_FLAG(insn)) - validatedFlags.insert(insn.state.flagIndex); - else { - if (insn.state.modFlag == 0) - validTempFlagReg = insn.state.flagIndex; - else - validTempFlagReg = 0; - } - } - void GenRegAllocator::Opaque::allocateFlags(Selection 
&selection) { // Previously, we have a global flag allocation implemntation. @@ -653,53 +622,61 @@ namespace gbe // Patch the predicate now. Note that only compares actually modify it (it // is called a "conditional modifier"). The other instructions just read // it - if (insn.state.physicalFlag == 0) { - // SEL.bool instruction, the dst register should be stored in GRF - // the pred flag is used by flag register - if (insn.opcode == SEL_OP_SEL) { - ir::Register dst = insn.dst(0).reg(); - if (ctx.sel->getRegisterFamily(dst) == ir::FAMILY_BOOL && - allocatedFlags.find(dst) != allocatedFlags.end()) - allocatedFlags.erase(dst); + if (IS_IMPLICITLY_MOD_FLAG(insn)) + { + //outputSelectionInst(insn); + const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum; + for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { + const GenRegister &selReg = insn.src(srcID); + const ir::Register reg = selReg.reg(); + if (selReg.file != GEN_GENERAL_REGISTER_FILE) + continue; + if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL); + { + auto it = allocatedFlags.find(reg); + if (it != allocatedFlags.end()) + insn.src(srcID) = GenRegister::flag(it->second / 2 ,it->second & 1); + } } + for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { + const GenRegister &selReg = insn.dst(dstID); + const ir::Register reg = selReg.reg(); + if (selReg.file != GEN_GENERAL_REGISTER_FILE) + continue; + if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL); + { + auto it = allocatedFlags.find(reg); + if (it != allocatedFlags.end()) + insn.dst(dstID) = GenRegister::flag(it->second / 2 ,it->second & 1); + } + } + } + + if (insn.state.physicalFlag == 0) { auto it = allocatedFlags.find(ir::Register(insn.state.flagIndex)); if (it != allocatedFlags.end()) { insn.state.physicalFlag = 1; insn.state.flag = it->second / 2; insn.state.subFlag = it->second & 1; - // modFlag is for the LOADI/MOV/AND/OR/XOR instructions which will modify a - // flag register. 
We set the condition for them to save one instruction if possible. - if (IS_IMPLICITLY_MOD_FLAG(insn)) { - // If this is a modFlag on a scalar bool, we need to remove it - // from the allocated flags map. Then latter, the user could - // validate the flag from the scalar value correctly. - // The reason is we can not predicate the active channel when we - // need to use this flag. - if (IS_SCALAR_FLAG(insn)) { - allocatedFlags.erase(ir::Register(insn.state.flagIndex)); - continue; - } - insn.extra.function = GEN_CONDITIONAL_NEQ; - } - // If this is an external bool, we need to validate it if it is not validated yet. - if ((insn.state.externFlag && - insn.state.predicate != GEN_PREDICATE_NONE)) - validateFlag(selection, insn); } else { insn.state.physicalFlag = 1; insn.state.flag = 0; insn.state.subFlag = 1; - // If this is for MOV/AND/OR/... we don't need to waste an extra instruction - // to generate the flag here, just continue to next instruction. And the validTempFlagReg - // will not be destroyed. - if (IS_IMPLICITLY_MOD_FLAG(insn)) - continue; // This bool doesn't have a deadicated flag, we use temporary flag here. // each time we need to validate it from the grf register. if (insn.state.predicate != GEN_PREDICATE_NONE) - validateFlag(selection, insn); + { + SelectionInstruction *movf = selection.create(SEL_OP_MOV, 1, 1); + movf->state = GenInstructionState(1); + movf->state.noMask = 1; + movf->state.predicate = GEN_PREDICATE_NONE; + movf->state.execWidth = 1; + movf->src(0) = GenRegister::uw1grf(ir::Register(insn.state.flagIndex)); + movf->dst(0) = GenRegister::flag(insn.state.flag,insn.state.subFlag); + insn.prepend(*movf); + } } if (insn.opcode == SEL_OP_CMP && (flagBooleans.contains(insn.dst(0).reg()) || @@ -729,35 +706,15 @@ namespace gbe // register. 
if (insn.state.flagGen == 1 && !flagBooleans.contains((ir::Register)(insn.state.flagIndex))) { - SelectionInstruction *sel0 = selection.create(SEL_OP_SEL, 1, 2); - uint32_t simdWidth; - simdWidth = IS_SCALAR_FLAG(insn) ? 1 : ctx.getSimdWidth(); - - sel0->state = GenInstructionState(simdWidth); - if (IS_SCALAR_FLAG(insn)) - sel0->state.noMask = 1; - sel0->state.flag = insn.state.flag; - sel0->state.subFlag = insn.state.subFlag; - sel0->state.predicate = GEN_PREDICATE_NORMAL; - sel0->src(0) = GenRegister::uw1grf(ir::ocl::one); - sel0->src(1) = GenRegister::uw1grf(ir::ocl::zero); - sel0->dst(0) = GET_FLAG_REG(insn); - liveInSet01.insert(insn.parent->bb); - insn.append(*sel0); - // We use the zero one after the liveness analysis, we have to update - // the liveness data manually here. - GenRegInterval &interval0 = intervals[ir::ocl::zero]; - GenRegInterval &interval1 = intervals[ir::ocl::one]; - interval0.minID = std::min(interval0.minID, (int32_t)insn.ID); - interval0.maxID = std::max(interval0.maxID, (int32_t)insn.ID); - interval1.minID = std::min(interval1.minID, (int32_t)insn.ID); - interval1.maxID = std::max(interval1.maxID, (int32_t)insn.ID); + + SelectionInstruction *movg = selection.create(SEL_OP_MOV, 1, 1); + movg->state = GenInstructionState(1); + movg->state.predicate = GEN_PREDICATE_NONE; + movg->state.noMask = 1; + movg->src(0) = GenRegister::flag(insn.state.flag, insn.state.subFlag); + movg->dst(0) = GenRegister::uw1grf(ir::Register(insn.state.flagIndex)); + insn.append(*movg); } - } else { - // If the instruction use the temporary flag register manually, - // we should invalidate the temp flag reg here. 
- if (insn.state.flag == 0 && insn.state.subFlag == 1) - validTempFlagReg = 0; } } } diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp index 6c73f5e..da58805 100644 --- a/backend/src/backend/gen_register.hpp +++ b/backend/src/backend/gen_register.hpp @@ -120,7 +120,7 @@ namespace gbe this->noMask = 0; this->flag = 0; this->subFlag = 0; - this->grfFlag = 1; + this->grfFlag = 0; this->externFlag = 0; this->modFlag = 0; this->flagGen = 0; diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index 877d639..6945bfe 100644 --- a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -142,7 +142,7 @@ namespace ir { /*! Append a new tuple */ template <typename... Args> INLINE Tuple tuple(Args...args) { GBE_ASSERTM(fn != NULL, "No function currently defined"); - return fn->file.appendTuple(args...); + return fn->makeTuple(args...); } /*! Make a tuple from an array of register */ INLINE Tuple arrayTuple(const Register *reg, uint32_t regNum) { diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 64d9727..ce7412b 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -563,6 +563,9 @@ namespace ir { INLINE bool setUseDeviceEnqueue(bool useDeviceEnqueue) { return this->useDeviceEnqueue = useDeviceEnqueue; } + template <typename... 
Args> INLINE Tuple makeTuple(Args...args) { + return this->file.appendTuple(args...); + } private: friend class Context; //!< Can freely modify a function std::string name; //!< Function name diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index a9156ff..7525138 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -2290,7 +2290,7 @@ END_FUNCTION(Instruction, Register) #if GBE_DEBUG const RegisterData oldData = this->getDstData(dstID); const RegisterData newData = fn.getRegisterData(reg); - GBE_ASSERT(oldData.family == newData.family); + GBE_ASSERT(oldData.family == newData.family || oldData.family == FAMILY_BOOL || oldData.family == FAMILY_BOOL_BIT); #endif /* GBE_DEBUG */ const Opcode op = this->getOpcode(); switch (op) { @@ -2339,6 +2339,15 @@ END_FUNCTION(Instruction, Register) *new_ins = insn; } + void Instruction::insertbefore(Instruction *next, Instruction ** new_ins) { + Function &fn = next->getFunction(); + Instruction *insn = fn.newInstruction(*this); + insn->parent = next->parent; + prepend(insn, next); + if (new_ins) + *new_ins = insn; + } + bool Instruction::hasSideEffect(void) const { return opcode == OP_STORE || opcode == OP_TYPED_WRITE || diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 8685dd4..9dd775b 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -209,6 +209,8 @@ namespace ir { void remove(void); /* Insert the instruction after the previous one. */ void insert(Instruction *prev, Instruction ** new_ins = NULL); + /* Insert the instruction before the next one. */ + void insertbefore(Instruction *next, Instruction ** new_ins = NULL); void setDBGInfo(DebugInfo in) { DBGInfo = in; } /*! Indicates if the instruction belongs to instruction type T. 
Typically, T * can be BinaryInstruction, UnaryInstruction, LoadInstruction and so on diff --git a/backend/src/ir/register.cpp b/backend/src/ir/register.cpp index 1e78722..ec59e61 100644 --- a/backend/src/ir/register.cpp +++ b/backend/src/ir/register.cpp @@ -38,6 +38,8 @@ namespace ir { case FAMILY_OWORD: return out << "oword"; case FAMILY_HWORD: return out << "hword"; case FAMILY_REG: return out << "reg"; + case FAMILY_BOOL_BIT: return out << "boolbit"; + case FAMILY_BOOL_UW: return out << "booluw"; }; return out; } diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp index 09af24e..e2194dd 100644 --- a/backend/src/ir/register.hpp +++ b/backend/src/ir/register.hpp @@ -48,11 +48,13 @@ namespace ir { FAMILY_QWORD = 4, FAMILY_OWORD = 5, FAMILY_HWORD = 6, - FAMILY_REG = 7 + FAMILY_REG = 7, + FAMILY_BOOL_BIT = 8, + FAMILY_BOOL_UW = 9 }; INLINE char getFamilyName(RegisterFamily family) { - static char registerFamilyName[] = {'b', 'B', 'W', 'D', 'Q', 'O', 'H', 'R'}; + static char registerFamilyName[] = {'b', 'B', 'W', 'D', 'Q', 'O', 'H', 'R', 'T', 'U'}; return registerFamilyName[family]; } diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 9954021..c8e29c5 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -705,6 +705,8 @@ namespace gbe map <ir::Register, ir::Register> &redundantPhiCopyMap); /*! Will try to remove redundants LOADI in basic blocks */ void removeLOADIs(const ir::Liveness &liveness, ir::Function &fn); + /*! Will fix bool values live out basic blocks */ + void fixBools(const ir::Liveness &liveness, ir::Function &fn); /*! To avoid lost copy, we need two values for PHI. 
This function create a * fake value for the copy (basically ptr+1) */ @@ -2919,6 +2921,260 @@ namespace gbe }); } + INLINE ir::Register getRegFromMap(ir::Function &fn, map<ir::Register, ir::Register> &map, ir::Register reg, ir::RegisterFamily family = ir::FAMILY_BOOL_BIT) + { + auto it = map.find(reg); + ir::Register ret; + if (it != map.end()) + ret = it->second; + else + { + ret = fn.newRegister(family); + map.insert(std::make_pair(reg, ret)); + } + return ret; + } + + void GenWriter::fixBools(const ir::Liveness &liveness, ir::Function &fn) + { + // We have two kinds of helper register BOOL_BIT adn BOOL_UW for BOOL registers + // BOOL_BIT use per bit for a channel's flag thus the same size as flag register + // BOOL_UW use a UW for a channel's flag thus same as UINT16 register. + // We store these helper register as well as all bool registers and PHI bool registers. + set<ir::Register> boolRegs; + set<ir::Register> boolPHIs; + map<ir::Register, ir::Register> boolBits; + map<ir::Register, ir::Register> boolUWs; + + // Traverse all blocks and insert helper registers to help handle bool operation. + fn.foreachBlock([&](ir::BasicBlock &bb) + { + // Liveinfo helps us to know if the bool value outlives the block + const ir::Liveness::BlockInfo &info = liveness.getBlockInfo(&bb); + + // Top bottom traversal to handle all of the bool registers + bb.foreach([&](ir::Instruction &insn) + { + bool isChanged = false; + ir::Opcode op = insn.getOpcode(); + + // Handle compare that generate the bool values + if (insn.isMemberOf<ir::CompareInstruction>()) { + ir::Register reg = insn.getDst(0); + if (info.inLiveOut(reg)) + { + GBE_ASSERT(fn.getRegisterFamily(reg) == ir::FAMILY_BOOL); + ir::Register boolBit = fn.newRegister(ir::FAMILY_BOOL_BIT); + ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit, reg); + mov.insert(&insn); + boolBits.insert(std::make_pair(reg, boolBit)); + } + } + + // Replace the bool calculate with helper register. 
+ if (op == ir::OP_OR || op == ir::OP_AND || + op == ir::OP_XOR) + { + ir::Register dst = insn.getDst(0); + ir::Register src0 = insn.getSrc(0); + ir::Register src1 = insn.getSrc(1); + if(fn.getRegisterFamily(dst) == ir::FAMILY_BOOL) + { + // In simple case, we could use BOOL_BIT to calculate bool values, + // but if the bool value is a PHI value thus it will be changed in + // differnt block and not all channal can be changed thus we need a + // UW register to enable channel mask. + // TODO: Now we use BOOL_UW to handle all cases for there will be + // some error when there are too many layers of IF branch. + if (boolPHIs.find(dst) != boolPHIs.end() || 1 || + boolPHIs.find(src0) != boolPHIs.end() || + boolPHIs.find(src1) != boolPHIs.end()) + { + auto it0 = boolUWs.find(src0); + ir::Register boolUW0; + if (it0 != boolUWs.end()) + boolUW0 = it0->second; + else + { + boolUW0 = fn.newRegister(ir::FAMILY_BOOL_UW); + boolUWs.insert(std::make_pair(src0, boolUW0)); + ir::Register flag = src0; + auto it = boolBits.find(src0); + if (it != boolBits.end()) + { + ir::Register boolBit = it->second; + ir::Instruction mov = ir::MOV(ir::TYPE_U16, flag , boolBit); + mov.insertbefore(&insn); + } + ir::Register zeroReg = ir::Register(ir::ocl::zero); + ir::Register oneReg = ir::Register(ir::ocl::one); + const ir::Tuple index = fn.makeTuple(flag, oneReg ,zeroReg); + ir::Instruction sel = ir::SEL(ir::TYPE_U16, boolUW0 ,index); + sel.insertbefore(&insn); + } + + auto it1 = boolUWs.find(src1); + ir::Register boolUW1; + if (it1 != boolUWs.end()) + boolUW1 = it1->second; + else + { + boolUW1 = fn.newRegister(ir::FAMILY_BOOL_UW); + boolUWs.insert(std::make_pair(src1, boolUW1)); + ir::Register flag = src1; + auto it = boolBits.find(src1); + if (it != boolBits.end()) + { + ir::Register boolBit = it->second; + ir::Instruction mov = ir::MOV(ir::TYPE_U16, flag , boolBit); + mov.insertbefore(&insn); + } + ir::Register zeroReg = ir::Register(ir::ocl::zero); + ir::Register oneReg = 
ir::Register(ir::ocl::one); + const ir::Tuple index = fn.makeTuple(flag, oneReg ,zeroReg); + ir::Instruction sel = ir::SEL(ir::TYPE_U16, boolUW1 ,index); + sel.insertbefore(&insn); + } + + auto it = boolUWs.find(dst); + ir::Register boolUW; + if (it != boolUWs.end()) + boolUW = it->second; + else + { + boolUW = fn.newRegister(ir::FAMILY_BOOL_UW); + boolUWs.insert(std::make_pair(dst, boolUW)); + } + + ir::Register boolBit= getRegFromMap(fn, boolBits, dst); + ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit, dst); + mov.insert(&insn); + // USE CMP to get UW ==> flag + ir::Register zeroReg = ir::Register(ir::ocl::zero); + ir::Instruction cmp = ir::NE(ir::TYPE_U16, dst, boolUW ,zeroReg); + cmp.insert(&insn); + + if (op == ir::OP_OR) + { + ir::Instruction newinsn = ir::OR(ir::TYPE_U16, boolUW, boolUW0, boolUW1); + newinsn.replace(&insn); + } + else if (op == ir::OP_AND) + { + ir::Instruction newinsn = ir::AND(ir::TYPE_U16, boolUW, boolUW0, boolUW1); + newinsn.replace(&insn); + } + else if (op == ir::OP_XOR) + { + ir::Instruction newinsn = ir::XOR(ir::TYPE_U16, boolUW, boolUW0, boolUW1); + newinsn.replace(&insn); + } + else + GBE_ASSERT(0 && "UNSOPPORTED"); + + } + else + { + auto it = boolBits.find(dst); + ir::Register boolBit; + if (it != boolBits.end()) + boolBit = it->second; + else + boolBit = fn.newRegister(ir::FAMILY_BOOL_BIT); + boolBits.insert(std::make_pair(dst, boolBit)); + + auto it0 = boolBits.find(src0); + ir::Register boolBit0; + if (it != boolBits.end()) + boolBit0 = it0->second; + else + { + boolBit0 = fn.newRegister(ir::FAMILY_BOOL_BIT); + ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit0, src0); + mov.insertbefore(&insn); + boolBits.insert(std::make_pair(src0, boolBit0)); + } + + auto it1 = boolBits.find(src1); + ir::Register boolBit1; + if (it != boolBits.end()) + boolBit1 = it1->second; + else + { + boolBit1 = fn.newRegister(ir::FAMILY_BOOL_BIT); + ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit1, src1); + mov.insertbefore(&insn); + 
boolBits.insert(std::make_pair(src1, boolBit1)); + } + + if (op == ir::OP_OR) + { + ir::Instruction newinsn = ir::OR(ir::TYPE_U16, boolBit, boolBit0, boolBit1); + newinsn.replace(&insn); + } + else if (op == ir::OP_AND) + { + ir::Instruction newinsn = ir::AND(ir::TYPE_U16, boolBit, boolBit0, boolBit1); + newinsn.replace(&insn); + } + else if (op == ir::OP_XOR) + { + ir::Instruction newinsn = ir::XOR(ir::TYPE_U16, boolBit, boolBit0, boolBit1); + newinsn.replace(&insn); + } + else + GBE_ASSERT(0 && "UNSOPPORTED"); + } + isChanged = true; + } + } + + if (insn.getOpcode() == ir::OP_MOV) + { + ir::Register dst = insn.getDst(0); + ir::Register src = insn.getSrc(0); + if(fn.getRegisterFamily(dst) == ir::FAMILY_BOOL && + fn.getRegisterFamily(src) == ir::FAMILY_BOOL) + { + boolPHIs.insert(dst); + ir::Register boolBit= getRegFromMap(fn, boolBits, dst); + ir::Register boolBit0= getRegFromMap(fn, boolBits, src); + ir::Instruction newinsn = ir::MOV(ir::TYPE_U16, boolBit, boolBit0); + newinsn.replace(&insn); + isChanged = true; + } + } + + if (insn.getOpcode() == ir::OP_LOADI) + { + ir::Register reg = insn.getDst(0); + if(fn.getRegisterFamily(reg) == ir::FAMILY_BOOL) { + ir::Register boolBit = getRegFromMap(fn, boolBits, reg); + replaceDst(&insn, reg, boolBit); + } + } + + // Convert BOOL_BIT into BOOL + for (uint32_t i = 0; i < insn.getSrcNum(); ++i) + { + if (isChanged) break; + ir::Register reg = insn.getSrc(i); + if (fn.getRegisterFamily(reg) != ir::FAMILY_BOOL) + continue; + if (!(insn.getOpcode() == ir::OP_BRA || insn.getOpcode() == ir::OP_SEL)) + continue; + auto it = boolBits.find(reg); + if (it == boolBits.end()) + break; + ir::Register boolBit = it->second; + ir::Instruction mov = ir::MOV(ir::TYPE_U16, reg, boolBit); + mov.insertbefore(&insn); + } + }); + }); + } + + BVAR(OCL_OPTIMIZE_PHI_MOVES, true); BVAR(OCL_OPTIMIZE_LOADI, true); @@ -3221,6 +3477,7 @@ namespace gbe this->postPhiCopyOptimization(liveness, fn, replaceMap, redundantPhiCopyMap); 
this->removeMOVs(liveness, fn); } + this->fixBools(liveness, fn); } void GenWriter::regAllocateReturnInst(ReturnInst &I) {} -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet