This patch LGTM. Pushed with slightly change to remove the extraLive structure and merge it into the normal livein/out set. Thanks.
On Mon, Mar 24, 2014 at 09:46:21AM +0800, Ruiling Song wrote: > As we run in SIMD mode with prediction mask to indicate active lanes, > If a vreg is defined in a loop, and there are som uses of the vreg out of the > loop, > the define point may be run several times under *different* prediction mask. > For these kinds of vreg, we must extend the vreg liveness into the whole loop. > If we don't do this, it's liveness is killed before the def point inside loop. > If the vreg's corresponding physical reg is assigned to other vreg during the > killed period, and the instructions before kill point were re-executed with > different prediction, > the inactive lanes of vreg maybe over-written. Then the out-of-loop use will > got wrong data. > > This patch fixes the HaarFixture case in opencv. > > Signed-off-by: Ruiling Song <ruiling.s...@intel.com> > --- > backend/src/ir/context.cpp | 7 +-- > backend/src/ir/function.cpp | 16 ++++++ > backend/src/ir/function.hpp | 15 ++++++ > backend/src/ir/liveness.cpp | 96 > +++++++++++++-------------------- > backend/src/ir/liveness.hpp | 2 +- > backend/src/llvm/llvm_gen_backend.cpp | 80 ++++++++++++++++++++++++++- > 6 files changed, 151 insertions(+), 65 deletions(-) > > diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp > index c89286e..bc14fd6 100644 > --- a/backend/src/ir/context.cpp > +++ b/backend/src/ir/context.cpp > @@ -76,13 +76,14 @@ namespace ir { > // function > lowerReturn(unit, fn->getName()); > > + // Properly order labels and compute the CFG, it's needed by > FunctionArgumentLower > + fn->sortLabels(); > + fn->computeCFG(); > + > // Spill function argument to the stack if required and identify which > // function arguments can use constant push > lowerFunctionArguments(unit, fn->getName()); > > - // Properly order labels and compute the CFG > - fn->sortLabels(); > - fn->computeCFG(); > const StackElem elem = fnStack.back(); > fnStack.pop_back(); > fn = elem.fn; > diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp > index 71dcc1f..a6aecb5 100644 > --- a/backend/src/ir/function.cpp > +++ b/backend/src/ir/function.cpp > @@ -52,6 +52,7 @@ namespace ir { > > Function::~Function(void) { > for (auto block : blocks) GBE_DELETE(block); > + for (auto loop : loops) GBE_DELETE(loop); > for (auto arg : args) GBE_DELETE(arg); > } > > @@ -59,6 +60,10 @@ namespace ir { > return unit.getPointerFamily(); > } > > + void Function::addLoop(const vector<LabelIndex> &bbs, const > vector<std::pair<LabelIndex, LabelIndex>> &exits) { > + loops.push_back(GBE_NEW(Loop, bbs, exits)); > + } > + > void Function::sortLabels(void) { > uint32_t last = 0; > > @@ -96,6 +101,17 @@ namespace ir { > } > }); > > + // fix labels for loops > + for (auto &x : loops) { > + for (auto &y : x->bbs) > + y = labelMap[y]; > + > + for (auto &z : x->exits) { > + z.first = labelMap[z.first]; > + z.second = labelMap[z.second]; > + } > + } > + > // Reset the label to block mapping > this->labels.resize(last); > foreachBlock([&](BasicBlock &bb) { > diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp > index 8a42529..303c9ba 100644 > --- a/backend/src/ir/function.hpp > +++ b/backend/src/ir/function.hpp > @@ -134,6 +134,17 @@ namespace ir { > return arg0.offset < arg1.offset; > } > > + /*! CFG loops */ > + struct Loop : public NonCopyable > + { > + public: > + Loop(const vector<LabelIndex> &in, const vector<std::pair<LabelIndex, > LabelIndex>> &exit) : > + bbs(in), exits(exit) {} > + vector<LabelIndex> bbs; > + vector<std::pair<LabelIndex, LabelIndex>> exits; > + GBE_STRUCT(Loop); > + }; > + > /*! A function is : > * - a register file > * - a set of basic block layout into a CGF > @@ -318,6 +329,9 @@ namespace ir { > INLINE const uint32_t getStackSize(void) const { return this->stackSize; > } > /*! Push stack size. */ > INLINE void pushStackSize(uint32_t step) { this->stackSize += step; } > + /*! add the loop info for later liveness analysis */ > + void addLoop(const vector<LabelIndex> &bbs, const > vector<std::pair<LabelIndex, LabelIndex>> &exits); > + INLINE const vector<Loop * > &getLoops() { return loops; } > private: > friend class Context; //!< Can freely modify a function > std::string name; //!< Function name > @@ -327,6 +341,7 @@ namespace ir { > vector<BasicBlock*> labels; //!< Each label points to a basic block > vector<Immediate> immediates; //!< All immediate values in the function > vector<BasicBlock*> blocks; //!< All chained basic blocks > + vector<Loop *> loops; //!< Loops info of the function > RegisterFile file; //!< RegisterDatas used by the > instructions > Profile profile; //!< Current function profile > PushMap pushMap; //!< Pushed function arguments (reg->loc) > diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp > index 724d5c3..afab02b 100644 > --- a/backend/src/ir/liveness.cpp > +++ b/backend/src/ir/liveness.cpp > @@ -38,19 +38,11 @@ namespace ir { > if (op == OP_RET) { > workSet.insert(info); > info->liveOut.insert(ocl::retVal); > - } else if (op == OP_BRA) { > - // If this is a backward jump, put it to the extra work list. > - if (((BranchInstruction*)lastInsn)->getLabelIndex() < > bb.getLabelIndex()) > - extraWorkSet.insert(info); > } > }); > // Now with iterative analysis, we compute liveout and livein sets > this->computeLiveInOut(); > - for (auto it : extraWorkSet) { > - for (auto reg : it->liveOut) { > - it->extraLiveIn.insert(reg); > - } > - } > + // extend register (def in loop, use out-of-loop) liveness to the whole > loop > this->computeExtraLiveInOut(); > } > > @@ -128,60 +120,46 @@ namespace ir { > } > > /* > - Consider the following scenario, %100's normal liveness will start from > Ln-1's > - position. In normal analysis, the Ln-1 is not Ln's predecessor, thus the > liveness > - of %100 will be passed to Ln and then will not be passed to L0. > - > - But considering we are running on a multilane with predication's vector > machine. > - The unconditional BR in Ln-1 may be removed and it will enter Ln with a > subset of > - the revert set of Ln-1's predication. For example when running Ln-1, the > active lane > - is 0-7, then at Ln the active lane is 8-15. Then at the end of Ln, a > subset of 8-15 > - will jump to L0. If a register %10 is allocated the same GRF as %100, > given the fact > - that their normal liveness doesn't overlapped, the a subset of 8-15 lanes > will be > - modified. If the %10 and %100 are the same vector data type, then we are > fine. But if > - %100 is a float vector, and the %10 is a bool or short vector, then we hit > a bug here. > - > -L0: > - ... > - %10 = 5 > - ... > -Ln-1: > - %100 = 2 > - BR Ln+1 > - > -Ln: > - ... > - BR(%xxx) L0 > - > -Ln+1: > - %101 = %100 + 2; > - ... > - > - The solution to fix this issue is to build another liveness data. We will > start with > - those BBs with backward jump. Then pass all the liveOut register as extra > liveIn > - of current BB and then forward this extra liveIn to all the blocks. This > is very similar > - to the normal liveness analysis just with reverse direction. > + As we run in SIMD mode with prediction mask to indicate active lanes. > + If a vreg is defined in a loop, and there are som uses of the vreg out of > the loop, > + the define point may be run several times under *different* prediction > mask. > + For these kinds of vreg, we must extend the vreg liveness into the whole > loop. > + If we don't do this, it's liveness is killed before the def point inside > loop. > + If the vreg's corresponding physical reg is assigned to other vreg during > the > + killed period, and the instructions before kill point were re-executed > with different prediction, > + the inactive lanes of vreg maybe over-written. Then the out-of-loop use > will got wrong data. > */ > + > void Liveness::computeExtraLiveInOut(void) { > - while(!extraWorkSet.empty()) { > - struct BlockInfo *currInfo = *extraWorkSet.begin(); > - extraWorkSet.erase(currInfo); > - for (auto currInVar : currInfo->extraLiveIn) > - currInfo->extraLiveOut.insert(currInVar); > - bool isChanged = false; > - for (auto succ : currInfo->bb.getSuccessorSet()) { > - BlockInfo *succInfo = liveness[succ]; > - for (auto currOutVar : currInfo->extraLiveOut) { > - bool changed = false; > - if (!succInfo->upwardUsed.contains(currOutVar)) { > - auto it = succInfo->extraLiveIn.insert(currOutVar); > - changed = it.second; > + const vector<Loop *> &loops = fn.getLoops(); > + if(loops.size() == 0) return; > + > + for (auto l : loops) { > + for (auto x : l->exits) { > + const BasicBlock &a = fn.getBlock(x.first); > + const BasicBlock &b = fn.getBlock(x.second); > + BlockInfo * exiting = liveness[&a]; > + BlockInfo * exit = liveness[&b]; > + std::vector<Register> toExtend; > + > + if(b.getPredecessorSet().size() > 1) { > + for (auto p : exit->upwardUsed) > + toExtend.push_back(p); > + } else { > + std::set_intersection(exiting->liveOut.begin(), > exiting->liveOut.end(), exit->upwardUsed.begin(), exit->upwardUsed.end(), > std::back_inserter(toExtend)); > + } > + if (toExtend.size() == 0) continue; > + > + for (auto bb : l->bbs) { > + BlockInfo * bI = liveness[&fn.getBlock(bb)]; > + for(auto r : toExtend) { > + if(!bI->upwardUsed.contains(r)) > + bI->extraLiveIn.insert(r); > + bI->extraLiveOut.insert(r); > } > - if (changed) isChanged = true; > } > - if (isChanged) > - extraWorkSet.insert(succInfo);} > - }; > + } > + } > #if 0 > fn.foreachBlock([this](const BasicBlock &bb){ > printf("label %d:\n", bb.getLabelIndex()); > diff --git a/backend/src/ir/liveness.hpp b/backend/src/ir/liveness.hpp > index 9198eae..2545053 100644 > --- a/backend/src/ir/liveness.hpp > +++ b/backend/src/ir/liveness.hpp > @@ -143,7 +143,7 @@ namespace ir { > void computeExtraLiveInOut(void); > /*! Set of work list block which has exit(return) instruction */ > typedef set <struct BlockInfo*> WorkSet; > - WorkSet workSet, extraWorkSet; > + WorkSet workSet; > > /*! Use custom allocators */ > GBE_CLASS(Liveness); > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 49fbc7b..80cdb37 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -489,7 +489,6 @@ namespace gbe > if(!bKernel) return false; > > LI = &getAnalysis<LoopInfo>(); > - > emitFunction(F); > return false; > } > @@ -497,6 +496,8 @@ namespace gbe > virtual bool doFinalization(Module &M) { return false; } > /*! handle global variable register allocation (local, constant space) */ > void allocateGlobalVariableRegister(Function &F); > + /*! gather all the loops in the function and add them to ir::Function */ > + void gatherLoopInfo(ir::Function &fn); > /*! Emit the complete function code and declaration */ > void emitFunction(Function &F); > /*! Handle input and output function parameters */ > @@ -1447,6 +1448,78 @@ namespace gbe > } > > } > + static INLINE void findAllLoops(LoopInfo * LI, > std::vector<std::pair<Loop*, int>> &lp) > + { > + for (Loop::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; > ++I) { > + lp.push_back(std::make_pair(*I, -1)); > + } > + if (lp.size() == 0) return; > + > + uint32_t i = 0; > + do { > + const std::vector<Loop*> subLoops = lp[i].first->getSubLoops(); > + for(auto sub : subLoops) > + lp.push_back(std::make_pair(sub, i)); > + i++; > + } while(i < lp.size()); > + } > + > + void GenWriter::gatherLoopInfo(ir::Function &fn) { > + vector<ir::LabelIndex> loopBBs; > + vector<std::pair<ir::LabelIndex, ir::LabelIndex>> loopExits; > + std::vector<std::pair<Loop*, int>> lp; > + > + findAllLoops(LI, lp); > +#if GBE_DEBUG > + // check two loops' interference > + for(int i = 0; i < lp.size(); i++) { > + SmallVector<Loop::Edge, 8> exitBBs; > + lp[i].first->getExitEdges(exitBBs); > + > + const std::vector<BasicBlock*> &inBBs = lp[i].first->getBlocks(); > + std::vector<ir::LabelIndex> bbs1; > + for(auto x : inBBs) { > + bbs1.push_back(labelMap[x]); > + } > + std::sort(bbs1.begin(), bbs1.end()); > + for(int j = i+1; j < lp.size(); j++) { > + if(! lp[i].first->contains(lp[j].first)) { > + const std::vector<BasicBlock*> &inBBs2 = lp[j].first->getBlocks(); > + std::vector<ir::LabelIndex> bbs2; > + std::vector<ir::LabelIndex> bbs3; > + > + for(auto x : inBBs2) { > + bbs2.push_back(labelMap[x]); > + } > + > + std::sort(bbs2.begin(), bbs2.end()); > + std::set_intersection(bbs1.begin(), bbs1.end(), bbs2.begin(), > bbs2.end(), std::back_inserter(bbs3)); > + GBE_ASSERT(bbs3.size() < 1); > + } > + } > + } > +#endif > + > + for (auto loop : lp) { > + loopBBs.clear(); > + loopExits.clear(); > + > + const std::vector<BasicBlock*> &inBBs = loop.first->getBlocks(); > + for (auto b : inBBs) { > + GBE_ASSERT(labelMap.find(b) != labelMap.end()); > + loopBBs.push_back(labelMap[b]); > + } > + > + SmallVector<Loop::Edge, 8> exitBBs; > + loop.first->getExitEdges(exitBBs); > + for(auto b : exitBBs){ > + GBE_ASSERT(labelMap.find(b.first) != labelMap.end()); > + GBE_ASSERT(labelMap.find(b.second) != labelMap.end()); > + loopExits.push_back(std::make_pair(labelMap[b.first], > labelMap[b.second])); > + } > + fn.addLoop(loopBBs, loopExits); > + } > + } > > void GenWriter::emitFunction(Function &F) > { > @@ -1463,6 +1536,7 @@ namespace gbe > } > > ctx.startFunction(F.getName()); > + ir::Function &fn = ctx.getFunction(); > this->regTranslator.clear(); > this->labelMap.clear(); > this->emitFunctionPrototype(F); > @@ -1483,11 +1557,13 @@ namespace gbe > for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) > this->simplifyTerminator(BB); > > + // gather loop info, which is useful for liveness analysis > + gatherLoopInfo(fn); > + > // ... then, emit the instructions for all basic blocks > pass = PASS_EMIT_INSTRUCTIONS; > for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) > emitBasicBlock(BB); > - ir::Function &fn = ctx.getFunction(); > ctx.endFunction(); > > // Liveness can be shared when we optimized the immediates and the MOVs > -- > 1.7.9.5 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet