Ping for review including the patch 1/2 in the first version patch set and this v2 patch.
And please be noted, this patch requires the following patch [PATCH] GBE: workaround an unsupported wide integer case. to pass all the unit tests. Thanks. On Wed, Oct 08, 2014 at 12:58:59PM +0800, Zhigang Gong wrote: > By default, the unrolling threshold is relatively small. > Thus some relative large loops which access private array > will not be unrolled, thus those private array can't > be scalarized latter. And the private array is allocated > in stack which is extreme slow for Gen backend currently. > > To increase the unrolling threshold for all loops is not > a good idea, as most of the loops don't need to do unrolling > for this purpose and a large unrolling threshold will cause > a big code size and unecessary big register pressure which > may lead to register spilling. > > So this patch introduce a trade-off pass to identify those > loops which still have private load/store in the outer most > of the loop. Then add a metadata to it to indicate aggresive > unrolling on those loops. Then do another round loop unrolling. > > This patch with the previous small patch, can bring significant > performance improvement for some cases. I just tested with some > opencv test cases, and observed it can bring 2x to 10x improvement. > > v2: > refine the parent loop unroll analysis method. > > Signed-off-by: Zhigang Gong <[email protected]> > --- > backend/src/CMakeLists.txt | 1 + > backend/src/llvm/llvm_gen_backend.hpp | 4 + > backend/src/llvm/llvm_to_gen.cpp | 12 +- > backend/src/llvm/llvm_unroll.cpp | 230 > ++++++++++++++++++++++++++++++++++ > 4 files changed, 245 insertions(+), 2 deletions(-) > create mode 100644 backend/src/llvm/llvm_unroll.cpp > > diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt > index e57227a..e408365 100644 > --- a/backend/src/CMakeLists.txt > +++ b/backend/src/CMakeLists.txt > @@ -84,6 +84,7 @@ set (GBE_SRC > llvm/llvm_loadstore_optimization.cpp > llvm/llvm_gen_backend.hpp > llvm/llvm_gen_ocl_function.hxx > + llvm/llvm_unroll.cpp > llvm/llvm_to_gen.hpp > backend/gen/gen_mesa_disasm.c > backend/gen_insn_selection.cpp > diff --git a/backend/src/llvm/llvm_gen_backend.hpp > b/backend/src/llvm/llvm_gen_backend.hpp > index f73aafe..76b60df 100644 > --- a/backend/src/llvm/llvm_gen_backend.hpp > +++ b/backend/src/llvm/llvm_gen_backend.hpp > @@ -27,6 +27,7 @@ > #define __GBE_LLVM_GEN_BACKEND_HPP__ > > #include "llvm/Pass.h" > +#include "llvm/Analysis/LoopPass.h" > #include "sys/platform.hpp" > #include "sys/map.hpp" > #include "sys/hash_map.hpp" > @@ -98,6 +99,9 @@ namespace gbe > /*! Passer the printf function call. */ > llvm::FunctionPass* createPrintfParserPass(); > > + /* customized loop unrolling pass. */ > + llvm::LoopPass *createCustomLoopUnrollPass(); > + > /*! Add all the function call of ocl to our bitcode. */ > llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath); > > diff --git a/backend/src/llvm/llvm_to_gen.cpp > b/backend/src/llvm/llvm_to_gen.cpp > index 26d2a49..6fbf41f 100644 > --- a/backend/src/llvm/llvm_to_gen.cpp > +++ b/backend/src/llvm/llvm_to_gen.cpp > @@ -152,9 +152,17 @@ namespace gbe > MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars > MPM.add(createLoopIdiomPass()); // Recognize idioms like > memset. > MPM.add(createLoopDeletionPass()); // Delete dead loops > - MPM.add(createLoopUnrollPass()); // Unroll small loops > - if(optLevel > 0) > + MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops > + if(optLevel > 0) { > + MPM.add(createSROAPass(/*RequiresDomTree*/ false)); > + MPM.add(createGVNPass()); // Remove redundancies > + } > + MPM.add(createCustomLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll > loops > + MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops > + if(optLevel > 0) { > + MPM.add(createSROAPass(/*RequiresDomTree*/ false)); > MPM.add(createGVNPass()); // Remove redundancies > + } > MPM.add(createMemCpyOptPass()); // Remove memcpy / form > memset > MPM.add(createSCCPPass()); // Constant prop with SCCP > > diff --git a/backend/src/llvm/llvm_unroll.cpp > b/backend/src/llvm/llvm_unroll.cpp > new file mode 100644 > index 0000000..a27324b > --- /dev/null > +++ b/backend/src/llvm/llvm_unroll.cpp > @@ -0,0 +1,230 @@ > +/* > + * Copyright © 2012 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. > + */ > + > +#include <set> > +#include "llvm/Config/llvm-config.h" > +#if LLVM_VERSION_MINOR <= 2 > +#include "llvm/Function.h" > +#include "llvm/InstrTypes.h" > +#include "llvm/Instructions.h" > +#include "llvm/IntrinsicInst.h" > +#include "llvm/Module.h" > +#else > +#include "llvm/IR/Function.h" > +#include "llvm/IR/InstrTypes.h" > +#include "llvm/IR/Instructions.h" > +#include "llvm/IR/IntrinsicInst.h" > +#include "llvm/IR/Module.h" > +#endif /* LLVM_VERSION_MINOR <= 2 */ > +#include "llvm/Pass.h" > +#if LLVM_VERSION_MINOR <= 1 > +#include "llvm/Support/IRBuilder.h" > +#elif LLVM_VERSION_MINOR == 2 > +#include "llvm/IRBuilder.h" > +#else > +#include "llvm/IR/IRBuilder.h" > +#endif /* LLVM_VERSION_MINOR <= 1 */ > +#include "llvm/Support/raw_ostream.h" > +#include "llvm/PassManager.h" > +#include "llvm/Transforms/Scalar.h" > +#include "llvm/Analysis/ScalarEvolution.h" > +#include "llvm/Analysis/LoopPass.h" > +#include "llvm/Analysis/TargetTransformInfo.h" > +#include "llvm/IR/Dominators.h" > +#include "llvm/llvm_gen_backend.hpp" > +#include "sys/map.hpp" > + > + > +using namespace llvm; > + > +namespace gbe { > + class CustomLoopUnroll : public LoopPass > + { > + public: > + static char ID; > + CustomLoopUnroll() : > + LoopPass(ID) {} > + > + void getAnalysisUsage(AnalysisUsage &AU) const { > + AU.addRequired<LoopInfo>(); > + AU.addPreserved<LoopInfo>(); > + AU.addRequiredID(LoopSimplifyID); > + AU.addPreservedID(LoopSimplifyID); > + AU.addRequiredID(LCSSAID); > + AU.addPreservedID(LCSSAID); > + AU.addRequired<ScalarEvolution>(); > + AU.addPreserved<ScalarEvolution>(); > + // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. > + // If loop unroll does not preserve dom info then LCSSA pass on next > + // loop will receive invalid dom info. > + // For now, recreate dom info, if loop is unrolled. > + AU.addPreserved<DominatorTreeWrapperPass>(); > + > + } > + > + // Returns the value associated with the given metadata node name (for > + // example, "llvm.loop.unroll.count"). If no such named metadata node > + // exists, then nullptr is returned. > + static const ConstantInt *GetUnrollMetadataValue(const Loop *L, > + StringRef Name) { > + MDNode *LoopID = L->getLoopID(); > + if (!LoopID) return nullptr; > + > + // First operand should refer to the loop id itself. > + assert(LoopID->getNumOperands() > 0 && "requires at least one > operand"); > + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); > + > + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { > + const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); > + if (!MD) continue; > + > + const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); > + if (!S) continue; > + > + if (Name.equals(S->getString())) { > + assert(MD->getNumOperands() == 2 && > + "Unroll hint metadata should have two operands."); > + return cast<ConstantInt>(MD->getOperand(1)); > + } > + } > + return nullptr; > + } > + > + void setUnrollID(Loop *L, bool enable) { > + if (!enable && disabledLoops.find(L) != disabledLoops.end()) > + return; > + LLVMContext &Context = L->getHeader()->getContext(); > + SmallVector<Value *, 2> forceUnroll; > + forceUnroll.push_back(MDString::get(Context, > "llvm.loop.unroll.enable")); > + forceUnroll.push_back(ConstantInt::get(Type::getInt1Ty(Context), > enable)); > + MDNode *forceUnrollNode = MDNode::get(Context, forceUnroll); > + SmallVector<Value *, 4> Vals; > + Vals.push_back(NULL); > + Vals.push_back(forceUnrollNode); > + MDNode *NewLoopID = MDNode::get(Context, Vals); > + // Set operand 0 to refer to the loop id itself. > + NewLoopID->replaceOperandWith(0, NewLoopID); > + L->setLoopID(NewLoopID); > + if (!enable) > + disabledLoops.insert(L); > + } > + > + static bool hasPrivateLoadStore(Loop *L) { > + const std::vector<Loop*> subLoops = L->getSubLoops(); > + std::set<BasicBlock*> subBlocks, blocks; > + > + for(auto l : subLoops) > + for(auto bb : l->getBlocks()) > + subBlocks.insert(bb); > + for(auto bb : L->getBlocks()) > + if (subBlocks.find(bb) == subBlocks.end()) > + blocks.insert(bb); > + for(auto bb : blocks) { > + for (BasicBlock::iterator inst = bb->begin(), instE = bb->end(); > inst != instE; ++inst) { > + unsigned addrSpace = 0; > + if (isa<LoadInst>(*inst)) { > + LoadInst *ld = cast<LoadInst>(&*inst); > + addrSpace = ld->getPointerAddressSpace(); > + } > + else if (isa<StoreInst>(*inst)) { > + StoreInst *st = cast<StoreInst>(&*inst); > + addrSpace = st->getPointerAddressSpace(); > + } > + if (addrSpace == 0) > + return true; > + } > + } > + return false; > + } > + // If one loop has very large self trip count > + // we don't want to unroll it. > + // self trip count means trip count divide by the parent's trip count. > for example > + // for (int i = 0; i < 16; i++) { > + // for (int j = 0; j < 4; j++) { > + // for (int k = 0; k < 2; k++) { > + // ... > + // } > + // ... > + // } > + // The inner loops j and k could be unrolled, but the loop i will not > be unrolled. > + // The return value true means the L could be unrolled, otherwise, it > could not > + // be unrolled. > + bool handleParentLoops(Loop *L, LPPassManager &LPM) { > + Loop *currL = L; > + ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); > + BasicBlock *latchBlock = currL->getLoopLatch(); > + unsigned currTripCount = 0; > + bool shouldUnroll = true; > + if (latchBlock) > + currTripCount = SE->getSmallConstantTripCount(L, latchBlock); > + > + while(currL) { > + Loop *parentL = currL->getParentLoop(); > + unsigned parentTripCount = 0; > + if (parentL) { > + BasicBlock *parentLatchBlock = parentL->getLoopLatch(); > + if (parentLatchBlock) > + parentTripCount = SE->getSmallConstantTripCount(parentL, > parentLatchBlock); > + } > + if ((parentTripCount != 0 && currTripCount / parentTripCount > 16) > || > + (currTripCount > 32)) { > + if (currL == L) > + shouldUnroll = false; > + setUnrollID(currL, false); > + if (currL != L) > + LPM.deleteLoopFromQueue(currL); > + } > + currL = parentL; > + currTripCount = parentTripCount; > + } > + return shouldUnroll; > + } > + > + // Analyze the outermost BBs of this loop, if there are > + // some private load or store, we change it's loop meta data > + // to indicate more aggresive unrolling on it. > + virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { > + const ConstantInt *Enable = GetUnrollMetadataValue(L, > "llvm.loop.unroll.enable"); > + if (Enable) > + return false; > + const ConstantInt *Count = GetUnrollMetadataValue(L, > "llvm.loop.unroll.count"); > + if (Count) > + return false; > + > + if (!handleParentLoops(L, LPM)) > + return false; > + > + if (!hasPrivateLoadStore(L)) > + return false; > + setUnrollID(L, true); > + return true; > + } > + > + virtual const char *getPassName() const { > + return "SPIR backend: custom loop unrolling pass"; > + } > + private: > + std::set<Loop *> disabledLoops; > + > + }; > + > + char CustomLoopUnroll::ID = 0; > + > + LoopPass *createCustomLoopUnrollPass() { > + return new CustomLoopUnroll(); > + } > +} // end namespace > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
