LGTM. Thanks.
On Mon, Sep 28, 2015 at 03:23:01AM +0800, Guo Yejun wrote: > It is done at selection ir level, it removes MOV instructions > and helps to reduce the register pressure. > > For instructions like: > MOV(8) %42<2>:UB : %53<32,8,4>:UB > ADD(8) %43<2>:B : %40<16,8,2>:B -%42<16,8,2>:B > can be optimized as: > ADD(8) %43<2>:UB : %56<32,8,4>:UB -%53<32,8,4>:UB > > v2: move propagateRegister() as static mothod of GenRegister > refine replaceInfo from set to map > encapsulate function CanBeReplaced > > Signed-off-by: Guo Yejun <yejun....@intel.com> > --- > backend/src/backend/gen_insn_selection.cpp | 4 + > backend/src/backend/gen_insn_selection.hpp | 2 + > .../src/backend/gen_insn_selection_optimize.cpp | 203 > ++++++++++++++++++++- > backend/src/backend/gen_register.hpp | 21 +++ > 4 files changed, 225 insertions(+), 5 deletions(-) > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 28c7ed2..038b839 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -2062,6 +2062,10 @@ namespace gbe > /////////////////////////////////////////////////////////////////////////// > // Code selection public implementation > /////////////////////////////////////////////////////////////////////////// > + const GenContext& Selection::getCtx() > + { > + return this->opaque->ctx; > + } > > Selection::Selection(GenContext &ctx) { > this->blockList = NULL; > diff --git a/backend/src/backend/gen_insn_selection.hpp > b/backend/src/backend/gen_insn_selection.hpp > index 86542b0..275eb9c 100644 > --- a/backend/src/backend/gen_insn_selection.hpp > +++ b/backend/src/backend/gen_insn_selection.hpp > @@ -271,6 +271,8 @@ namespace gbe > void optimize(void); > uint32_t opt_features; > > + const GenContext &getCtx(); > + > /*! Use custom allocators */ > GBE_CLASS(Selection); > }; > diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp > b/backend/src/backend/gen_insn_selection_optimize.cpp > index c82fbe5..3f2ae2f 100644 > --- a/backend/src/backend/gen_insn_selection_optimize.cpp > +++ b/backend/src/backend/gen_insn_selection_optimize.cpp > @@ -12,38 +12,231 @@ > > namespace gbe > { > + //helper functions > + static uint32_t CalculateElements(const GenRegister& reg, uint32_t > execWidth) > + { > + uint32_t elements = 0; > + uint32_t elementSize = typeSize(reg.type); > + uint32_t width = GenRegister::width_size(reg); > + assert(execWidth >= width); > + uint32_t height = execWidth / width; > + uint32_t vstride = GenRegister::vstride_size(reg); > + uint32_t hstride = GenRegister::hstride_size(reg); > + uint32_t base = reg.subnr; > + for (uint32_t i = 0; i < height; ++i) { > + uint32_t offsetInByte = base; > + for (uint32_t j = 0; j < width; ++j) { > + uint32_t offsetInType = offsetInByte / elementSize; > + elements |= (1 << offsetInType); > + offsetInByte += hstride * elementSize; > + } > + offsetInByte += vstride * elementSize; > + } > + return elements; > + } > > class SelOptimizer > { > public: > - SelOptimizer(uint32_t features) : features(features) {} > + SelOptimizer(const GenContext& ctx, uint32_t features) : ctx(ctx), > features(features) {} > virtual void run() = 0; > virtual ~SelOptimizer() {} > protected: > + const GenContext &ctx; //in case that we need it > uint32_t features; > }; > > class SelBasicBlockOptimizer : public SelOptimizer > { > public: > - SelBasicBlockOptimizer(uint32_t features, SelectionBlock &bb) : > SelOptimizer(features), bb(bb) {} > + SelBasicBlockOptimizer(const GenContext& ctx, > + const ir::Liveness::LiveOut& liveout, > + uint32_t features, > + SelectionBlock &bb) : > + SelOptimizer(ctx, features), bb(bb), liveout(liveout), > optimized(false) > + { > + } > ~SelBasicBlockOptimizer() {} > virtual void run(); > > private: > + // local copy propagation > + class ReplaceInfo > + { > + public: > + ReplaceInfo(SelectionInstruction& insn, > + const GenRegister& intermedia, > + const GenRegister& replacement) : > + insn(insn), intermedia(intermedia), > replacement(replacement) > + { > + assert(insn.opcode == SEL_OP_MOV); > + assert(&(insn.src(0)) == &replacement); > + assert(&(insn.dst(0)) == &intermedia); > + this->elements = CalculateElements(intermedia, insn.state.execWidth); > + replacementOverwritten = false; > + } > + ~ReplaceInfo() > + { > + this->toBeReplaceds.clear(); > + } > + > + SelectionInstruction& insn; > + const GenRegister& intermedia; > + uint32_t elements; > + const GenRegister& replacement; > + set<GenRegister*> toBeReplaceds; > + bool replacementOverwritten; > + GBE_CLASS(ReplaceInfo); > + }; > + typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap; > + ReplaceInfoMap replaceInfoMap; > + void doLocalCopyPropagation(); > + void addToReplaceInfoMap(SelectionInstruction& insn); > + void changeInsideReplaceInfoMap(const SelectionInstruction& insn, > GenRegister& var); > + void removeFromReplaceInfoMap(const GenRegister& var); > + void doReplacement(ReplaceInfo* info); > + bool CanBeReplaced(const ReplaceInfo* info, const SelectionInstruction& > insn, const GenRegister& var); > + void cleanReplaceInfoMap(); > + > SelectionBlock &bb; > - static const size_t MaxTries = 1; //the times for optimization > + const ir::Liveness::LiveOut& liveout; > + bool optimized; > + static const size_t MaxTries = 1; //the max times of optimization try > }; > > + void SelBasicBlockOptimizer::doReplacement(ReplaceInfo* info) > + { > + for (GenRegister* reg : info->toBeReplaceds) { > + GenRegister::propagateRegister(*reg, info->replacement); > + } > + bb.insnList.erase(&(info->insn)); > + optimized = true; > + } > + > + void SelBasicBlockOptimizer::cleanReplaceInfoMap() > + { > + for (auto& pair : replaceInfoMap) { > + ReplaceInfo* info = pair.second; > + doReplacement(info); > + delete info; > + } > + replaceInfoMap.clear(); > + } > + > + void SelBasicBlockOptimizer::removeFromReplaceInfoMap(const GenRegister& > var) > + { > + for (ReplaceInfoMap::iterator pos = replaceInfoMap.begin(); pos != > replaceInfoMap.end(); ++pos) { > + ReplaceInfo* info = pos->second; > + if (info->intermedia.reg() == var.reg()) { //intermedia is > overwritten > + if (info->intermedia.quarter == var.quarter && > info->intermedia.subnr == var.subnr) { > + //the whole intermedia is overwritten, so, do replacement for the > scanned IRs > + doReplacement(info); > + } > + replaceInfoMap.erase(pos); > + delete info; > + return; > + } > + if (info->replacement.reg() == var.reg()) { //replacement is > overwritten > + info->replacementOverwritten = true; > + return; > + } > + } > + } > + > + void SelBasicBlockOptimizer::addToReplaceInfoMap(SelectionInstruction& > insn) > + { > + assert(insn.opcode == SEL_OP_MOV); > + const GenRegister& src = insn.src(0); > + const GenRegister& dst = insn.dst(0); > + if (src.type != dst.type || src.file != dst.file) > + return; > + > + if (liveout.find(dst.reg()) != liveout.end()) > + return; > + > + ReplaceInfo* info = new ReplaceInfo(insn, dst, src); > + replaceInfoMap[dst.reg()] = info; > + } > + > + bool SelBasicBlockOptimizer::CanBeReplaced(const ReplaceInfo* info, const > SelectionInstruction& insn, const GenRegister& var) > + { > + //some conditions here are very strict, while some conditions are very > light > + //the reason is that i'm unable to find a perfect condition now in the > first version > + //need to refine the conditions when debugging/optimizing real kernels > + > + if (insn.opcode == SEL_OP_BSWAP) //should remove once bswap issue is > fixed > + return false; > + > + if (insn.isWrite() || insn.isRead()) //register in selection vector > + return false; > + > + if (info->replacementOverwritten) > + return false; > + > + if (info->insn.state.noMask == 0 && insn.state.noMask == 1) > + return false; > + > + if (info->insn.state.predicate != insn.state.predicate && > info->insn.state.predicate != GEN_PREDICATE_NONE) > + return false; > + > + if (info->intermedia.type == var.type && info->intermedia.quarter == > var.quarter && info->intermedia.subnr == var.subnr) { > + uint32_t elements = CalculateElements(var, insn.state.execWidth); > //considering width, hstrid, vstrid and execWidth > + if (info->elements == elements) > + return true; > + } > + > + return false; > + } > + > + void SelBasicBlockOptimizer::changeInsideReplaceInfoMap(const > SelectionInstruction& insn, GenRegister& var) > + { > + ReplaceInfoMap::iterator it = replaceInfoMap.find(var.reg()); > + if (it != replaceInfoMap.end()) { //same ir register > + ReplaceInfo* info = it->second; > + if (CanBeReplaced(info, insn, var)) { > + info->toBeReplaceds.insert(&var); > + } else { > + //if it is the same ir register, but could not be replaced for some > reason, > + //that means we could not remove MOV instruction, and so no > replacement, > + //so we'll remove the info for this case. > + replaceInfoMap.erase(it); > + delete info; > + } > + } > + } > + > + void SelBasicBlockOptimizer::doLocalCopyPropagation() > + { > + for (SelectionInstruction &insn : bb.insnList) { > + for (uint8_t i = 0; i < insn.srcNum; ++i) > + changeInsideReplaceInfoMap(insn, insn.src(i)); > + > + for (uint8_t i = 0; i < insn.dstNum; ++i) > + removeFromReplaceInfoMap(insn.dst(i)); > + > + if (insn.opcode == SEL_OP_MOV) > + addToReplaceInfoMap(insn); > + } > + cleanReplaceInfoMap(); > + } > + > void SelBasicBlockOptimizer::run() > { > + for (size_t i = 0; i < MaxTries; ++i) { > + optimized = false; > > + doLocalCopyPropagation(); > + //doOtherLocalOptimization(); > + > + if (!optimized) > + break; //break since no optimization found at this round > + } > } > > class SelGlobalOptimizer : public SelOptimizer > { > public: > - SelGlobalOptimizer(uint32_t features) : SelOptimizer(features) {} > + SelGlobalOptimizer(const GenContext& ctx, uint32_t features) : > SelOptimizer(ctx, features) {} > ~SelGlobalOptimizer() {} > virtual void run(); > }; > @@ -57,7 +250,7 @@ namespace gbe > { > //do basic block level optimization > for (SelectionBlock &block : *blockList) { > - SelBasicBlockOptimizer bbopt(opt_features, block); > + SelBasicBlockOptimizer bbopt(getCtx(), getCtx().getLiveOut(block.bb), > opt_features, block); > bbopt.run(); > } > > diff --git a/backend/src/backend/gen_register.hpp > b/backend/src/backend/gen_register.hpp > index a1bcd5a..0f5d7cf 100644 > --- a/backend/src/backend/gen_register.hpp > +++ b/backend/src/backend/gen_register.hpp > @@ -1212,6 +1212,27 @@ namespace gbe > return reg; > } > > + static INLINE void propagateRegister(GenRegister& dst, const > GenRegister& src) > + { > + dst.type = src.type; > + dst.file = src.file; > + dst.physical = src.physical; > + dst.subphysical = src.subphysical; > + dst.value.reg = src.value.reg; > + dst.vstride = src.vstride; > + dst.width = src.width; > + dst.hstride = src.hstride; > + dst.quarter = src.quarter; > + dst.nr = src.nr; > + dst.subnr = src.subnr; > + dst.address_mode = src.address_mode; > + dst.a0_subnr = src.a0_subnr; > + dst.addr_imm = src.addr_imm; > + > + dst.negation = dst.negation ^ src.negation; > + dst.absolute = dst.absolute | src.absolute; > + } > + > /*! Generate register encoding with run-time simdWidth */ > #define DECL_REG_ENCODER(NAME, SIMD16, SIMD8, SIMD1) \ > template <typename... Args> \ > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet