It is done at selection ir level, it removes MOV instructions and helps to reduce the register pressure.
For instructions like: MOV(8) %42<2>:UB : %53<32,8,4>:UB ADD(8) %43<2>:B : %40<16,8,2>:B -%42<16,8,2>:B can be optimized as: ADD(8) %43<2>:UB : %56<32,8,4>:UB -%53<32,8,4>:UB Signed-off-by: Guo Yejun <yejun....@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 4 + backend/src/backend/gen_insn_selection.hpp | 2 + .../src/backend/gen_insn_selection_optimize.cpp | 201 ++++++++++++++++++++- 3 files changed, 202 insertions(+), 5 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 28c7ed2..038b839 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2062,6 +2062,10 @@ namespace gbe /////////////////////////////////////////////////////////////////////////// // Code selection public implementation /////////////////////////////////////////////////////////////////////////// + const GenContext& Selection::getCtx() + { + return this->opaque->ctx; + } Selection::Selection(GenContext &ctx) { this->blockList = NULL; diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 86542b0..275eb9c 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -271,6 +271,8 @@ namespace gbe void optimize(void); uint32_t opt_features; + const GenContext &getCtx(); + /*! Use custom allocators */ GBE_CLASS(Selection); }; diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp index c82fbe5..1c2f559 100644 --- a/backend/src/backend/gen_insn_selection_optimize.cpp +++ b/backend/src/backend/gen_insn_selection_optimize.cpp @@ -12,38 +12,229 @@ namespace gbe { + //helper functions + static uint32_t CalculateElements(const GenRegister& reg, uint32_t execWidth) + { + uint32_t elements = 0; + uint32_t elementSize = typeSize(reg.type); + uint32_t width = GenRegister::width_size(reg); + assert(execWidth >= width); + uint32_t height = execWidth / width; + uint32_t vstride = GenRegister::vstride_size(reg); + uint32_t hstride = GenRegister::hstride_size(reg); + uint32_t base = reg.subnr; + for (uint32_t i = 0; i < height; ++i) { + uint32_t offsetInByte = base; + for (uint32_t j = 0; j < width; ++j) { + uint32_t offsetInType = offsetInByte / elementSize; + elements |= (1 << offsetInType); + offsetInByte += hstride * elementSize; + } + offsetInByte += vstride * elementSize; + } + return elements; + } class SelOptimizer { public: - SelOptimizer(uint32_t features) : features(features) {} + SelOptimizer(const GenContext& ctx, uint32_t features) : ctx(ctx), features(features) {} virtual void run() = 0; virtual ~SelOptimizer() {} protected: + const GenContext &ctx; //in case that we need it uint32_t features; }; class SelBasicBlockOptimizer : public SelOptimizer { public: - SelBasicBlockOptimizer(uint32_t features, SelectionBlock &bb) : SelOptimizer(features), bb(bb) {} + SelBasicBlockOptimizer(const GenContext& ctx, + const ir::Liveness::LiveOut& liveout, + uint32_t features, + SelectionBlock &bb) : + SelOptimizer(ctx, features), bb(bb), liveout(liveout), optimized(false) + { + } ~SelBasicBlockOptimizer() {} virtual void run(); private: + // local copy propagation + class ReplaceInfo + { + public: + ReplaceInfo(SelectionInstruction& insn, + const GenRegister& intermedia, + const GenRegister& replacement, + uint32_t execWidth) : + insn(insn), intermedia(intermedia), replacement(replacement) + { + assert(insn.opcode == SEL_OP_MOV); + assert(&(insn.src(0)) == &replacement); + assert(&(insn.dst(0)) == &intermedia); + this->elements = CalculateElements(intermedia, execWidth); + replacementChanged = false; + } + ~ReplaceInfo() + { + this->toBeReplacements.clear(); + } + + SelectionInstruction& insn; + const GenRegister& intermedia; + uint32_t elements; + const GenRegister& replacement; + set<GenRegister*> toBeReplacements; + bool replacementChanged; + GBE_CLASS(ReplaceInfo); + }; + set<ReplaceInfo*> replaceInfos; + void doLocalCopyPropagation(); + void addToReplaceInfos(SelectionInstruction& insn); + void changeInsideReplaceInfos(const SelectionInstruction& insn, GenRegister& var); + void removeFromReplaceInfos(const GenRegister& var); + void doReplacement(ReplaceInfo* info); + void cleanReplaceInfos(); + static void propagateRegister(GenRegister& dst, const GenRegister& src); + SelectionBlock &bb; - static const size_t MaxTries = 1; //the times for optimization + const ir::Liveness::LiveOut& liveout; + bool optimized; + static const size_t MaxTries = 1; //the max times of optimization try }; + void SelBasicBlockOptimizer::propagateRegister(GenRegister& dst, const GenRegister& src) + { + dst.type = src.type; + dst.file = src.file; + dst.physical = src.physical; + dst.subphysical = src.subphysical; + dst.value.reg = src.value.reg; + dst.vstride = src.vstride; + dst.width = src.width; + dst.hstride = src.hstride; + dst.quarter = src.quarter; + dst.nr = src.nr; + dst.subnr = src.subnr; + dst.address_mode = src.address_mode; + dst.a0_subnr = src.a0_subnr; + dst.addr_imm = src.addr_imm; + + dst.negation = dst.negation ^ src.negation; + dst.absolute = dst.absolute | src.absolute; + } + + void SelBasicBlockOptimizer::doReplacement(ReplaceInfo* info) + { + for (GenRegister* reg : info->toBeReplacements) { + propagateRegister(*reg, info->replacement); + } + bb.insnList.erase(&(info->insn)); + } + + void SelBasicBlockOptimizer::cleanReplaceInfos() + { + for (ReplaceInfo* info : replaceInfos) { + doReplacement(info); + delete info; + } + replaceInfos.clear(); + } + + void SelBasicBlockOptimizer::removeFromReplaceInfos(const GenRegister& var) + { + for (set<ReplaceInfo*>::iterator pos = replaceInfos.begin(); pos != replaceInfos.end(); ) { + ReplaceInfo* info = *pos; + if (info->intermedia.reg() == var.reg()) { + if (info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr) + doReplacement(info); + replaceInfos.erase(pos++); + delete info; + } + else if (info->replacement.reg() == var.reg()) { + info->replacementChanged = true; + ++pos; + } + else + ++pos; + } + } + + void SelBasicBlockOptimizer::addToReplaceInfos(SelectionInstruction& insn) + { + assert(insn.opcode == SEL_OP_MOV); + const GenRegister& src = insn.src(0); + const GenRegister& dst = insn.dst(0); + if (src.type != dst.type || src.file != dst.file) + return; + + if (liveout.find(dst.reg()) != liveout.end()) + return; + + ReplaceInfo* info = new ReplaceInfo(insn, dst, src, insn.state.execWidth); + replaceInfos.insert(info); + } + + void SelBasicBlockOptimizer::changeInsideReplaceInfos(const SelectionInstruction& insn, GenRegister& var) + { + for (ReplaceInfo* info : replaceInfos) { + if (info->intermedia.reg() == var.reg()) { + bool replaceable = false; + if (insn.opcode != SEL_OP_BSWAP && !insn.isWrite()) { + if (!info->replacementChanged && info->intermedia.type == var.type && info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr) { + uint32_t elements = CalculateElements(var, insn.state.execWidth); + if (info->elements == elements) { + info->toBeReplacements.insert(&var); + replaceable = true; + } + } + } + + //if we found the same register, but could not be replaced for some reason, + //that means we could not remove MOV instruction, and so no replacement, + //so we'll remove the info for this case. + if (!replaceable) { + replaceInfos.erase(info); + delete info; + } + break; + } + } + } + + void SelBasicBlockOptimizer::doLocalCopyPropagation() + { + for (SelectionInstruction &insn : bb.insnList) { + for (uint8_t i = 0; i < insn.srcNum; ++i) + changeInsideReplaceInfos(insn, insn.src(i)); + + for (uint8_t i = 0; i < insn.dstNum; ++i) + removeFromReplaceInfos(insn.dst(i)); + + if (insn.opcode == SEL_OP_MOV) + addToReplaceInfos(insn); + } + cleanReplaceInfos(); + } + void SelBasicBlockOptimizer::run() { + for (size_t i = 0; i < MaxTries; ++i) { + optimized = false; + doLocalCopyPropagation(); + //doOtherLocalOptimization(); + + if (!optimized) + break; //break since no optimization found at this round + } } class SelGlobalOptimizer : public SelOptimizer { public: - SelGlobalOptimizer(uint32_t features) : SelOptimizer(features) {} + SelGlobalOptimizer(const GenContext& ctx, uint32_t features) : SelOptimizer(ctx, features) {} ~SelGlobalOptimizer() {} virtual void run(); }; @@ -57,7 +248,7 @@ namespace gbe { //do basic block level optimization for (SelectionBlock &block : *blockList) { - SelBasicBlockOptimizer bbopt(opt_features, block); + SelBasicBlockOptimizer bbopt(getCtx(), getCtx().getLiveOut(block.bb), opt_features, block); bbopt.run(); } -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet