As one bool value may be used in multiple basic blocks, we have to validate its value by ANDing it with the current flag register.
This patch is not fully optimized: we can avoid the validation if we know this bool value has already been validated in the same basic block. I will write another patch to do this optimization. After this patch, all of OpenCV's filter/blur and filter/filter2D tests passed. Signed-off-by: Zhigang Gong <zhigang.g...@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 34 +++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 8e6586b..063391c 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -3005,6 +3005,27 @@ namespace gbe /*! Branch instruction pattern */ DECL_PATTERN(BranchInstruction) { + + // Get active pred. + const ir::Register getActivePred(Selection::Opaque &sel, + const ir::Register pred) const + { + using namespace ir; + GenRegister flagReg; + Register activePred = sel.reg(FAMILY_BOOL); + + sel.push(); + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.noMask = 1; + if(sel.curr.physicalFlag) + flagReg = GenRegister::flag(sel.curr.flag, sel.curr.subFlag); + else + flagReg = sel.selReg(ir::Register(sel.curr.flagIndex), ir::TYPE_U16); + sel.AND(sel.selReg(activePred, TYPE_U16), flagReg, sel.selReg(pred, TYPE_U16)); + sel.pop(); + return activePred; + } + void emitForwardBranch(Selection::Opaque &sel, const ir::BranchInstruction &insn, ir::LabelIndex dst, @@ -3022,11 +3043,12 @@ namespace gbe if (insn.isPredicated() == true) { const Register pred = insn.getPredicateIndex(); + const Register activePred = getActivePred(sel, pred); // Update the PcIPs sel.push(); sel.curr.physicalFlag = 0; - sel.curr.flagIndex = uint16_t(pred); + sel.curr.flagIndex = uint16_t(activePred); sel.MOV(ip, GenRegister::immuw(uint16_t(dst))); sel.pop(); @@ -3040,7 +3062,7 @@ namespace gbe sel.push(); sel.curr.physicalFlag = 0; - sel.curr.flagIndex = uint16_t(pred); + sel.curr.flagIndex = 
uint16_t(activePred); sel.curr.predicate = GEN_PREDICATE_NONE; sel.CMP(GEN_CONDITIONAL_G, ip, GenRegister::immuw(nextLabel)); @@ -3052,7 +3074,7 @@ namespace gbe sel.curr.execWidth = 1; sel.curr.noMask = 1; GenRegister notEmaskReg = GenRegister::uw1grf(ocl::notemask); - sel.OR(sel.selReg(pred, TYPE_U16), sel.selReg(pred, TYPE_U16), notEmaskReg); + sel.OR(sel.selReg(activePred, TYPE_U16), sel.selReg(activePred, TYPE_U16), notEmaskReg); if (simdWidth == 8) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; @@ -3094,7 +3116,7 @@ namespace gbe if (insn.isPredicated() == true) { const Register pred = insn.getPredicateIndex(); - + const Register activePred = getActivePred(sel, pred); // Update the PcIPs for all the branches. Just put the IPs of the next // block. Next instruction will properly reupdate the IPs of the lanes @@ -3105,7 +3127,7 @@ namespace gbe sel.push(); // Re-update the PcIPs for the branches that takes the backward jump sel.curr.physicalFlag = 0; - sel.curr.flagIndex = uint16_t(pred); + sel.curr.flagIndex = uint16_t(activePred); sel.MOV(ip, GenRegister::immuw(uint16_t(dst))); // We clear all the inactive channel to 0 as the GEN_PREDICATE_ALIGN1_ANY8/16 @@ -3114,7 +3136,7 @@ namespace gbe sel.curr.execWidth = 1; sel.curr.noMask = 1; GenRegister emaskReg = GenRegister::uw1grf(ocl::emask); - sel.AND(sel.selReg(pred, TYPE_U16), sel.selReg(pred, TYPE_U16), emaskReg); + sel.AND(sel.selReg(activePred, TYPE_U16), sel.selReg(activePred, TYPE_U16), emaskReg); // Branch to the jump target if (simdWidth == 8) -- 1.7.9.5 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet