From: Grigore Lupescu <grigore.lupescu at intel.com> Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com> --- backend/src/backend/gen_context.cpp | 3 +-- backend/src/backend/gen_insn_selection.cpp | 32 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index bf66295..9098a3f 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2390,8 +2390,7 @@ namespace gbe if (wg_op == ir::WORKGROUP_OP_ALL) { if (dataReg.type == GEN_TYPE_D - || dataReg.type == GEN_TYPE_UD - || dataReg.type == GEN_TYPE_F) + || dataReg.type == GEN_TYPE_UD) p->MOV(dataReg, GenRegister::immd(0xFFFFFFFF)); else if(dataReg.type == GEN_TYPE_L || dataReg.type == GEN_TYPE_UL) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 07bdef8..52871b1 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -6474,6 +6474,38 @@ namespace gbe have been assigned the final value. */ sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier); + /* FIX: workgroup all operation by masking correctly + * Temporary fix, assume execution size 16 */ + if(workGroupOp == ir::WORKGROUP_OP_ALL) + { + GenRegister lsize = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32); + GenRegister lsizelw = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32); + sel.MOV(lsize, sel.selReg(ir::ocl::lsize0, TYPE_U32)); + + /* get lower divisible with 0xF part of lsize */ + sel.SHR(lsizelw, lsize, GenRegister::immd(0x4)); + sel.SHL(lsizelw, lsizelw, GenRegister::immd(0x4)); + + /* only interested in last 4 bits */ + sel.AND(lsize, lsize, GenRegister::immd(0xF)); + + sel.push(); { + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.noMask = 1; + + /* mask execution on selected work-items, + * local size must be multiple of execution width 0xF */ + sel.CMP(GEN_CONDITIONAL_EQ, lsize, lsizelw, + GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); + sel.curr.predicate = GEN_PREDICATE_NORMAL; + sel.CMP(GEN_CONDITIONAL_L, lsize, GenRegister::immd(0xF), + GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); + sel.MOV(src, GenRegister::immd(0xFFFFFFFF)); + }sel.pop(); + } + /* compute individual slice of workitems, (e.g. 0->16 workitems) */ sel.MOV(slmOff, GenRegister::immud(insn.getSlmAddr())); -- 2.5.0 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet