This patch LGTM. Note that the uniform dest result is still not calculated correctly because of how sub_reg_nr is set; my later patch will fix this issue.
Luo Xionghu
Best Regards

-----Original Message-----
From: Beignet [mailto:[email protected]] On Behalf Of Guo Yejun
Sent: Monday, April 25, 2016 9:54 AM
To: [email protected]
Cc: Guo, Yejun <[email protected]>
Subject: [Beignet] [PATCH] set SIMD width as 1 for mad when the dst is uniform

Signed-off-by: Guo Yejun <[email protected]>
---
 backend/src/backend/gen8_encoder.cpp | 6 +++++-
 backend/src/backend/gen_insn_selection.cpp | 12 ++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index 16b3fc6..32a096b 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -503,7 +503,11 @@ namespace gbe
 gen8_insn->bits1.da3src.dest_writemask = 0xf;
 this->setHeader(insn);
 gen8_insn->header.access_mode = GEN_ALIGN_16;
- gen8_insn->header.execution_size = GEN_WIDTH_8;
+
+ if (this->curr.execWidth == 1)
+ gen8_insn->header.execution_size = GEN_WIDTH_1;
+ else
+ gen8_insn->header.execution_size = GEN_WIDTH_8;
 
 assert(src0.file == GEN_GENERAL_REGISTER_FILE);
 assert(src0.address_mode == GEN_ADDRESS_DIRECT);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d157009..9e6c6be 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3133,7 +3133,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
 const GenRegister src1 = sel.selReg(child0->insn.getSrc(1), TYPE_FLOAT);
 GenRegister src2 = sel.selReg(insn.getSrc(1), TYPE_FLOAT);
 if(insn.getOpcode() == ir::OP_SUB) src2 = GenRegister::negate(src2);
+ sel.push();
+ if (sel.isScalarReg(insn.getDst(0)))
+ sel.curr.execWidth = 1;
 sel.MAD(dst, src2, src0, src1); // order different on HW!
+ sel.pop();
 if (child0->child[0]) child0->child[0]->isRoot = 1;
 if (child0->child[1]) child0->child[1]->isRoot = 1;
 if (child1) child1->isRoot = 1;
@@ -3145,7 +3149,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
 const GenRegister src1 = sel.selReg(child1->insn.getSrc(1), TYPE_FLOAT);
 const GenRegister src2 = sel.selReg(insn.getSrc(0), TYPE_FLOAT);
 if(insn.getOpcode() == ir::OP_SUB) src0 = GenRegister::negate(src0);
+ sel.push();
+ if (sel.isScalarReg(insn.getDst(0)))
+ sel.curr.execWidth = 1;
 sel.MAD(dst, src2, src0, src1); // order different on HW!
+ sel.pop();
 if (child1->child[0]) child1->child[0]->isRoot = 1;
 if (child1->child[1]) child1->child[1]->isRoot = 1;
 if (child0) child0->isRoot = 1;
@@ -5285,7 +5293,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
 }
 case OP_MAD:
 {
+ sel.push();
+ if (sel.isScalarReg(insn.getDst(0)))
+ sel.curr.execWidth = 1;
 sel.MAD(dst, src2, src0, src1);
+ sel.pop();
 break;
 }
 case OP_LRP:
-- 
1.9.1

_______________________________________________
Beignet mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/beignet
