Re: [Beignet] [PATCH] backend: refine fdiv to rcp at some cases
One comment below. Thanks. > -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > rander.wang > Sent: Monday, June 19, 2017 13:34 > To: beig...@freedesktop.org > Cc: Wang, Rander <rander.w...@intel.com> > Subject: [Beignet] [PATCH] backend: refine fdiv to rcp at some cases > > when the src0 of fdiv is a immedia value and it is > exactly pow of 2, like 2.0f, 4.0f, 1.0/8.0f, > fdiv %0, imm, %1 can be convert to > rcp %0, %1 > mul %0, %0, imm. > > for fdiv cost 8cycle, rcp 4cycle. it will save at least > 3cycle. > > pass the conformance test and utests > > Signed-off-by: rander.wang <rander.w...@intel.com> > --- > backend/src/backend/gen_insn_selection.cpp | 29 > + > 1 file changed, 29 insertions(+) > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 7498f38..572f6a8 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -3279,6 +3279,35 @@ extern bool OCL_DEBUGINFO; // first defined by > calling BVAR in program.cpp > sel.MATH(dst, function, src0, src1); >} else if(type == TYPE_FLOAT) { > GBE_ASSERT(op != OP_REM); > +SelectionDAG *child0 = dag.child[0]; > +if (child0 && child0->insn.getOpcode() == OP_LOADI) { > + const auto = cast(child0->insn); > + const Immediate imm = loadimm.getImmediate(); > + float immVal = imm.getFloatValue(); > + int* dwPtr = (int*) > + > + //if immedia is a exactly pow of 2, it can be converted to RCP > + if((*dwPtr & 0x7F) == 0) { > +if(immVal == -1.0f) > +{ > + GenRegister tmp = src1; > + tmp.negation = 1; This is wrong when src1.negation is already 1: assigning 1 leaves the operand negated instead of flipping its sign. You could use GenRegister::negate() directly instead. ___ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] backend: refine fdiv to rcp at some cases
When the src0 of fdiv is an immediate value that is exactly a power of 2 (like 2.0f, 4.0f, or 1.0/8.0f), "fdiv %0, imm, %1" can be converted to "rcp %0, %1" followed by "mul %0, %0, imm". fdiv costs 8 cycles and rcp costs 4 cycles, so this saves at least 3 cycles. Passes the conformance tests and utests. Signed-off-by: rander.wang <rander.w...@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 29 + 1 file changed, 29 insertions(+) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 7498f38..572f6a8 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -3279,6 +3279,35 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.MATH(dst, function, src0, src1); } else if(type == TYPE_FLOAT) { GBE_ASSERT(op != OP_REM); +SelectionDAG *child0 = dag.child[0]; +if (child0 && child0->insn.getOpcode() == OP_LOADI) { + const auto = cast(child0->insn); + const Immediate imm = loadimm.getImmediate(); + float immVal = imm.getFloatValue(); + int* dwPtr = (int*) + + //if immedia is a exactly pow of 2, it can be converted to RCP + if((*dwPtr & 0x7F) == 0) { +if(immVal == -1.0f) +{ + GenRegister tmp = src1; + tmp.negation = 1; + sel.MATH(dst, GEN_MATH_FUNCTION_INV, tmp); +} +else { + sel.MATH(dst, GEN_MATH_FUNCTION_INV, src1); + if(immVal != 1.0f) { +GenRegister isrc = GenRegister::immf(immVal); +sel.MUL(dst, dst, isrc); + } +} + +if(dag.child[1]) + dag.child[1]->isRoot = 1; +return true; + } +} + sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1); } else if (type == TYPE_S64 || type == TYPE_U64) { GenRegister tmp[15]; -- 2.7.4 ___ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet