When src0 of fdiv is an immediate value that is
        exactly a power of 2, like 2.0f, 4.0f, 1.0/8.0f,
        fdiv %0, imm, %1 can be converted to
        rcp %0, %1
        mul %0, %0, imm.

        Since fdiv costs 8 cycles and rcp costs 4 cycles, this will
        save at least 3 cycles.

        Passes the conformance tests and the utests.

Signed-off-by: rander.wang <rander.w...@intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 7498f38..572f6a8 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3279,6 +3279,35 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
         sel.MATH(dst, function, src0, src1);
       } else if(type == TYPE_FLOAT) {
         GBE_ASSERT(op != OP_REM);
+        SelectionDAG *child0 = dag.child[0];
+        if (child0 && child0->insn.getOpcode() == OP_LOADI) {
+          const auto &loadimm = cast<LoadImmInstruction>(child0->insn);
+          const Immediate imm = loadimm.getImmediate();
+          float immVal = imm.getFloatValue();
+          int* dwPtr = (int*)&immVal;
+
+          //if immedia is a exactly pow of 2, it can be converted to RCP
+          if((*dwPtr & 0x7FFFFF) == 0) {
+            if(immVal == -1.0f)
+            {
+              GenRegister tmp = src1;
+              tmp.negation = 1;
+              sel.MATH(dst, GEN_MATH_FUNCTION_INV, tmp);
+            }
+            else {
+              sel.MATH(dst, GEN_MATH_FUNCTION_INV, src1);
+              if(immVal != 1.0f) {
+                GenRegister isrc = GenRegister::immf(immVal);
+                sel.MUL(dst, dst, isrc);
+              }
+            }
+
+            if(dag.child[1])
+              dag.child[1]->isRoot = 1;
+            return true;
+          }
+        }
+
         sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
       } else if (type == TYPE_S64 || type == TYPE_U64) {
         GenRegister tmp[15];
-- 
2.7.4

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to