From: Luo Xionghu <[email protected]> Translate native pow to llvm.pow for the fast path.
Signed-off-by: Luo Xionghu <[email protected]> --- backend/src/libocl/tmpl/ocl_math.tmpl.cl | 2 +- backend/src/llvm/llvm_gen_backend.cpp | 18 +++++++++--------- backend/src/llvm/llvm_gen_ocl_function.hxx | 1 - 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl index d9e677b..da5b9a9 100644 --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl @@ -30,7 +30,7 @@ CONST float __gen_ocl_sqrt(float x) __asm("llvm.sqrt" ".f32"); PURE CONST float __gen_ocl_rsqrt(float x); CONST float __gen_ocl_log(float x) __asm("llvm.log2" ".f32"); CONST float __gen_ocl_exp(float x) __asm("llvm.exp2" ".f32"); -PURE CONST float __gen_ocl_pow(float x, float y); +PURE CONST float __gen_ocl_pow(float x, float y) __asm("llvm.pow" ".f32"); PURE CONST float __gen_ocl_rcp(float x); CONST float __gen_ocl_rndz(float x) __asm("llvm.trunc" ".f32"); CONST float __gen_ocl_rnde(float x) __asm("llvm.rint" ".f32"); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index d9ac6e0..773300b 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2618,6 +2618,7 @@ namespace gbe case Intrinsic::cos: case Intrinsic::log2: case Intrinsic::exp2: + case Intrinsic::pow: this->newRegister(&I); break; default: @@ -2672,7 +2673,6 @@ namespace gbe case GEN_OCL_FBL: case GEN_OCL_CBIT: case GEN_OCL_RSQ: - case GEN_OCL_POW: case GEN_OCL_RCP: case GEN_OCL_ABS: case GEN_OCL_GET_IMAGE_WIDTH: @@ -3005,6 +3005,14 @@ namespace gbe case Intrinsic::exp2: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break; case Intrinsic::bswap: this->emitUnaryCallInst(I,CS,ir::OP_BSWAP, getUnsignedType(ctx, I.getType())); break; + case Intrinsic::pow: + { + const ir::Register src0 = this->getRegister(*AI); ++AI; + const ir::Register src1 = this->getRegister(*AI); + const ir::Register dst = this->getRegister(&I); + 
ctx.POW(ir::TYPE_FLOAT, dst, src0, src1); + break; + } default: NOT_IMPLEMENTED; } } else { @@ -3021,14 +3029,6 @@ namespace gbe #endif /* GBE_DEBUG */ switch (genIntrinsicID) { - case GEN_OCL_POW: - { - const ir::Register src0 = this->getRegister(*AI); ++AI; - const ir::Register src1 = this->getRegister(*AI); - const ir::Register dst = this->getRegister(&I); - ctx.POW(ir::TYPE_FLOAT, dst, src0, src1); - break; - } case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break; case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break; case GEN_OCL_CBIT: this->emitUnaryCallInst(I,CS,ir::OP_CBIT, getUnsignedType(ctx, (*AI)->getType())); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 5f5451c..9536a3c 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -20,7 +20,6 @@ DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim) // Math function DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt) -DECL_LLVM_GEN_FUNCTION(POW, __gen_ocl_pow) DECL_LLVM_GEN_FUNCTION(RCP, __gen_ocl_rcp) DECL_LLVM_GEN_FUNCTION(FMAX, __gen_ocl_fmax) DECL_LLVM_GEN_FUNCTION(FMIN, __gen_ocl_fmin) -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
