From: Luo Xionghu <[email protected]> Replace the sin/cos non-standard math intrinsic calls with LLVM intrinsics. Translate them to llvm.xxx for the fast path, and refine the calls to use emitUnaryCallInst. v2: add some file changes, like ocl_math.tmpl.cl and the hxx, that were missed.
Signed-off-by: Luo Xionghu <[email protected]> --- backend/src/libocl/tmpl/ocl_math.tmpl.cl | 4 +- backend/src/llvm/llvm_gen_backend.cpp | 59 ++++++++---------------------- backend/src/llvm/llvm_gen_ocl_function.hxx | 2 - 3 files changed, 18 insertions(+), 47 deletions(-) diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl index 49c4efa..8f726ff 100644 --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl @@ -24,8 +24,8 @@ constant int __ocl_math_fastpath_flag = 1; PURE CONST float __gen_ocl_fabs(float x); -PURE CONST float __gen_ocl_sin(float x); -PURE CONST float __gen_ocl_cos(float x); +CONST float __gen_ocl_sin(float x) __asm("llvm.sin" ".f32"); +CONST float __gen_ocl_cos(float x) __asm("llvm.cos" ".f32"); PURE CONST float __gen_ocl_sqrt(float x); PURE CONST float __gen_ocl_rsqrt(float x); PURE CONST float __gen_ocl_log(float x); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index d10d5c0..34c571e 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2794,13 +2794,13 @@ error: case Intrinsic::bswap: this->newRegister(&I); break; + case Intrinsic::fabs: case Intrinsic::sqrt: case Intrinsic::ceil: case Intrinsic::fma: case Intrinsic::trunc: - this->newRegister(&I); - break; - case Intrinsic::fabs: + case Intrinsic::sin: + case Intrinsic::cos: this->newRegister(&I); break; default: @@ -2854,8 +2854,6 @@ error: case GEN_OCL_FBH: case GEN_OCL_FBL: case GEN_OCL_CBIT: - case GEN_OCL_COS: - case GEN_OCL_SIN: case GEN_OCL_SQR: case GEN_OCL_RSQ: case GEN_OCL_LOG: @@ -3056,6 +3054,13 @@ error: if (Function *F = I.getCalledFunction()) { if (F->getIntrinsicID() != 0) { const ir::Function &fn = ctx.getFunction(); + + // Get the function arguments + CallSite CS(&I); + CallSite::arg_iterator AI = CS.arg_begin(); +#if GBE_DEBUG + CallSite::arg_iterator AE = CS.arg_end(); +#endif /* GBE_DEBUG */ switch 
(F->getIntrinsicID()) { case Intrinsic::stacksave: { @@ -3212,29 +3217,6 @@ error: } } break; - case Intrinsic::sqrt: - { - const ir::Register dst = this->getRegister(&I); - const ir::Register src = this->getRegister(I.getOperand(0)); - ctx.ALU1(ir::OP_SQR, ir::TYPE_FLOAT, dst, src); - } - break; - case Intrinsic::fabs: - { - ir::Type srcType = getType(ctx, I.getType()); - const ir::Register dst = this->getRegister(&I); - const ir::Register src = this->getRegister(I.getOperand(0)); - ctx.ALU1(ir::OP_ABS, srcType, dst, src); - } - break; - case Intrinsic::ceil: - { - ir::Type srcType = getType(ctx, I.getType()); - const ir::Register dst = this->getRegister(&I); - const ir::Register src = this->getRegister(I.getOperand(0)); - ctx.ALU1(ir::OP_RNDU, srcType, dst, src); - } - break; case Intrinsic::ctlz: { Type *llvmDstType = I.getType(); @@ -3286,19 +3268,12 @@ error: ctx.MAD(srcType, dst, src0, src1, src2); } break; - case Intrinsic::trunc: - { - Type *llvmDstType = I.getType(); - Type *llvmSrcType = I.getOperand(0)->getType(); - ir::Type dstType = getType(ctx, llvmDstType); - ir::Type srcType = getType(ctx, llvmSrcType); - GBE_ASSERT(srcType == dstType); - - const ir::Register dst = this->getRegister(&I); - const ir::Register src = this->getRegister(I.getOperand(0)); - ctx.RNDZ(dstType, dst, src); - } - break; + case Intrinsic::sqrt: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break; + case Intrinsic::ceil: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break; + case Intrinsic::fabs: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break; + case Intrinsic::trunc: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break; + case Intrinsic::sin: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; + case Intrinsic::cos: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; default: NOT_IMPLEMENTED; } } else { @@ -3367,8 +3342,6 @@ error: ctx.REGION(dst, src, x.getIntegerValue()); break; } - case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; - case GEN_OCL_SIN: 
this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break; case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 8ec8336..0ae7ec2 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -20,8 +20,6 @@ DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim) // Math function DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs) -DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos) -DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin) DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt) DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt) DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log) -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
