This version LGTM, will push later, thanks.
On Fri, Jan 30, 2015 at 02:24:31PM +0800, [email protected] wrote: > From: Luo Xionghu <[email protected]> > > replace sin/cos non-stardard math intrinsic call with llvm intrinsic. > translate them to llvm.xxx for fast path, refine the calls to call > emitUnaryCallInst. > v2: some file changes like ocl_math.tmpl.cl and hxx missed. > > Signed-off-by: Luo Xionghu <[email protected]> > --- > backend/src/libocl/tmpl/ocl_math.tmpl.cl | 4 +- > backend/src/llvm/llvm_gen_backend.cpp | 59 > ++++++++---------------------- > backend/src/llvm/llvm_gen_ocl_function.hxx | 2 - > 3 files changed, 18 insertions(+), 47 deletions(-) > > diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl > b/backend/src/libocl/tmpl/ocl_math.tmpl.cl > index 49c4efa..8f726ff 100644 > --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl > +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl > @@ -24,8 +24,8 @@ > constant int __ocl_math_fastpath_flag = 1; > > PURE CONST float __gen_ocl_fabs(float x); > -PURE CONST float __gen_ocl_sin(float x); > -PURE CONST float __gen_ocl_cos(float x); > +CONST float __gen_ocl_sin(float x) __asm("llvm.sin" ".f32"); > +CONST float __gen_ocl_cos(float x) __asm("llvm.cos" ".f32"); > PURE CONST float __gen_ocl_sqrt(float x); > PURE CONST float __gen_ocl_rsqrt(float x); > PURE CONST float __gen_ocl_log(float x); > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index d10d5c0..34c571e 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -2794,13 +2794,13 @@ error: > case Intrinsic::bswap: > this->newRegister(&I); > break; > + case Intrinsic::fabs: > case Intrinsic::sqrt: > case Intrinsic::ceil: > case Intrinsic::fma: > case Intrinsic::trunc: > - this->newRegister(&I); > - break; > - case Intrinsic::fabs: > + case Intrinsic::sin: > + case Intrinsic::cos: > this->newRegister(&I); > break; > default: > @@ -2854,8 +2854,6 @@ error: > case GEN_OCL_FBH: > case GEN_OCL_FBL: > case GEN_OCL_CBIT: > 
- case GEN_OCL_COS: > - case GEN_OCL_SIN: > case GEN_OCL_SQR: > case GEN_OCL_RSQ: > case GEN_OCL_LOG: > @@ -3056,6 +3054,13 @@ error: > if (Function *F = I.getCalledFunction()) { > if (F->getIntrinsicID() != 0) { > const ir::Function &fn = ctx.getFunction(); > + > + // Get the function arguments > + CallSite CS(&I); > + CallSite::arg_iterator AI = CS.arg_begin(); > +#if GBE_DEBUG > + CallSite::arg_iterator AE = CS.arg_end(); > +#endif /* GBE_DEBUG */ > switch (F->getIntrinsicID()) { > case Intrinsic::stacksave: > { > @@ -3212,29 +3217,6 @@ error: > } > } > break; > - case Intrinsic::sqrt: > - { > - const ir::Register dst = this->getRegister(&I); > - const ir::Register src = this->getRegister(I.getOperand(0)); > - ctx.ALU1(ir::OP_SQR, ir::TYPE_FLOAT, dst, src); > - } > - break; > - case Intrinsic::fabs: > - { > - ir::Type srcType = getType(ctx, I.getType()); > - const ir::Register dst = this->getRegister(&I); > - const ir::Register src = this->getRegister(I.getOperand(0)); > - ctx.ALU1(ir::OP_ABS, srcType, dst, src); > - } > - break; > - case Intrinsic::ceil: > - { > - ir::Type srcType = getType(ctx, I.getType()); > - const ir::Register dst = this->getRegister(&I); > - const ir::Register src = this->getRegister(I.getOperand(0)); > - ctx.ALU1(ir::OP_RNDU, srcType, dst, src); > - } > - break; > case Intrinsic::ctlz: > { > Type *llvmDstType = I.getType(); > @@ -3286,19 +3268,12 @@ error: > ctx.MAD(srcType, dst, src0, src1, src2); > } > break; > - case Intrinsic::trunc: > - { > - Type *llvmDstType = I.getType(); > - Type *llvmSrcType = I.getOperand(0)->getType(); > - ir::Type dstType = getType(ctx, llvmDstType); > - ir::Type srcType = getType(ctx, llvmSrcType); > - GBE_ASSERT(srcType == dstType); > - > - const ir::Register dst = this->getRegister(&I); > - const ir::Register src = this->getRegister(I.getOperand(0)); > - ctx.RNDZ(dstType, dst, src); > - } > - break; > + case Intrinsic::sqrt: this->emitUnaryCallInst(I,CS,ir::OP_SQR); > break; > + case Intrinsic::ceil: 
this->emitUnaryCallInst(I,CS,ir::OP_RNDU); > break; > + case Intrinsic::fabs: this->emitUnaryCallInst(I,CS,ir::OP_ABS); > break; > + case Intrinsic::trunc: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); > break; > + case Intrinsic::sin: this->emitUnaryCallInst(I,CS,ir::OP_SIN); > break; > + case Intrinsic::cos: this->emitUnaryCallInst(I,CS,ir::OP_COS); > break; > default: NOT_IMPLEMENTED; > } > } else { > @@ -3367,8 +3342,6 @@ error: > ctx.REGION(dst, src, x.getIntegerValue()); > break; > } > - case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; > - case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; > case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; > case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break; > case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break; > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 8ec8336..0ae7ec2 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -20,8 +20,6 @@ DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim) > > // Math function > DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs) > -DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos) > -DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin) > DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt) > DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt) > DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log) > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
