The tests pass with multi-threading disabled. Thanks. -----Original Message----- From: [email protected] [mailto:[email protected]] On Behalf Of Lv Meng Sent: Friday, December 20, 2013 10:37 AM To: [email protected] Cc: Lv, Meng Subject: [Beignet] [PATCH] [PATCH]GBE: improve precision of tanh
Signed-off-by: Lv Meng <[email protected]> --- backend/src/ocl_stdlib.tmpl.h | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index da8b411..4c14f63 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -1662,10 +1662,6 @@ INLINE_OVERLOADABLE float sincos(float x, private float *cosval) { BODY; } INLINE_OVERLOADABLE float __gen_ocl_internal_cosh(float x) { return (1 + native_exp(-2 * x)) / (2 * native_exp(-x)); } -INLINE_OVERLOADABLE float __gen_ocl_internal_tanh(float x) { - float y = native_exp(-2 * x); - return (1 - y) / (1 + y); -} INLINE float __gen_ocl_asin_util(float x) { /* @@ -2209,6 +2205,41 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_sinh(float x){ return x*shuge; } +INLINE_OVERLOADABLE float __gen_ocl_internal_tanh(float x) { + //float y = native_exp(-2 * x); + //return (1 - y) / (1 + y); + float one=1.0, two=2.0, tiny = 1.0e-30; + float t,z; + int jx,ix; + GEN_OCL_GET_FLOAT_WORD(jx,x); + ix = jx&0x7fffffff; + /* x is INF or NaN */ + if(ix>=0x7f800000) { + if (jx>=0) + return one/x+one; /* tanh(+-inf)=+-1 */ + else + return one/x-one; /* tanh(NaN) = NaN */ + } + + if (ix < 0x41b00000) { /* |x|<22 */ + if (ix == 0) + return x; /* x == +-0 */ + if (ix<0x24000000) /* |x|<2**-55 */ + return x*(one+x); /* tanh(small) = small */ + if (ix>=0x3f800000) { /* |x|>=1 */ + t = __gen_ocl_internal_expm1(two*__gen_ocl_internal_fabs(x)); + z = one - two/(t+two); + } else { + t = __gen_ocl_internal_expm1(-two*__gen_ocl_internal_fabs(x)); + z= -t/(t+two); + } + } else { /* |x| > 22, return +-1 */ + z = one - tiny; /* raised inexact flag */ + } + return (jx>=0)? 
z: -z; +} + + // TODO use llvm intrinsics definitions #define cos native_cos #define cospi __gen_ocl_internal_cospi -- 1.7.10.4 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
