Pushed, thanks for the patch and review.
On Thu, Apr 23, 2015 at 04:30:19AM +0000, Song, Ruiling wrote: > Hi Rebecca, > > Thanks for the patch, it looks good. > I find that even with your patch the builtin_tgamma still fails. It turns out > to be the issue of the test case. > So I send a patch "[PATCH] utests: fix test case builtin_tgamma." to fix it. > > Thanks! > Ruiling > > -----Original Message----- > > From: Beignet [mailto:[email protected]] On Behalf Of > > Rebecca N. Palmer > > Sent: Friday, April 17, 2015 8:48 PM > > To: [email protected] > > Subject: [Beignet] [PATCH] Make tgamma meet the accuracy standard > > > > The old tgamma=exp(lgamma) implementation had high rounding error on > > large outputs, exceeding the 16ulp specification for approx. x>8 (hence the > > test failure in strict conformance mode). > > > > Replace this with an implementation based on glibc's > > http://sources.debian.net/src/glibc/2.19-17/sysdeps/ieee754/flt-32/e_gam > > maf_r.c/ > > > > Signed-off-by: Rebecca Palmer <[email protected]> > > > > diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl > > b/backend/src/libocl/tmpl/ocl_math.tmpl.cl > > index da5b9a9..49edd95 100644 > > --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl > > +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl > > @@ -1744,12 +1744,6 @@ OVERLOADABLE float > > __gen_ocl_internal_exp(float x) { > > } > > } > > > > -INLINE_OVERLOADABLE float tgamma(float x) { > > - float y; > > - int s; > > - y=lgamma_r(x,&s); > > - return __gen_ocl_internal_exp(y)*s; > > -} > > > > /* erf,erfc from glibc s_erff.c -- float version of s_erf.c. > > * Conversion to float by Ian Lance Taylor, Cygnus Support, > > [email protected]. > > @@ -3165,6 +3159,95 @@ float __gen_ocl_internal_pown(float x, int y) { > > return sn*z; > > } > > > > +OVERLOADABLE float tgamma (float x) > > +{ > > + /* based on glibc __ieee754_gammaf_r by Ulrich Drepper > > +<[email protected]> */ > > + > > + unsigned int hx; > > + GEN_OCL_GET_FLOAT_WORD(hx,x); > > + if (hx == 0xff800000) > > + { > > + /* x == -Inf. According to ISO this is NaN. */ > > + return NAN; > > + } > > + if ((hx & 0x7f800000) == 0x7f800000) > > + { > > + /* Positive infinity (return positive infinity) or NaN (return > > + NaN). */ > > + return x; > > + } > > + if (x < 0.0f && __gen_ocl_internal_floor (x) == x) > > + { > > + /* integer x < 0 */ > > + return NAN; > > + } > > + > > + if (x >= 36.0f) > > + { > > + /* Overflow. */ > > + return INFINITY; > > + } > > + else if (x <= 0.0f && x >= -FLT_EPSILON / 4.0f) > > + { > > + return 1.0f / x; > > + } > > + else > > + { > > + float sinpix = __gen_ocl_internal_sinpi(x); > > + if (x <= -42.0f) > > + /* Underflow. */ > > + {return 0.0f * sinpix /*for sign*/;} > > + int exp2_adj = 0; > > + float x_abs = __gen_ocl_fabs(x); > > + float gam0; > > + > > + if (x_abs < 4.0f) { > > + /* gamma = exp(lgamma) is only accurate for small lgamma */ > > + float prod,x_adj; > > + if (x_abs < 0.5f) { > > + prod = 1.0f / x_abs; > > + x_adj = x_abs + 1.0f; > > + } else if (x_abs <= 1.5f) { > > + prod = 1.0f; > > + x_adj = x_abs; > > + } else if (x_abs < 2.5f) { > > + x_adj = x_abs - 1.0f; > > + prod = x_adj; > > + } else { > > + x_adj = x_abs - 2.0f; > > + prod = x_adj * (x_abs - 1.0f); > > + } > > + gam0 = __gen_ocl_internal_exp (lgamma (x_adj)) * prod; > > + } > > + else { > > + /* Compute gamma (X) using Stirling's approximation, > > + starting by computing pow (X, X) with a power of 2 > > + factored out to avoid intermediate overflow. */ > > + float x_int = __gen_ocl_internal_round (x_abs); > > + float x_frac = x_abs - x_int; > > + int x_log2; > > + float x_mant = frexp (x_abs, &x_log2); > > + if (x_mant < M_SQRT1_2_F) > > + { > > + x_log2--; > > + x_mant *= 2.0f; > > + } > > + exp2_adj = x_log2 * (int) x_int; > > + float ret = (__gen_ocl_internal_pow(x_mant, x_abs) > > + * exp2 (x_log2 * x_frac) > > + * __gen_ocl_internal_exp (-x_abs) > > + * sqrt (2.0f * M_PI_F / x_abs) ); > > + > > + float x2 = x_abs * x_abs; > > + float bsum = (0x3.403404p-12f / x2 -0xb.60b61p-12f) / x2 + > > 0x1.555556p-4f; > > + gam0 = ret + ret * __gen_ocl_internal_expm1 (bsum / x_abs); > > + } > > + if (x > 0.0f) {return __gen_ocl_internal_ldexp (gam0, exp2_adj);} > > + float gam1 = M_PI_F / (-x * sinpix * gam0); > > + return __gen_ocl_internal_ldexp (gam1, -exp2_adj); > > + } > > +} > > + > > OVERLOADABLE float hypot(float x, float y) { > > if (__ocl_math_fastpath_flag) > > return __gen_ocl_internal_fastpath_hypot(x, y); > > > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
