From: Lv Meng <meng...@intel.com>
Signed-off-by: Lv Meng <meng...@intel.com> --- backend/src/ocl_stdlib.tmpl.h | 172 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 170 insertions(+), 2 deletions(-) mode change 100644 => 100755 backend/src/ocl_stdlib.tmpl.h diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h old mode 100644 new mode 100755 index e5f356e..e380b79 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -1851,13 +1851,180 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) { } INLINE_OVERLOADABLE float __gen_ocl_internal_floor(float x) { return __gen_ocl_rndd(x); } INLINE_OVERLOADABLE float __gen_ocl_internal_ceil(float x) { return __gen_ocl_rndu(x); } -INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) { return native_exp(x); } INLINE_OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); } -INLINE_OVERLOADABLE float fmod(float x, float y) { return x-y*__gen_ocl_rndz(x/y); } INLINE_OVERLOADABLE float remainder(float x, float y) { return x-y*__gen_ocl_rnde(x/y); } INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) { return __gen_ocl_rnde(x); } + +typedef union{ float value; int word;} ieee_float_shape_type; + +/* Get a 32 bit int from a float. */ +#ifndef GET_FLOAT_WORD +# define GET_FLOAT_WORD(i,d) \ +do { \ + ieee_float_shape_type gf_u; \ + gf_u.value = (d); \ + (i) = gf_u.word; \ +} while (0) +#endif +/* Set a float from a 32 bit int. */ +#ifndef SET_FLOAT_WORD +# define SET_FLOAT_WORD(d,i) \ +do { \ + ieee_float_shape_type sf_u; \ + sf_u.word = (i); \ + (d) = sf_u.value; \ +} while (0) +#endif +INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) { + //return native_exp(x); + float o_threshold = 8.8721679688e+01, /* 0x42b17180 */ + u_threshold = -1.0397208405e+02, /* 0xc2cff1b5 */ + twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */ + ivln2 = 1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */ + one = 1.0, + huge = 1.0e+30, + P1 = 1.6666667163e-01, /* 0x3e2aaaab */ + P2 = -2.7777778450e-03, /* 0xbb360b61 */ + P3 = 6.6137559770e-05, /* 0x388ab355 */ + P4 = -1.6533901999e-06, /* 0xb5ddea0e */ + P5 = 4.1381369442e-08; /* 0x3331bb4c */ + float ln2HI[2],ln2LO[2],halF[2]; + float y,hi=0.0,lo=0.0,c,t; + int k=0,xsb; + unsigned hx; + ln2HI[0] = 6.9313812256e-01; /* 0x3f317180 */ + ln2HI[1] = -6.9313812256e-01; /* 0xbf317180 */ + ln2LO[0] = 9.0580006145e-06; /* 0x3717f7d1 */ + ln2LO[1] = -9.0580006145e-06; /* 0xb717f7d1 */ + halF[0] = 0.5; + halF[1] = -0.5; + + GET_FLOAT_WORD(hx,x); + xsb = (hx>>31)&1; /* sign bit of x */ + hx &= 0x7fffffff; /* high word of |x| */ + + /* filter out non-finite argument */ + if(hx >= 0x42b17218) { /* if |x|>=88.721... */ + if(hx>0x7f800000) + return x+x; /* NaN */ + if(hx==0x7f800000) + return (xsb==0)? x:0.0; /* exp(+-inf)={inf,0} */ + if(x > o_threshold) return huge*huge; /* overflow */ + if(x < u_threshold) return twom100*twom100; /* underflow */ + } + /* argument reduction */ + if(hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */ + if(hx < 0x3F851592) { /* and |x| < 1.5 ln2 */ + hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb; + } else { + k = ivln2*x+halF[xsb]; + t = k; + hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */ + lo = t*ln2LO[0]; + } + x = hi - lo; + } + else if(hx < 0x31800000) { /* when |x|<2**-28 */ + if(huge+x>one) return one+x;/* trigger inexact */ + } + else k = 0; + + /* x is now in primary range */ + t = x*x; + c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); + if(k==0) + return one-((x*c)/(c-(float)2.0)-x); + else + y = one-((lo-(x*c)/((float)2.0-c))-hi); + if(k >= -125) { + unsigned hy; + GET_FLOAT_WORD(hy,y); + SET_FLOAT_WORD(y,hy+(k<<23)); /* add k to y's exponent */ + return y; + } else { + unsigned hy; + GET_FLOAT_WORD(hy,y); + SET_FLOAT_WORD(y,hy+((k+100)<<23)); /* add k to y's exponent */ + return y*twom100; + } +} +INLINE_OVERLOADABLE float __gen_ocl_internal_fmod (float x, float y) { + //return x-y*__gen_ocl_rndz(x/y); + float one = 1.0; + float Zero[2]; + int n,hx,hy,hz,ix,iy,sx,i; + Zero[0] = 0.0; + Zero[1] = -0.0; + GET_FLOAT_WORD(hx,x); + GET_FLOAT_WORD(hy,y); + sx = hx&0x80000000; /* sign of x */ + hx ^=sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + /* purge off exception values */ + if(hy==0||(hx>=0x7f800000)|| /* y=0,or x not finite */ + (hy>0x7f800000)) /* or y is NaN */ + return (x*y)/(x*y); + if(hx<hy) return x; /* |x|<|y| return x */ + if(hx==hy) + return Zero[(unsigned)sx>>31]; /* |x|=|y| return x*0*/ + + /* determine ix = ilogb(x) */ + if(hx<0x00800000) { /* subnormal x */ + for (ix = -126,i=(hx<<8); i>0; i<<=1) ix -=1; + } else ix = (hx>>23)-127; + + /* determine iy = ilogb(y) */ + if(hy<0x00800000) { /* subnormal y */ + for (iy = -126,i=(hy<<8); i>=0; i<<=1) iy -=1; + } else iy = (hy>>23)-127; + + /* set up {hx,lx}, {hy,ly} and align y to x */ + if(ix >= -126) + hx = 0x00800000|(0x007fffff&hx); + else { /* subnormal x, shift x to normal */ + n = -126-ix; + hx = hx<<n; + } + if(iy >= -126) + hy = 0x00800000|(0x007fffff&hy); + else { /* subnormal y, shift y to normal */ + n = -126-iy; + hy = hy<<n; + } + /* fix point fmod */ + n = ix - iy; + while(n--) { + hz=hx-hy; + if(hz<0){hx = hx+hx;} + else { + if(hz==0) /* return sign(x)*0 */ + return Zero[(unsigned)sx>>31]; + hx = hz+hz; + } + } + hz=hx-hy; + if(hz>=0) {hx=hz;} + + /* convert back to floating value and restore the sign */ + if(hx==0) /* return sign(x)*0 */ + return Zero[(unsigned)sx>>31]; + while(hx<0x00800000) { /* normalize x */ + hx = hx+hx; + iy -= 1; + } + if(iy>= -126) { /* normalize output */ + hx = ((hx-0x00800000)|((iy+127)<<23)); + SET_FLOAT_WORD(x,hx|sx); + } else { /* subnormal output */ + n = -126 - iy; + hx >>= n; + SET_FLOAT_WORD(x,hx|sx); + x *= one; /* create necessary signal */ + } + return x; /* exact output */ +} // TODO use llvm intrinsics definitions #define cos native_cos #define cospi __gen_ocl_internal_cospi @@ -1885,6 +2052,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) { #define copysign __gen_ocl_internal_copysign #define erf __gen_ocl_internal_erf #define erfc __gen_ocl_internal_erfc +#define fmod __gen_ocl_internal_fmod PURE CONST float __gen_ocl_mad(float a, float b, float c); INLINE_OVERLOADABLE float mad(float a, float b, float c) { -- 1.7.10.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet