copy it from fdlibm Signed-off-by: rander <rander.w...@intel.com> --- backend/src/libocl/tmpl/ocl_math.tmpl.cl | 112 ++++++++++++++++++++++++++++ backend/src/libocl/tmpl/ocl_math.tmpl.h | 1 + backend/src/libocl/tmpl/ocl_math_20.tmpl.cl | 112 ++++++++++++++++++++++++++++ backend/src/libocl/tmpl/ocl_math_20.tmpl.h | 1 + 4 files changed, 226 insertions(+)
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl index 95fbf0b..a50d4db 100644 --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl @@ -4442,6 +4442,118 @@ OVERLOADABLE double fmin(double a, double b) return (c <= 0) ? a:b; } +OVERLOADABLE double fmod (double x, double y) +{ + const double one = 1.0, Zero[] = {0.0, -0.0,}; + int n,hx,hy,hz,ix,iy,sx,i; + uint lx,ly,lz; + + hx = __HI(x); + lx = __LO(x); + hy = __HI(y); + ly = __LO(y); + sx = hx&0x80000000; /* sign of x */ + hx ^=sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + + /* purge off exception values */ + if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */ + ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */ + return (x*y)/(x*y); + if(hx<=hy) { + if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */ + if(lx==ly) + return Zero[(uint)sx>>31]; /* |x|=|y| return x*0*/ + } + + /* determine ix = ilogb(x) */ + if(hx<0x00100000) { /* subnormal x */ + if(hx==0) { + for (ix = -1043, i=lx; i>0; i<<=1) ix -=1; + } else { + for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1; + } + } else ix = (hx>>20)-1023; + + /* determine iy = ilogb(y) */ + if(hy<0x00100000) { /* subnormal y */ + if(hy==0) { + for (iy = -1043, i=ly; i>0; i<<=1) iy -=1; + } else { + for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1; + } + } else iy = (hy>>20)-1023; + + /* set up {hx,lx}, {hy,ly} and align y to x */ + if(ix >= -1022) + hx = 0x00100000|(0x000fffff&hx); + else { /* subnormal x, shift x to normal */ + n = -1022-ix; + if(n<=31) { + hx = (hx<<n)|(lx>>(32-n)); + lx <<= n; + } else { + hx = lx<<(n-32); + lx = 0; + } + } + if(iy >= -1022) + hy = 0x00100000|(0x000fffff&hy); + else { /* subnormal y, shift y to normal */ + n = -1022-iy; + if(n<=31) { + hy = (hy<<n)|(ly>>(32-n)); + ly <<= n; + } else { + hy = ly<<(n-32); + ly = 0; + } + } + + /* fix point fmod */ + n = ix - iy; + while(n--) { + hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1; + if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;} + else { + if((hz|lz)==0) /* return sign(x)*0 */ + return Zero[(uint)sx>>31]; + hx = hz+hz+(lz>>31); lx = lz+lz; + } + } + hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1; + if(hz>=0) {hx=hz;lx=lz;} + + /* convert back to floating value and restore the sign */ + if((hx|lx)==0) /* return sign(x)*0 */ + return Zero[(uint)sx>>31]; + while(hx<0x00100000) { /* normalize x */ + hx = hx+hx+(lx>>31); lx = lx+lx; + iy -= 1; + } + if(iy>= -1022) { /* normalize output */ + hx = ((hx-0x00100000)|((iy+1023)<<20)); + __setHigh(&x,hx|sx); + __setLow(&x, lx); + } else { /* subnormal output */ + n = -1022 - iy; + if(n<=20) { + lx = (lx>>n)|((uint)hx<<(32-n)); + hx >>= n; + } else if (n<=31) { + lx = (hx<<(32-n))|(lx>>n); hx = sx; + } else { + lx = hx>>(n-32); hx = sx; + } + __setHigh(&x,hx|sx); + __setLow(&x, lx); + + x *= one; /* create necessary signal */ + } + return x; /* exact output */ + +} + OVERLOADABLE double rint(double x) { long ret; diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.h b/backend/src/libocl/tmpl/ocl_math.tmpl.h index 2e2938a..aee332c 100644 --- a/backend/src/libocl/tmpl/ocl_math.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.h @@ -240,6 +240,7 @@ OVERLOADABLE double fdim(double x, double y); OVERLOADABLE double floor(double x); OVERLOADABLE double fmax(double a, double b); OVERLOADABLE double fmin(double a, double b); +OVERLOADABLE double fmod (double x, double y); OVERLOADABLE double fract(double x, global double *p); OVERLOADABLE double fract(double x, local double *p); OVERLOADABLE double fract(double x, private double *p); diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl index e639e27..a084a88 100644 --- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl @@ -4318,6 +4318,118 @@ OVERLOADABLE double fmin(double a, double b) return (c <= 0) ? a:b; } +OVERLOADABLE double fmod (double x, double y) +{ + const double one = 1.0, Zero[] = {0.0, -0.0,}; + int n,hx,hy,hz,ix,iy,sx,i; + uint lx,ly,lz; + + hx = __HI(x); + lx = __LO(x); + hy = __HI(y); + ly = __LO(y); + sx = hx&0x80000000; /* sign of x */ + hx ^=sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + + /* purge off exception values */ + if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */ + ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */ + return (x*y)/(x*y); + if(hx<=hy) { + if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */ + if(lx==ly) + return Zero[(uint)sx>>31]; /* |x|=|y| return x*0*/ + } + + /* determine ix = ilogb(x) */ + if(hx<0x00100000) { /* subnormal x */ + if(hx==0) { + for (ix = -1043, i=lx; i>0; i<<=1) ix -=1; + } else { + for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1; + } + } else ix = (hx>>20)-1023; + + /* determine iy = ilogb(y) */ + if(hy<0x00100000) { /* subnormal y */ + if(hy==0) { + for (iy = -1043, i=ly; i>0; i<<=1) iy -=1; + } else { + for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1; + } + } else iy = (hy>>20)-1023; + + /* set up {hx,lx}, {hy,ly} and align y to x */ + if(ix >= -1022) + hx = 0x00100000|(0x000fffff&hx); + else { /* subnormal x, shift x to normal */ + n = -1022-ix; + if(n<=31) { + hx = (hx<<n)|(lx>>(32-n)); + lx <<= n; + } else { + hx = lx<<(n-32); + lx = 0; + } + } + if(iy >= -1022) + hy = 0x00100000|(0x000fffff&hy); + else { /* subnormal y, shift y to normal */ + n = -1022-iy; + if(n<=31) { + hy = (hy<<n)|(ly>>(32-n)); + ly <<= n; + } else { + hy = ly<<(n-32); + ly = 0; + } + } + + /* fix point fmod */ + n = ix - iy; + while(n--) { + hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1; + if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;} + else { + if((hz|lz)==0) /* return sign(x)*0 */ + return Zero[(uint)sx>>31]; + hx = hz+hz+(lz>>31); lx = lz+lz; + } + } + hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1; + if(hz>=0) {hx=hz;lx=lz;} + + /* convert back to floating value and restore the sign */ + if((hx|lx)==0) /* return sign(x)*0 */ + return Zero[(uint)sx>>31]; + while(hx<0x00100000) { /* normalize x */ + hx = hx+hx+(lx>>31); lx = lx+lx; + iy -= 1; + } + if(iy>= -1022) { /* normalize output */ + hx = ((hx-0x00100000)|((iy+1023)<<20)); + __setHigh(&x,hx|sx); + __setLow(&x, lx); + } else { /* subnormal output */ + n = -1022 - iy; + if(n<=20) { + lx = (lx>>n)|((uint)hx<<(32-n)); + hx >>= n; + } else if (n<=31) { + lx = (hx<<(32-n))|(lx>>n); hx = sx; + } else { + lx = hx>>(n-32); hx = sx; + } + __setHigh(&x,hx|sx); + __setLow(&x, lx); + + x *= one; /* create necessary signal */ + } + return x; /* exact output */ + +} + OVERLOADABLE double rint(double x) { long ret; diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h index 4ef159c..d503d40 100644 --- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h @@ -216,6 +216,7 @@ OVERLOADABLE double fabs(double x); OVERLOADABLE double fdim(double x, double y); OVERLOADABLE double fmax(double a, double b); OVERLOADABLE double fmin(double a, double b); +OVERLOADABLE double fmod (double x, double y); OVERLOADABLE double floor(double x); OVERLOADABLE double fract(double x, global double *p); OVERLOADABLE double fract(double x, local double *p); -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet