copy it from fdlibm

Signed-off-by: rander <rander.w...@intel.com>
---
 backend/src/libocl/tmpl/ocl_math.tmpl.cl    | 112 ++++++++++++++++++++++++++++
 backend/src/libocl/tmpl/ocl_math.tmpl.h     |   1 +
 backend/src/libocl/tmpl/ocl_math_20.tmpl.cl | 112 ++++++++++++++++++++++++++++
 backend/src/libocl/tmpl/ocl_math_20.tmpl.h  |   1 +
 4 files changed, 226 insertions(+)

diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 95fbf0b..a50d4db 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -4442,6 +4442,118 @@ OVERLOADABLE double fmin(double a, double b)
        return (c <= 0) ? a:b;
 }
 
+OVERLOADABLE double fmod (double x, double y)
+{
+       const double one = 1.0, Zero[] = {0.0, -0.0,};
+       int n,hx,hy,hz,ix,iy,sx,i;
+       uint lx,ly,lz;
+
+       hx = __HI(x);
+       lx = __LO(x);
+       hy = __HI(y);
+       ly = __LO(y);
+       sx = hx&0x80000000;             /* sign of x */
+       hx ^=sx;                /* |x| */
+       hy &= 0x7fffffff;       /* |y| */
+
+    /* purge off exception values */
+       if((hy|ly)==0||(hx>=0x7ff00000)||       /* y=0,or x not finite */
+         ((hy|((ly|-ly)>>31))>0x7ff00000))     /* or y is NaN */
+           return (x*y)/(x*y);
+       if(hx<=hy) {
+           if((hx<hy)||(lx<ly)) return x;      /* |x|<|y| return x */
+           if(lx==ly) 
+               return Zero[(uint)sx>>31];      /* |x|=|y| return x*0*/
+       }
+
+    /* determine ix = ilogb(x) */
+       if(hx<0x00100000) {     /* subnormal x */
+           if(hx==0) {
+               for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
+           } else {
+               for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+           }
+       } else ix = (hx>>20)-1023;
+
+    /* determine iy = ilogb(y) */
+       if(hy<0x00100000) {     /* subnormal y */
+           if(hy==0) {
+               for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
+           } else {
+               for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+           }
+       } else iy = (hy>>20)-1023;
+
+    /* set up {hx,lx}, {hy,ly} and align y to x */
+       if(ix >= -1022) 
+           hx = 0x00100000|(0x000fffff&hx);
+       else {          /* subnormal x, shift x to normal */
+           n = -1022-ix;
+           if(n<=31) {
+               hx = (hx<<n)|(lx>>(32-n));
+               lx <<= n;
+           } else {
+               hx = lx<<(n-32);
+               lx = 0;
+           }
+       }
+       if(iy >= -1022) 
+           hy = 0x00100000|(0x000fffff&hy);
+       else {          /* subnormal y, shift y to normal */
+           n = -1022-iy;
+           if(n<=31) {
+               hy = (hy<<n)|(ly>>(32-n));
+               ly <<= n;
+           } else {
+               hy = ly<<(n-32);
+               ly = 0;
+           }
+       }
+
+    /* fix point fmod */
+       n = ix - iy;
+       while(n--) {
+           hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+           if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
+           else {
+               if((hz|lz)==0)          /* return sign(x)*0 */
+                   return Zero[(uint)sx>>31];
+               hx = hz+hz+(lz>>31); lx = lz+lz;
+           }
+       }
+       hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+       if(hz>=0) {hx=hz;lx=lz;}
+
+    /* convert back to floating value and restore the sign */
+       if((hx|lx)==0)                  /* return sign(x)*0 */
+           return Zero[(uint)sx>>31];  
+       while(hx<0x00100000) {          /* normalize x */
+           hx = hx+hx+(lx>>31); lx = lx+lx;
+           iy -= 1;
+       }
+       if(iy>= -1022) {        /* normalize output */
+               hx = ((hx-0x00100000)|((iy+1023)<<20));
+               __setHigh(&x,hx|sx);
+               __setLow(&x, lx);
+       } else {                /* subnormal output */
+           n = -1022 - iy;
+           if(n<=20) {
+               lx = (lx>>n)|((uint)hx<<(32-n));
+               hx >>= n;
+           } else if (n<=31) {
+               lx = (hx<<(32-n))|(lx>>n); hx = sx;
+           } else {
+               lx = hx>>(n-32); hx = sx;
+           }
+       __setHigh(&x,hx|sx);
+       __setLow(&x, lx);
+
+           x *= one;           /* create necessary signal */
+       }
+       return x;               /* exact output */
+
+}
+
 OVERLOADABLE double rint(double x)
 {
        long ret;
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.h 
b/backend/src/libocl/tmpl/ocl_math.tmpl.h
index 2e2938a..aee332c 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.h
@@ -240,6 +240,7 @@ OVERLOADABLE double fdim(double x, double y);
 OVERLOADABLE double floor(double x);
 OVERLOADABLE double fmax(double a, double b);
 OVERLOADABLE double fmin(double a, double b);
+OVERLOADABLE double fmod (double x, double y);
 OVERLOADABLE double fract(double x, global double *p);
 OVERLOADABLE double fract(double x, local double *p);
 OVERLOADABLE double fract(double x, private double *p);
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
index e639e27..a084a88 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
@@ -4318,6 +4318,118 @@ OVERLOADABLE double fmin(double a, double b)
        return (c <= 0) ? a:b;
 }
 
+OVERLOADABLE double fmod (double x, double y)
+{
+       const double one = 1.0, Zero[] = {0.0, -0.0,};
+       int n,hx,hy,hz,ix,iy,sx,i;
+       uint lx,ly,lz;
+
+       hx = __HI(x);
+       lx = __LO(x);
+       hy = __HI(y);
+       ly = __LO(y);
+       sx = hx&0x80000000;             /* sign of x */
+       hx ^=sx;                /* |x| */
+       hy &= 0x7fffffff;       /* |y| */
+
+    /* purge off exception values */
+       if((hy|ly)==0||(hx>=0x7ff00000)||       /* y=0,or x not finite */
+         ((hy|((ly|-ly)>>31))>0x7ff00000))     /* or y is NaN */
+           return (x*y)/(x*y);
+       if(hx<=hy) {
+           if((hx<hy)||(lx<ly)) return x;      /* |x|<|y| return x */
+           if(lx==ly) 
+               return Zero[(uint)sx>>31];      /* |x|=|y| return x*0*/
+       }
+
+    /* determine ix = ilogb(x) */
+       if(hx<0x00100000) {     /* subnormal x */
+           if(hx==0) {
+               for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
+           } else {
+               for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+           }
+       } else ix = (hx>>20)-1023;
+
+    /* determine iy = ilogb(y) */
+       if(hy<0x00100000) {     /* subnormal y */
+           if(hy==0) {
+               for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
+           } else {
+               for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+           }
+       } else iy = (hy>>20)-1023;
+
+    /* set up {hx,lx}, {hy,ly} and align y to x */
+       if(ix >= -1022) 
+           hx = 0x00100000|(0x000fffff&hx);
+       else {          /* subnormal x, shift x to normal */
+           n = -1022-ix;
+           if(n<=31) {
+               hx = (hx<<n)|(lx>>(32-n));
+               lx <<= n;
+           } else {
+               hx = lx<<(n-32);
+               lx = 0;
+           }
+       }
+       if(iy >= -1022) 
+           hy = 0x00100000|(0x000fffff&hy);
+       else {          /* subnormal y, shift y to normal */
+           n = -1022-iy;
+           if(n<=31) {
+               hy = (hy<<n)|(ly>>(32-n));
+               ly <<= n;
+           } else {
+               hy = ly<<(n-32);
+               ly = 0;
+           }
+       }
+
+    /* fix point fmod */
+       n = ix - iy;
+       while(n--) {
+           hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+           if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
+           else {
+               if((hz|lz)==0)          /* return sign(x)*0 */
+                   return Zero[(uint)sx>>31];
+               hx = hz+hz+(lz>>31); lx = lz+lz;
+           }
+       }
+       hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
+       if(hz>=0) {hx=hz;lx=lz;}
+
+    /* convert back to floating value and restore the sign */
+       if((hx|lx)==0)                  /* return sign(x)*0 */
+           return Zero[(uint)sx>>31];  
+       while(hx<0x00100000) {          /* normalize x */
+           hx = hx+hx+(lx>>31); lx = lx+lx;
+           iy -= 1;
+       }
+       if(iy>= -1022) {        /* normalize output */
+               hx = ((hx-0x00100000)|((iy+1023)<<20));
+               __setHigh(&x,hx|sx);
+               __setLow(&x, lx);
+       } else {                /* subnormal output */
+           n = -1022 - iy;
+           if(n<=20) {
+               lx = (lx>>n)|((uint)hx<<(32-n));
+               hx >>= n;
+           } else if (n<=31) {
+               lx = (hx<<(32-n))|(lx>>n); hx = sx;
+           } else {
+               lx = hx>>(n-32); hx = sx;
+           }
+       __setHigh(&x,hx|sx);
+       __setLow(&x, lx);
+
+           x *= one;           /* create necessary signal */
+       }
+       return x;               /* exact output */
+
+}
+
 OVERLOADABLE double rint(double x)
 {
        long ret;
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h 
b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
index 4ef159c..d503d40 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
@@ -216,6 +216,7 @@ OVERLOADABLE double fabs(double x);
 OVERLOADABLE double fdim(double x, double y);
 OVERLOADABLE double fmax(double a, double b);
 OVERLOADABLE double fmin(double a, double b);
+OVERLOADABLE double fmod (double x, double y);
 OVERLOADABLE double floor(double x);
 OVERLOADABLE double fract(double x, global double *p);
 OVERLOADABLE double fract(double x, local double *p);
-- 
2.7.4

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to