Pushed, thanks.

> -----Original Message-----
> From: Beignet [mailto:[email protected]] On Behalf Of
> Pan, Xiuli
> Sent: Wednesday, June 7, 2017 15:53
> To: Wang, Rander <[email protected]>; [email protected]
> Cc: Wang, Rander <[email protected]>
> Subject: Re: [Beignet] [PATCH] backend: refine hypot function
> 
> LGTM.
> Only tested for correctness; performance still needs to be rechecked.
> 
> -----Original Message-----
> From: Beignet [mailto:[email protected]] On Behalf Of
> rander.wang
> Sent: Thursday, May 18, 2017 16:18
> To: [email protected]
> Cc: Wang, Rander <[email protected]>
> Subject: [Beignet] [PATCH] backend: refine hypot function
> 
>        The OpenCV test OCL_Magnitude is slow on Beignet because
>          of hypot. Refine hypot: change the algorithm and remove
>          unnecessary code to get a 30% speedup.
> 
> Signed-off-by: rander.wang <[email protected]>
> ---
>  backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 75
> ++++++++++++++++++++-----
>  1 file changed, 61 insertions(+), 14 deletions(-)
> 
> diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
> b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
> index 6b942db..ab03cb4 100644
> --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
> +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
> @@ -2894,12 +2894,36 @@ float __gen_ocl_internal_pown(float x, int y) {
>    return as_float((a & (0x807FFFFFu)) | (u & 0x80000000u) | 0x3F000000);
> float __gen_ocl_internal_frexp(float x, int *exp) { BODY; }
> 
> +float __fast_scalbnf (float x, int n){
> +  /* copy from fdlibm */
> +  float two25 = 3.355443200e+07,  /* 0x4c000000 */
> +  twom25 = 2.9802322388e-08,          /* 0x33000000 */
> +  huge = 1.0e+30,
> +  tiny = 1.0e-30;
> +  int k,ix,t,tmp;
> +  float retVal;
> +
> +  GEN_OCL_GET_FLOAT_WORD(ix,x);
> +  k = (ix&0x7f800000)>>23; /* extract exponent */
> +  t = k;
> +  k = k+n;
> +  tmp = (ix&0x807fffff);
> +  x = as_float(tmp |(k << 23));
> +  retVal = (k > 0)? x:0.0f;
> +  retVal = (k > 0xfe)? INFINITY:retVal;
> +  retVal = (k <= -25)? 0.0f:retVal;
> +  x = as_float(tmp | ((k + 25) << 23));
> +  retVal = ((k > 0) && (k <= 25)) ? x*twom25:retVal;
> +  retVal = (t == 0)? 0.0f:retVal;
> +
> +  return retVal;
> +}
> +
>  OVERLOADABLE float hypot(float x, float y) {
>    if (__ocl_math_fastpath_flag)
>      return __gen_ocl_internal_fastpath_hypot(x, y);
> 
> -  //return __gen_ocl_sqrt(x*x + y*y);
> -  float a,b,an,bn,cn;
> +  float a,b,an,bn,cn, retVal;
>    int e;
>    if (isfinite (x) && isfinite (y)){      /* Determine absolute values.  */
>    x = __gen_ocl_fabs (x);
> @@ -2907,19 +2931,42 @@ OVERLOADABLE float hypot(float x, float y) {
>    /* Find the bigger and the smaller one.  */
>    a = max(x,y);
>    b = min(x,y);
> -  /* Now 0 <= b <= a.  */
> -  /* Write a = an * 2^e, b = bn * 2^e with 0 <= bn <= an < 1.  */
> -  an = __gen_ocl_internal_frexp (a, &e);
> -  bn = ldexp (b, - e);
> -  /* Through the normalization, no unneeded overflow or underflow will
> occur here.  */
> -  cn = __gen_ocl_sqrt (an * an + bn * bn);
> -  return ldexp (cn, e);
> -  }else{
> -    if (isinf (x) || isinf (y))  /* x or y is infinite.  Return +Infinity.  
> */
> -      return INFINITY;
> -    else        /* x or y is NaN.  Return NaN.  */
> -      return x + y;
> +
> +   bool skip = false;
> +     uint u = as_uint(a);
> +     uint x = u;
> +     if (x == 0) {
> +       e = 0;
> +       an = x;
> +      skip = true;
> +     }
> +
> +     if (x >= 0x800000) {
> +       e = (x >> 23) - 126;
> +       an = as_float((u & (0x807FFFFFu)) | 0x3F000000);
> +     skip = true;
> +     }
> +
> +   if(!skip)
> +    {
> +      int msbOne = clz(x);
> +      x <<= (msbOne -8);
> +      e = -117 -msbOne;
> +      an = as_float((x & (0x807FFFFFu)) | 0x3F000000);
> +    }
> +
> +     bn = __fast_scalbnf (b, - e);
> +     /* Through the normalization, no unneeded overflow or underflow will occur here.  */
> +     cn = __gen_ocl_sqrt (mad(an, an,  bn * bn));
> +     retVal = __fast_scalbnf (cn, e);
>    }
> +  else
> +  {
> +    retVal = NAN; /* x or y is NaN.  Return NaN.  */
> +    retVal = (isinf (x) || isinf (y)) ?  INFINITY:retVal; /* x or y is infinite.  Return +Infinity.  */
> +  }
> +
> +  return retVal;
>  }
> 
>  OVERLOADABLE float powr(float x, float y) {
> --
> 2.7.4
> 
> _______________________________________________
> Beignet mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to