From: Grigore Lupescu <grigore.lupescu at intel.com>

Optimization for exp, exp10, log, log2 and log10: use the native/fast-path instruction in the input ranges where it has enough precision.

Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 backend/src/libocl/include/ocl_float.h   |  1 +
 backend/src/libocl/tmpl/ocl_math.tmpl.cl | 30 +++++++++++++++++++++++-------
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/backend/src/libocl/include/ocl_float.h b/backend/src/libocl/include/ocl_float.h
index e63eaf9..6be6c7c 100644
--- a/backend/src/libocl/include/ocl_float.h
+++ b/backend/src/libocl/include/ocl_float.h
@@ -81,6 +81,7 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){
 #define M_E_F        2.718281828459045F
 #define M_LOG2E_F    1.4426950408889634F
 #define M_LOG10E_F   0.43429448190325176F
+#define M_LOG210_F   3.3219280948873626F
 #define M_LN2_F      0.6931471805599453F
 #define M_LN10_F     2.302585092994046F
 #define M_PI_F       3.141592653589793F
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 782bfd2..6460755 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -57,7 +57,7 @@ OVERLOADABLE float native_tan(float x) {
 }
 OVERLOADABLE float native_exp2(float x) { return __gen_ocl_exp(x); }
 OVERLOADABLE float native_exp(float x) { return __gen_ocl_exp(M_LOG2E_F*x); }
-OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); }
+OVERLOADABLE float native_exp10(float x) { return __gen_ocl_exp(M_LOG210_F*x); }
 OVERLOADABLE float native_divide(float x, float y) { return x/y; }
 
 /* Fast path */
@@ -257,6 +257,7 @@ OVERLOADABLE float __gen_ocl_internal_log10(float x) {
  * is preserved.
  * ====================================================
  */
+
   union {float f; unsigned i; }u;
   const float
   zero       = 0.0,
@@ -1666,12 +1667,6 @@ OVERLOADABLE float __gen_ocl_internal_rint(float x) {
 }
 
 OVERLOADABLE float __gen_ocl_internal_exp(float x) {
-  //use native instruction when it has enough precision
-  if (x > -0x1.6p1 && x < 0x1.6p1)
-  {
-    return native_exp(x);
-  }
-
   float o_threshold = 8.8721679688e+01,  /* 0x42b17180 */
   u_threshold = -1.0397208405e+02,  /* 0xc2cff1b5 */
   twom100 = 7.8886090522e-31,   /* 2**-100=0x0d800000 */
@@ -3527,6 +3522,10 @@ OVERLOADABLE float log(float x) {
   if (__ocl_math_fastpath_flag)
     return __gen_ocl_internal_fastpath_log(x);
 
+  /* Use native/faster instruction when it has enough precision */
+  if(x > 0x1.1p0)
+    return __gen_ocl_internal_fastpath_log(x);
+
   return  __gen_ocl_internal_log(x);
 }
 
@@ -3534,6 +3533,10 @@ OVERLOADABLE float log2(float x) {
   if (__ocl_math_fastpath_flag)
     return __gen_ocl_internal_fastpath_log2(x);
 
+  /* Use native/faster instruction when it has enough precision */
+  if(x > 0x1.1p0)
+    return __gen_ocl_internal_fastpath_log2(x);
+
   return  __gen_ocl_internal_log2(x);
 }
 
@@ -3541,6 +3544,10 @@ OVERLOADABLE float log10(float x) {
   if (__ocl_math_fastpath_flag)
     return __gen_ocl_internal_fastpath_log10(x);
 
+  /* Use native/faster instruction when it has enough precision */
+  if(x > 0x1.1p0)
+    return __gen_ocl_internal_fastpath_log10(x);
+
   return  __gen_ocl_internal_log10(x);
 }
 
@@ -3548,10 +3555,15 @@ OVERLOADABLE float exp(float x) {
   if (__ocl_math_fastpath_flag)
     return __gen_ocl_internal_fastpath_exp(x);
 
+  /* Use native/faster instruction when it has enough precision */
+  if (x > -0x1.6p1 && x < 0x1.6p1)
+    return __gen_ocl_internal_fastpath_exp(x);
+
   return  __gen_ocl_internal_exp(x);
 }
 
 OVERLOADABLE float exp2(float x) {
+  /* Use native/faster instruction when it has enough precision, exp2 always */
   return native_exp2(x);
 }
 
@@ -3559,6 +3571,10 @@ OVERLOADABLE float exp10(float x) {
   if (__ocl_math_fastpath_flag)
     return __gen_ocl_internal_fastpath_exp10(x);
 
+  /* Use native/faster instruction when it has enough precision */
+  if((x < -0x1.4p+5) || (x > +0x1.4p+5))
+    return __gen_ocl_internal_fastpath_exp10(x);
+
   return  __gen_ocl_internal_exp10(x);
 }
 
-- 
2.5.0

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to