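From: Grigore Lupescu <grigore.lupescu at intel.com> Optimization for exp, exp10, log, log2 and log10: use the native/fast-path implementation in the input ranges where it has enough precision, and compute native_exp10 via __gen_ocl_exp(M_LOG210_F*x) instead of __gen_ocl_pow(10, x).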
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com> --- backend/src/libocl/include/ocl_float.h | 1 + backend/src/libocl/tmpl/ocl_math.tmpl.cl | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/backend/src/libocl/include/ocl_float.h b/backend/src/libocl/include/ocl_float.h index e63eaf9..6be6c7c 100644 --- a/backend/src/libocl/include/ocl_float.h +++ b/backend/src/libocl/include/ocl_float.h @@ -81,6 +81,7 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){ #define M_E_F 2.718281828459045F #define M_LOG2E_F 1.4426950408889634F #define M_LOG10E_F 0.43429448190325176F +#define M_LOG210_F 3.3219280948873626F #define M_LN2_F 0.6931471805599453F #define M_LN10_F 2.302585092994046F #define M_PI_F 3.141592653589793F diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl index 782bfd2..6460755 100644 --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl @@ -57,7 +57,7 @@ OVERLOADABLE float native_tan(float x) { } OVERLOADABLE float native_exp2(float x) { return __gen_ocl_exp(x); } OVERLOADABLE float native_exp(float x) { return __gen_ocl_exp(M_LOG2E_F*x); } -OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); } +OVERLOADABLE float native_exp10(float x) { return __gen_ocl_exp(M_LOG210_F*x); } OVERLOADABLE float native_divide(float x, float y) { return x/y; } /* Fast path */ @@ -257,6 +257,7 @@ OVERLOADABLE float __gen_ocl_internal_log10(float x) { * is preserved. 
* ==================================================== */ + union {float f; unsigned i; }u; const float zero = 0.0, @@ -1666,12 +1667,6 @@ OVERLOADABLE float __gen_ocl_internal_rint(float x) { } OVERLOADABLE float __gen_ocl_internal_exp(float x) { - //use native instruction when it has enough precision - if (x > -0x1.6p1 && x < 0x1.6p1) - { - return native_exp(x); - } - float o_threshold = 8.8721679688e+01, /* 0x42b17180 */ u_threshold = -1.0397208405e+02, /* 0xc2cff1b5 */ twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */ @@ -3527,6 +3522,10 @@ OVERLOADABLE float log(float x) { if (__ocl_math_fastpath_flag) return __gen_ocl_internal_fastpath_log(x); + /* Use native/faster instruction when it has enough precision */ + if(x > 0x1.1p0) + return __gen_ocl_internal_fastpath_log(x); + return __gen_ocl_internal_log(x); } @@ -3534,6 +3533,10 @@ OVERLOADABLE float log2(float x) { if (__ocl_math_fastpath_flag) return __gen_ocl_internal_fastpath_log2(x); + /* Use native/faster instruction when it has enough precision */ + if(x > 0x1.1p0) + return __gen_ocl_internal_fastpath_log2(x); + return __gen_ocl_internal_log2(x); } @@ -3541,6 +3544,10 @@ OVERLOADABLE float log10(float x) { if (__ocl_math_fastpath_flag) return __gen_ocl_internal_fastpath_log10(x); + /* Use native/faster instruction when it has enough precision */ + if(x > 0x1.1p0) + return __gen_ocl_internal_fastpath_log10(x); + return __gen_ocl_internal_log10(x); } @@ -3548,10 +3555,15 @@ OVERLOADABLE float exp(float x) { if (__ocl_math_fastpath_flag) return __gen_ocl_internal_fastpath_exp(x); + /* Use native/faster instruction when it has enough precision */ + if (x > -0x1.6p1 && x < 0x1.6p1) + return __gen_ocl_internal_fastpath_exp(x); + return __gen_ocl_internal_exp(x); } OVERLOADABLE float exp2(float x) { + /* Use native/faster instruction when it has enough precision, exp2 always */ return native_exp2(x); } @@ -3559,6 +3571,10 @@ OVERLOADABLE float exp10(float x) { if (__ocl_math_fastpath_flag) return 
__gen_ocl_internal_fastpath_exp10(x); + /* Use native/faster instruction when it has enough precision */ + if((x < -0x1.4p+5) || (x > +0x1.4p+5)) + return __gen_ocl_internal_fastpath_exp10(x); + return __gen_ocl_internal_exp10(x); } -- 2.5.0 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet