Good catch. The patch LGTM; I will push it later.
I just checked ocl_stdlib.tmpl.h. There should be some other builtin
functions that ought to be refined the same way. At least exp() and
exp10() should be optimized. exp() is used in luxmark's advancepath
kernel, so that should bring some benefit to luxmark's performance
under the strict conformance condition.

> -----Original Message-----
> From: Beignet [mailto:[email protected]] On Behalf Of
> Ruiling Song
> Sent: Monday, May 19, 2014 4:43 PM
> To: [email protected]
> Cc: Ruiling Song
> Subject: [Beignet] [PATCH] GBE: optimize builtin atan2.
> 
> clang will generate extra stores for the implementation.
> So, put the data in __constant address space.
> This will improve opencv test PhaseFixture_Phase by 3x.
> 
> Signed-off-by: Ruiling Song <[email protected]>
> ---
>  backend/src/ocl_stdlib.tmpl.h |   25 +++++++++++++------------
>  1 file changed, 13 insertions(+), 12 deletions(-)
> 
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index
> cd8b918..01bb337 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -2386,20 +2386,21 @@ INLINE_OVERLOADABLE float
> __gen_ocl_internal_acos(float x) {  INLINE_OVERLOADABLE float
> __gen_ocl_internal_acospi(float x) {
>    return __gen_ocl_internal_acos(x) / M_PI_F;  }
> +__constant float atanhi[4] = {
> +  4.6364760399e-01, /* atan(0.5)hi 0x3eed6338 */
> +  7.8539812565e-01, /* atan(1.0)hi 0x3f490fda */
> +  9.8279368877e-01, /* atan(1.5)hi 0x3f7b985e */
> +  1.5707962513e+00, /* atan(inf)hi 0x3fc90fda */ }; __constant float
> +atanlo[4] = {
> +  5.0121582440e-09, /* atan(0.5)lo 0x31ac3769 */
> +  3.7748947079e-08, /* atan(1.0)lo 0x33222168 */
> +  3.4473217170e-08, /* atan(1.5)lo 0x33140fb4 */
> +  7.5497894159e-08, /* atan(inf)lo 0x33a22168 */ };
> +
>  INLINE_OVERLOADABLE float __gen_ocl_internal_atan(float x) {
>    /* copied from fdlibm */
> -  float atanhi[4];
> -  atanhi[0] = 4.6364760399e-01; /* atan(0.5)hi 0x3eed6338 */
> -  atanhi[1] = 7.8539812565e-01; /* atan(1.0)hi 0x3f490fda */
> -  atanhi[2] = 9.8279368877e-01; /* atan(1.5)hi 0x3f7b985e */
> -  atanhi[3] = 1.5707962513e+00; /* atan(inf)hi 0x3fc90fda */
> -
> -  float atanlo[4];
> -  atanlo[0] = 5.0121582440e-09; /* atan(0.5)lo 0x31ac3769 */
> -  atanlo[1] =  3.7748947079e-08; /* atan(1.0)lo 0x33222168 */
> -  atanlo[2] =  3.4473217170e-08; /* atan(1.5)lo 0x33140fb4 */
> -  atanlo[3] =  7.5497894159e-08; /* atan(inf)lo 0x33a22168 */
> -
>    float aT[11];
>    aT[0] = 3.3333334327e-01; /* 0x3eaaaaaa */
>    aT[1] =  -2.0000000298e-01; /* 0xbe4ccccd */
> --
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/beignet

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to