Convert the input to float, then convert that float back to the input type and call the result c. Compare the input with c; if the default conversion does not satisfy the requested rounding mode (rtz/rtp/rtn), adjust the float by +/- 1 ULP.
Signed-off-by: Yang Rong <rong.r.y...@intel.com>
---
 backend/src/gen_convert.sh | 154 ++++++++++++++++++++++++++++++++++++++++
 backend/src/ocl_convert.h  | 172 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 314 insertions(+), 12 deletions(-)

diff --git a/backend/src/gen_convert.sh b/backend/src/gen_convert.sh
index 793ed5b..0aafc3f 100755
--- a/backend/src/gen_convert.sh
+++ b/backend/src/gen_convert.sh
@@ -230,6 +230,154 @@ float __gen_ocl_rndz(float x);
 float __gen_ocl_rnde(float x);
 float __gen_ocl_rndu(float x);
 float __gen_ocl_rndd(float x);
+INLINE_OVERLOADABLE float __convert_float_rtz(long x) // long -> float, round toward zero
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long l = u.f; // convert back to see which way the default conversion moved x
+  if((l > x && x > 0) || x >= 0x7fffffc000000000 || // x >= 2^63 - 2^38; presumably u.f is then 2^63, too big for long
+     (l < x && x < 0)) {
+    u.u -= 1; // magnitude grew: step the bit pattern back by 1 ULP
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtp(long x) // long -> float, round toward +inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long l = u.f;  //can not use u.f < x
+  if(l < x && x < 0x7fffffc000000000) {
+    if(x > 0)
+      u.u = u.u + 1; // positive: 1 ULP larger magnitude
+    else
+      u.u = u.u - 1; // negative: 1 ULP smaller magnitude
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(long x) // long -> float, round toward -inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long l = u.f;  //avoid overflow
+  if(l > x || x >= 0x7fffffc000000000) {
+    if(x > 0)
+      u.u = u.u - 1; // positive: 1 ULP smaller magnitude
+    else
+      u.u = u.u + 1; // negative: 1 ULP larger magnitude
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtz(ulong x) // ulong -> float, round toward zero
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  ulong l = u.f; // convert back to see whether x was rounded up
+  if(l > x || x >= 0xffffff8000000000) // x >= 2^64 - 2^39; presumably u.f is then 2^64, too big for ulong
+    u.u -= 1;
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtp(ulong x) // ulong -> float, round toward +inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  ulong l = u.f;  //can not use u.f < x
+  if(l < x && x < 0xffffff8000000000)
+    u.u = u.u + 1;
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(ulong x) // ulong -> float, round toward -inf
+{
+  return __convert_float_rtz(x); // unsigned input: toward -inf equals toward zero
+}
+INLINE_OVERLOADABLE float __convert_float_rtz(int x) // int -> float, round toward zero
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long i = u.f; // long holds 2^31, which u.f can reach
+  if((i > x && x > 0) ||
+     (i < x && x < 0)) {
+    u.u -= 1; // magnitude grew: step the bit pattern back by 1 ULP
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtp(int x) // int -> float, round toward +inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long i = u.f;  //avoid overflow: u.f can be 2^31, which does not fit in int
+  if(i < x) {
+    if(x > 0)
+      u.u += 1; // positive: 1 ULP larger magnitude
+    else
+      u.u -= 1; // negative: 1 ULP smaller magnitude
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(int x) // int -> float, round toward -inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long i = u.f;  //avoid overflow
+  if(i > x) {
+    if(x > 0)
+      u.u = u.u - 1; // positive: 1 ULP smaller magnitude
+    else
+      u.u = u.u + 1; // negative: 1 ULP larger magnitude
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtz(uint x) // uint -> float, round toward zero; returns float like its siblings
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  ulong i = u.f; // ulong holds 2^32, which u.f can reach
+  if(i > x)
+    u.u -= 1;
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtp(uint x) // uint -> float, round toward +inf; returns float like its siblings
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  ulong i = u.f;  //avoid overflow: u.f can be 2^32, which does not fit in uint
+  if(i < x)
+    u.u += 1;
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(uint x) // uint -> float, round toward -inf
+{
+  return __convert_float_rtz(x); // unsigned input: toward -inf equals toward zero
+}
 '
 
 # convert_DSTTYPE_ROUNDING function
@@ -253,6 +401,8 @@ for vector_length in $VECTOR_LENGTHS; do
       echo "INLINE_OVERLOADABLE $tbasetype convert_${tbasetype}_rtz($fbasetype x)"
       if test $fbasetype = "float" -a $tbasetype != "float"; then
         echo "{ return __gen_ocl_rndz(x); }"
+      elif [ "$fbasetype" = "int" -o "$fbasetype" = "uint" -o "$fbasetype" = "long" -o "$fbasetype" = "ulong" ] && [ "$tbasetype" = "float" ]; then
+        echo "{ return __convert_${tbasetype}_rtz(x); }"
       else
         echo "{ return x; }"
       fi
@@ -260,6 +410,8 @@ for vector_length in $VECTOR_LENGTHS; do
       echo "INLINE_OVERLOADABLE $tbasetype convert_${tbasetype}_rtp($fbasetype x)"
       if test $fbasetype = "float" -a $tbasetype != "float"; then
         echo "{ return __gen_ocl_rndu(x); }"
+      elif [ "$fbasetype" = "int" -o "$fbasetype" = "uint" -o "$fbasetype" = "long" -o "$fbasetype" = "ulong" ] && [ "$tbasetype" = "float" ]; then
+        echo "{ return __convert_${tbasetype}_rtp(x); }"
       else
         echo "{ return x; }"
       fi
@@ -267,6 +419,8 @@ for vector_length in $VECTOR_LENGTHS; do
       echo "INLINE_OVERLOADABLE $tbasetype convert_${tbasetype}_rtn($fbasetype x)"
       if test $fbasetype = "float" -a $tbasetype != "float"; then
         echo "{ return __gen_ocl_rndd(x); }"
+      elif [ "$fbasetype" = "int" -o "$fbasetype" = "uint" -o "$fbasetype" = "long" -o "$fbasetype" = "ulong" ] && [ "$tbasetype" = "float" ]; then
+        echo "{ return __convert_${tbasetype}_rtn(x); }"
       else
         echo "{ return x; }"
       fi
diff --git a/backend/src/ocl_convert.h b/backend/src/ocl_convert.h
index e37cecb..9ef8bd0 100644
--- a/backend/src/ocl_convert.h
+++ b/backend/src/ocl_convert.h
@@ -3800,6 +3800,154 @@ float __gen_ocl_rndz(float x);
 float __gen_ocl_rnde(float x);
 float __gen_ocl_rndu(float x);
 float __gen_ocl_rndd(float x);
+INLINE_OVERLOADABLE float __convert_float_rtz(long x) // long -> float, round toward zero
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long l = u.f; // convert back to see which way the default conversion moved x
+  if((l > x && x > 0) || x >= 0x7fffffc000000000 || // x >= 2^63 - 2^38; presumably u.f is then 2^63, too big for long
+     (l < x && x < 0)) {
+    u.u -= 1; // magnitude grew: step the bit pattern back by 1 ULP
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtp(long x) // long -> float, round toward +inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long l = u.f;  //can not use u.f < x
+  if(l < x && x < 0x7fffffc000000000) {
+    if(x > 0)
+      u.u = u.u + 1; // positive: 1 ULP larger magnitude
+    else
+      u.u = u.u - 1; // negative: 1 ULP smaller magnitude
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(long x) // long -> float, round toward -inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long l = u.f;  //avoid overflow
+  if(l > x || x >= 0x7fffffc000000000) {
+    if(x > 0)
+      u.u = u.u - 1; // positive: 1 ULP smaller magnitude
+    else
+      u.u = u.u + 1; // negative: 1 ULP larger magnitude
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtz(ulong x) // ulong -> float, round toward zero
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  ulong l = u.f; // convert back to see whether x was rounded up
+  if(l > x || x >= 0xffffff8000000000) // x >= 2^64 - 2^39; presumably u.f is then 2^64, too big for ulong
+    u.u -= 1;
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtp(ulong x) // ulong -> float, round toward +inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  ulong l = u.f;  //can not use u.f < x
+  if(l < x && x < 0xffffff8000000000)
+    u.u = u.u + 1;
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(ulong x) // ulong -> float, round toward -inf
+{
+  return __convert_float_rtz(x); // unsigned input: toward -inf equals toward zero
+}
+INLINE_OVERLOADABLE float __convert_float_rtz(int x) // int -> float, round toward zero
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long i = u.f; // long holds 2^31, which u.f can reach
+  if((i > x && x > 0) ||
+     (i < x && x < 0)) {
+    u.u -= 1; // magnitude grew: step the bit pattern back by 1 ULP
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtp(int x) // int -> float, round toward +inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long i = u.f;  //avoid overflow: u.f can be 2^31, which does not fit in int
+  if(i < x) {
+    if(x > 0)
+      u.u += 1; // positive: 1 ULP larger magnitude
+    else
+      u.u -= 1; // negative: 1 ULP smaller magnitude
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(int x) // int -> float, round toward -inf
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  long i = u.f;  //avoid overflow
+  if(i > x) {
+    if(x > 0)
+      u.u = u.u - 1; // positive: 1 ULP smaller magnitude
+    else
+      u.u = u.u + 1; // negative: 1 ULP larger magnitude
+  }
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtz(uint x) // uint -> float, round toward zero; returns float like its siblings
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  ulong i = u.f; // ulong holds 2^32, which u.f can reach
+  if(i > x)
+    u.u -= 1;
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtp(uint x) // uint -> float, round toward +inf; returns float like its siblings
+{
+  union {
+    uint u;
+    float f;
+  } u;
+  u.f = x;
+  ulong i = u.f;  //avoid overflow: u.f can be 2^32, which does not fit in uint
+  if(i < x)
+    u.u += 1;
+  return u.f;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(uint x) // uint -> float, round toward -inf
+{
+  return __convert_float_rtz(x); // unsigned input: toward -inf equals toward zero
+}
 
 INLINE_OVERLOADABLE long convert_long_rte(long x)
 { return x; }
@@ -3868,11 +4016,11 @@ INLINE_OVERLOADABLE uchar convert_uchar_rtn(long x)
 INLINE_OVERLOADABLE float convert_float_rte(long x)
 { return x; }
 INLINE_OVERLOADABLE float convert_float_rtz(long x)
-{ return x; }
+{ return __convert_float_rtz(x); }
 INLINE_OVERLOADABLE float convert_float_rtp(long x)
-{ return x; }
+{ return __convert_float_rtp(x); }
 INLINE_OVERLOADABLE float convert_float_rtn(long x)
-{ return x; }
+{ return __convert_float_rtn(x); }
 INLINE_OVERLOADABLE long convert_long_rte(ulong x)
 { return x; }
 INLINE_OVERLOADABLE long convert_long_rtz(ulong x)
@@ -3940,11 +4088,11 @@ INLINE_OVERLOADABLE uchar convert_uchar_rtn(ulong x)
 INLINE_OVERLOADABLE float convert_float_rte(ulong x)
 { return x; }
 INLINE_OVERLOADABLE float convert_float_rtz(ulong x)
-{ return x; }
+{ return __convert_float_rtz(x); }
 INLINE_OVERLOADABLE float convert_float_rtp(ulong x)
-{ return x; }
+{ return __convert_float_rtp(x); }
 INLINE_OVERLOADABLE float convert_float_rtn(ulong x)
-{ return x; }
+{ return __convert_float_rtn(x); }
 INLINE_OVERLOADABLE long convert_long_rte(int x)
 { return x; }
 INLINE_OVERLOADABLE long convert_long_rtz(int x)
@@ -4012,11 +4160,11 @@ INLINE_OVERLOADABLE uchar convert_uchar_rtn(int x)
 INLINE_OVERLOADABLE float convert_float_rte(int x)
 { return x; }
 INLINE_OVERLOADABLE float convert_float_rtz(int x)
-{ return x; }
+{ return __convert_float_rtz(x); }
 INLINE_OVERLOADABLE float convert_float_rtp(int x)
-{ return x; }
+{ return __convert_float_rtp(x); }
 INLINE_OVERLOADABLE float convert_float_rtn(int x)
-{ return x; }
+{ return __convert_float_rtn(x); }
 INLINE_OVERLOADABLE long convert_long_rte(uint x)
 { return x; }
 INLINE_OVERLOADABLE long convert_long_rtz(uint x)
@@ -4084,11 +4232,11 @@ INLINE_OVERLOADABLE uchar convert_uchar_rtn(uint x)
 INLINE_OVERLOADABLE float convert_float_rte(uint x)
 { return x; }
 INLINE_OVERLOADABLE float convert_float_rtz(uint x)
-{ return x; }
+{ return __convert_float_rtz(x); }
 INLINE_OVERLOADABLE float convert_float_rtp(uint x)
-{ return x; }
+{ return __convert_float_rtp(x); }
 INLINE_OVERLOADABLE float convert_float_rtn(uint x)
-{ return x; }
+{ return __convert_float_rtn(x); }
 INLINE_OVERLOADABLE long convert_long_rte(short x)
 { return x; }
 INLINE_OVERLOADABLE long convert_long_rtz(short x)
-- 
1.8.1.2

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet