This patch LGTM, will push later, thanks.
On Tue, Jan 27, 2015 at 11:39:21AM +0800, [email protected] wrote: > From: Luo Xionghu <[email protected]> > > the fbh style is inefficient. > > v2: use llvm.ctlz to call llvm intrinsic instead of beignet non-standard > intrinsic call style; remove the non-standard clz call path. > > Signed-off-by: Luo Xionghu <[email protected]> > --- > backend/src/libocl/CMakeLists.txt | 2 +- > backend/src/libocl/src/ocl_clz.ll | 44 ++++++++++++++++ > backend/src/libocl/tmpl/ocl_integer.tmpl.cl | 78 > +++++------------------------ > backend/src/libocl/tmpl/ocl_integer.tmpl.h | 9 ++++ > 4 files changed, 67 insertions(+), 66 deletions(-) > create mode 100644 backend/src/libocl/src/ocl_clz.ll > > diff --git a/backend/src/libocl/CMakeLists.txt > b/backend/src/libocl/CMakeLists.txt > index 314d373..16f00ee 100644 > --- a/backend/src/libocl/CMakeLists.txt > +++ b/backend/src/libocl/CMakeLists.txt > @@ -181,7 +181,7 @@ MACRO(ADD_LL_TO_BC_TARGET M) > ) > ENDMACRO(ADD_LL_TO_BC_TARGET) > > -SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset) > +SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset ocl_clz) > FOREACH(f ${OCL_LL_MODULES}) > COPY_THE_LL(${f}) > ADD_LL_TO_BC_TARGET(${f}) > diff --git a/backend/src/libocl/src/ocl_clz.ll > b/backend/src/libocl/src/ocl_clz.ll > new file mode 100644 > index 0000000..0863b6f > --- /dev/null > +++ b/backend/src/libocl/src/ocl_clz.ll > @@ -0,0 +1,44 @@ > +declare i8 @llvm.ctlz.i8(i8, i1) > +declare i16 @llvm.ctlz.i16(i16, i1) > +declare i32 @llvm.ctlz.i32(i32, i1) > +declare i64 @llvm.ctlz.i64(i64, i1) > + > +define i8 @clz_s8(i8 %x) nounwind readnone alwaysinline { > + %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0) > + ret i8 %call > +} > + > +define i8 @clz_u8(i8 %x) nounwind readnone alwaysinline { > + %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0) > + ret i8 %call > +} > + > +define i16 @clz_s16(i16 %x) nounwind readnone alwaysinline { > + %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0) > + ret i16 %call > +} > + > +define i16 @clz_u16(i16 %x) nounwind 
readnone alwaysinline { > + %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0) > + ret i16 %call > +} > + > +define i32 @clz_s32(i32 %x) nounwind readnone alwaysinline { > + %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0) > + ret i32 %call > +} > + > +define i32 @clz_u32(i32 %x) nounwind readnone alwaysinline { > + %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0) > + ret i32 %call > +} > + > +define i64 @clz_s64(i64 %x) nounwind readnone alwaysinline { > + %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0) > + ret i64 %call > +} > + > +define i64 @clz_u64(i64 %x) nounwind readnone alwaysinline { > + %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0) > + ret i64 %call > +} > diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl > b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl > index 6da0bab..a5e1dbc 100644 > --- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl > +++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl > @@ -19,6 +19,8 @@ > > PURE CONST uint __gen_ocl_fbh(uint); > PURE CONST uint __gen_ocl_fbl(uint); > + > + > PURE CONST OVERLOADABLE uint __gen_ocl_cbit(uint); > PURE CONST OVERLOADABLE uint __gen_ocl_cbit(int); > PURE CONST OVERLOADABLE uint __gen_ocl_cbit(ushort); > @@ -26,71 +28,17 @@ PURE CONST OVERLOADABLE uint __gen_ocl_cbit(short); > PURE CONST OVERLOADABLE uint __gen_ocl_cbit(uchar); > PURE CONST OVERLOADABLE uint __gen_ocl_cbit(char); > > -OVERLOADABLE char clz(char x) { > - if (x < 0) > - return 0; > - if (x == 0) > - return 8; > - return __gen_ocl_fbh(x) - 24; > -} > - > -OVERLOADABLE uchar clz(uchar x) { > - if (x == 0) > - return 8; > - return __gen_ocl_fbh(x) - 24; > -} > - > -OVERLOADABLE short clz(short x) { > - if (x < 0) > - return 0; > - if (x == 0) > - return 16; > - return __gen_ocl_fbh(x) - 16; > -} > - > -OVERLOADABLE ushort clz(ushort x) { > - if (x == 0) > - return 16; > - return __gen_ocl_fbh(x) - 16; > -} > - > -OVERLOADABLE int clz(int x) { > - if (x < 0) > - return 0; > - if (x == 0) > - return 32; > - return __gen_ocl_fbh(x); > -} > - > 
-OVERLOADABLE uint clz(uint x) { > - if (x == 0) > - return 32; > - return __gen_ocl_fbh(x); > -} > - > -OVERLOADABLE long clz(long x) { > - union { int i[2]; long x; } u; > - u.x = x; > - if (u.i[1] & 0x80000000u) > - return 0; > - if (u.i[1] == 0 && u.i[0] == 0) > - return 64; > - uint v = clz(u.i[1]); > - if(v == 32) > - v += clz(u.i[0]); > - return v; > -} > - > -OVERLOADABLE ulong clz(ulong x) { > - if (x == 0) > - return 64; > - union { uint i[2]; ulong x; } u; > - u.x = x; > - uint v = clz(u.i[1]); > - if(v == 32) > - v += clz(u.i[0]); > - return v; > -} > +#define SDEF(TYPE, TYPE_NAME, SIZE) \ > +OVERLOADABLE TYPE clz(TYPE x){ return clz_##TYPE_NAME##SIZE(x);} > +SDEF(char, s, 8); > +SDEF(uchar, u, 8); > +SDEF(short, s, 16); > +SDEF(ushort, u, 16); > +SDEF(int, s, 32); > +SDEF(uint, u, 32); > +SDEF(long, s, 64); > +SDEF(ulong, u, 64); > +#undef SDEF > > #define SDEF(TYPE) \ > OVERLOADABLE TYPE popcount(TYPE x){ return __gen_ocl_cbit(x);} > diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.h > b/backend/src/libocl/tmpl/ocl_integer.tmpl.h > index f067b8d..4b3b5ae 100644 > --- a/backend/src/libocl/tmpl/ocl_integer.tmpl.h > +++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.h > @@ -45,6 +45,15 @@ OVERLOADABLE uint clz(uint x); > OVERLOADABLE long clz(long x); > OVERLOADABLE ulong clz(ulong x); > > +char clz_s8(char); > +uchar clz_u8(uchar); > +short clz_s16(short); > +ushort clz_u16(ushort); > +int clz_s32(int); > +uint clz_u32(uint); > +long clz_s64(long); > +ulong clz_u64(ulong); > + > OVERLOADABLE char popcount(char x); > OVERLOADABLE uchar popcount(uchar x); > OVERLOADABLE short popcount(short x); > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
