Pushed, thanks.
On Wed, Oct 09, 2013 at 03:48:46PM +0800, Homer Hsing wrote: > This patch supports saturated conversion of > native GPU data type (char/short/int/float), > from a larger-range data type to a narrower-range data type. > For instance, convert_uchar_sat(int) > > Several test cases are in this patch. > > Signed-off-by: Homer Hsing <homer.x...@intel.com> > --- > backend/src/backend/gen_insn_selection.cpp | 13 +++++- > backend/src/ir/instruction.cpp | 10 ++++- > backend/src/ir/instruction.hpp | 2 + > backend/src/ir/instruction.hxx | 1 + > backend/src/llvm/llvm_gen_backend.cpp | 69 > ++++++++++++++++++++++++++++++ > backend/src/llvm/llvm_gen_ocl_function.hxx | 28 ++++++++++++ > backend/src/ocl_stdlib.tmpl.h | 28 ++++++++++-- > kernels/builtin_convert_sat.cl | 30 +++++++++++++ > utests/CMakeLists.txt | 1 + > utests/builtin_convert_sat.cpp | 69 > ++++++++++++++++++++++++++++++ > 10 files changed, 244 insertions(+), 7 deletions(-) > create mode 100644 kernels/builtin_convert_sat.cl > create mode 100644 utests/builtin_convert_sat.cpp > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index bd52885..cddd76e 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -2538,15 +2538,20 @@ namespace gbe > const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > > + if(insn.getOpcode() == ir::OP_SAT_CVT) { > + sel.push(); > + sel.curr.saturate = 1; > + } > + > // We need two instructions to make the conversion > if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && > (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) { > GenRegister unpacked; > if (dstFamily == FAMILY_WORD) { > - const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W; > + const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : > GEN_TYPE_W; > unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD)); > unpacked = GenRegister::retype(unpacked, type); > } else { > - const uint32_t type = TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B; > + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : > GEN_TYPE_B; > unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD)); > unpacked = GenRegister::retype(unpacked, type); > } > @@ -2581,6 +2586,10 @@ namespace gbe > } > } else > sel.MOV(dst, src); > + > + if(insn.getOpcode() == ir::OP_SAT_CVT) > + sel.pop(); > + > return true; > } > DECL_CTOR(ConvertInstruction, 1, 1); > diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index a973082..9e5b6f6 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -252,9 +252,10 @@ namespace ir { > ConvertInstruction(Type dstType, > Type srcType, > Register dst, > - Register src) > + Register src, > + bool saturated=false) > { > - this->opcode = OP_CVT; > + this->opcode = saturated ? OP_SAT_CVT : OP_CVT; > this->dst[0] = dst; > this->src[0] = src; > this->dstType = dstType; > @@ -1469,6 +1470,11 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, > getInfoType(void), getInfoType()) > return internal::ConvertInstruction(dstType, srcType, dst, > src).convert(); > } > > + // saturated convert > + Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register > src) { > + return internal::ConvertInstruction(dstType, srcType, dst, src, > true).convert(); > + } > + > // For all unary functions with given opcode > Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace space, > Tuple src) { > return internal::AtomicInstruction(atomicOp, dst, space, src).convert(); > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index 27a34d1..90c819b 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -623,6 +623,8 @@ namespace ir { > Instruction GT(Type type, Register dst, Register src0, Register src1); > /*! cvt.{dstType <- srcType} dst src */ > Instruction CVT(Type dstType, Type srcType, Register dst, Register src); > + /*! sat_cvt.{dstType <- srcType} dst src */ > + Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register > src); > /*! atomic dst addr.space {src1 {src2}} */ > Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, > Tuple src); > /*! bra labelIndex */ > diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx > index 1a9f867..cd60349 100644 > --- a/backend/src/ir/instruction.hxx > +++ b/backend/src/ir/instruction.hxx > @@ -61,6 +61,7 @@ DECL_INSN(LT, CompareInstruction) > DECL_INSN(GE, CompareInstruction) > DECL_INSN(GT, CompareInstruction) > DECL_INSN(CVT, ConvertInstruction) > +DECL_INSN(SAT_CVT, ConvertInstruction) > DECL_INSN(ATOMIC, AtomicInstruction) > DECL_INSN(BRA, BranchInstruction) > DECL_INSN(RET, BranchInstruction) > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 3fe6085..493d152 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -1910,6 +1910,28 @@ namespace gbe > case GEN_OCL_I64RHADD: > case GEN_OCL_I64_MAD_SAT: > case GEN_OCL_I64_MAD_SATU: > + case GEN_OCL_SAT_CONV_U8_TO_I8: > + case GEN_OCL_SAT_CONV_I16_TO_I8: > + case GEN_OCL_SAT_CONV_U16_TO_I8: > + case GEN_OCL_SAT_CONV_I32_TO_I8: > + case GEN_OCL_SAT_CONV_U32_TO_I8: > + case GEN_OCL_SAT_CONV_F32_TO_I8: > + case GEN_OCL_SAT_CONV_I8_TO_U8: > + case GEN_OCL_SAT_CONV_I16_TO_U8: > + case GEN_OCL_SAT_CONV_U16_TO_U8: > + case GEN_OCL_SAT_CONV_I32_TO_U8: > + case GEN_OCL_SAT_CONV_U32_TO_U8: > + case GEN_OCL_SAT_CONV_F32_TO_U8: > + case GEN_OCL_SAT_CONV_U16_TO_I16: > + case GEN_OCL_SAT_CONV_I32_TO_I16: > + case GEN_OCL_SAT_CONV_U32_TO_I16: > + case GEN_OCL_SAT_CONV_F32_TO_I16: > + case GEN_OCL_SAT_CONV_I16_TO_U16: > + case GEN_OCL_SAT_CONV_I32_TO_U16: > + case GEN_OCL_SAT_CONV_U32_TO_U16: > + case GEN_OCL_SAT_CONV_F32_TO_U16: > + case GEN_OCL_SAT_CONV_F32_TO_I32: > + case GEN_OCL_SAT_CONV_F32_TO_U32: > this->newRegister(&I); > break; > default: > @@ -2415,6 +2437,53 @@ namespace gbe > ctx.I64RHADD(ir::TYPE_U64, dst, src0, src1); > break; > } > +#define DEF(DST_TYPE, SRC_TYPE) \ > + { ctx.SAT_CVT(DST_TYPE, SRC_TYPE, getRegister(&I), > getRegister(I.getOperand(0))); break; } > + case GEN_OCL_SAT_CONV_U8_TO_I8: > + DEF(ir::TYPE_S8, ir::TYPE_U8); > + case GEN_OCL_SAT_CONV_I16_TO_I8: > + DEF(ir::TYPE_S8, ir::TYPE_S16); > + case GEN_OCL_SAT_CONV_U16_TO_I8: > + DEF(ir::TYPE_S8, ir::TYPE_U16); > + case GEN_OCL_SAT_CONV_I32_TO_I8: > + DEF(ir::TYPE_S8, ir::TYPE_S32); > + case GEN_OCL_SAT_CONV_U32_TO_I8: > + DEF(ir::TYPE_S8, ir::TYPE_U32); > + case GEN_OCL_SAT_CONV_F32_TO_I8: > + DEF(ir::TYPE_S8, ir::TYPE_FLOAT); > + case GEN_OCL_SAT_CONV_I8_TO_U8: > + DEF(ir::TYPE_U8, ir::TYPE_S8); > + case GEN_OCL_SAT_CONV_I16_TO_U8: > + DEF(ir::TYPE_U8, ir::TYPE_S16); > + case GEN_OCL_SAT_CONV_U16_TO_U8: > + DEF(ir::TYPE_U8, ir::TYPE_U16); > + case GEN_OCL_SAT_CONV_I32_TO_U8: > + DEF(ir::TYPE_U8, ir::TYPE_S32); > + case GEN_OCL_SAT_CONV_U32_TO_U8: > + DEF(ir::TYPE_U8, ir::TYPE_U32); > + case GEN_OCL_SAT_CONV_F32_TO_U8: > + DEF(ir::TYPE_U8, ir::TYPE_FLOAT); > + case GEN_OCL_SAT_CONV_U16_TO_I16: > + DEF(ir::TYPE_S16, ir::TYPE_U16); > + case GEN_OCL_SAT_CONV_I32_TO_I16: > + DEF(ir::TYPE_S16, ir::TYPE_S32); > + case GEN_OCL_SAT_CONV_U32_TO_I16: > + DEF(ir::TYPE_S16, ir::TYPE_U32); > + case GEN_OCL_SAT_CONV_F32_TO_I16: > + DEF(ir::TYPE_S16, ir::TYPE_FLOAT); > + case GEN_OCL_SAT_CONV_I16_TO_U16: > + DEF(ir::TYPE_U16, ir::TYPE_S16); > + case GEN_OCL_SAT_CONV_I32_TO_U16: > + DEF(ir::TYPE_U16, ir::TYPE_S32); > + case GEN_OCL_SAT_CONV_U32_TO_U16: > + DEF(ir::TYPE_U16, ir::TYPE_U32); > + case GEN_OCL_SAT_CONV_F32_TO_U16: > + DEF(ir::TYPE_U16, ir::TYPE_FLOAT); > + case GEN_OCL_SAT_CONV_F32_TO_I32: > + DEF(ir::TYPE_S32, ir::TYPE_FLOAT); > + case GEN_OCL_SAT_CONV_F32_TO_U32: > + DEF(ir::TYPE_U32, ir::TYPE_FLOAT); > +#undef DEF > default: break; > } > } > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 321fc4e..00dc8ab 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -146,3 +146,31 @@ DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, > _Z18__gen_ocl_upsamplell) > > // get sampler info > DECL_LLVM_GEN_FUNCTION(GET_SAMPLER_INFO, __gen_ocl_get_sampler_info) > + > +// saturate convert > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8, _Z16convert_char_sath) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_I8, _Z16convert_char_sats) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I8, _Z16convert_char_satt) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I8, _Z16convert_char_sati) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I8, _Z16convert_char_satj) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I8, _Z16convert_char_satf) > + > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I8_TO_U8, _Z17convert_uchar_satc) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U8, _Z17convert_uchar_sats) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_U8, _Z17convert_uchar_satt) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U8, _Z17convert_uchar_sati) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U8, _Z17convert_uchar_satj) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U8, _Z17convert_uchar_satf) > + > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I16, _Z17convert_short_satt) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I16, _Z17convert_short_sati) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I16, _Z17convert_short_satj) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I16, _Z17convert_short_satf) > + > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U16, _Z18convert_ushort_sats) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U16, _Z18convert_ushort_sati) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U16, _Z18convert_ushort_satj) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U16, _Z18convert_ushort_satf) > + > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I32, _Z15convert_int_satf) > +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf) > diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h > index 170ec70..8dfea09 100644 > --- a/backend/src/ocl_stdlib.tmpl.h > +++ b/backend/src/ocl_stdlib.tmpl.h > @@ -218,9 +218,31 @@ UDEF(uint); > UDEF(ulong); > #undef UDEF > > -uchar INLINE_OVERLOADABLE convert_uchar_sat(float x) { > - return add_sat((uchar)x, (uchar)0); > -} > +#define DEF(DSTTYPE, SRCTYPE) \ > + OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x); > +DEF(char, uchar); > +DEF(char, short); > +DEF(char, ushort); > +DEF(char, int); > +DEF(char, uint); > +DEF(char, float); > +DEF(uchar, char); > +DEF(uchar, short); > +DEF(uchar, ushort); > +DEF(uchar, int); > +DEF(uchar, uint); > +DEF(uchar, float); > +DEF(short, ushort); > +DEF(short, int); > +DEF(short, uint); > +DEF(short, float); > +DEF(ushort, short); > +DEF(ushort, int); > +DEF(ushort, uint); > +DEF(ushort, float); > +DEF(int, float); > +DEF(uint, float); > +#undef DEF > > INLINE_OVERLOADABLE int isfinite(float x) { return __builtin_isfinite(x); } > INLINE_OVERLOADABLE int isinf(float x) { return __builtin_isinf(x); } > diff --git a/kernels/builtin_convert_sat.cl b/kernels/builtin_convert_sat.cl > new file mode 100644 > index 0000000..18d88ab > --- /dev/null > +++ b/kernels/builtin_convert_sat.cl > @@ -0,0 +1,30 @@ > +#define DEF(DSTTYPE, SRCTYPE) \ > + kernel void builtin_convert_ ## SRCTYPE ## _to_ ## DSTTYPE ## _sat(global > SRCTYPE *src, global DSTTYPE *dst) { \ > + int i = get_global_id(0); \ > + dst[i] = convert_ ## DSTTYPE ## _sat(src[i]); \ > +} > + > +DEF(char, uchar); > +DEF(char, short); > +DEF(char, ushort); > +DEF(char, int); > +DEF(char, uint); > +DEF(char, float); > +DEF(uchar, char); > +DEF(uchar, short); > +DEF(uchar, ushort); > +DEF(uchar, int); > +DEF(uchar, uint); > +DEF(uchar, float); > +DEF(short, ushort); > +DEF(short, int); > +DEF(short, uint); > +DEF(short, float); > +DEF(ushort, short); > +DEF(ushort, int); > +DEF(ushort, uint); > +DEF(ushort, float); > +DEF(int, float); > +DEF(uint, float); > +#undef DEF > + > diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt > index 9b93993..72bff84 100644 > --- a/utests/CMakeLists.txt > +++ b/utests/CMakeLists.txt > @@ -126,6 +126,7 @@ set (utests_sources > builtin_num_groups.cpp > builtin_local_id.cpp > builtin_acos_asin.cpp > + builtin_convert_sat.cpp > runtime_createcontext.cpp > runtime_null_kernel_arg.cpp > runtime_event.cpp > diff --git a/utests/builtin_convert_sat.cpp b/utests/builtin_convert_sat.cpp > new file mode 100644 > index 0000000..0bf561c > --- /dev/null > +++ b/utests/builtin_convert_sat.cpp > @@ -0,0 +1,69 @@ > +#include <cstdint> > +#include "utest_helper.hpp" > + > +typedef unsigned char uchar; > +typedef unsigned short ushort; > + > +int64_t my_rand(void) { > + int64_t x = rand() - RAND_MAX/2; > + int64_t y = rand() - RAND_MAX/2; > + return x * y; > +} > + > +#define DEF(DST_TYPE, SRC_TYPE, DST_MIN, DST_MAX) \ > +void builtin_convert_ ## SRC_TYPE ## _to_ ## DST_TYPE ## _sat(void) \ > +{ \ > + const int n = 128; \ > + OCL_CREATE_KERNEL_FROM_FILE("builtin_convert_sat", "builtin_convert_" # > SRC_TYPE "_to_" # DST_TYPE "_sat"); \ > + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(SRC_TYPE), NULL); \ > + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(DST_TYPE), NULL); \ > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \ > + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \ > + globals[0] = n; \ > + locals[0] = 16; \ > + OCL_MAP_BUFFER(0); \ > + for (int i = 0; i < n; i++) \ > + ((SRC_TYPE *)buf_data[0])[i] = my_rand(); \ > + OCL_UNMAP_BUFFER(0); \ > + OCL_NDRANGE(1); \ > + OCL_MAP_BUFFER(0); \ > + OCL_MAP_BUFFER(1); \ > + for (int i = 0; i < n; i++) { \ > + SRC_TYPE src = ((SRC_TYPE *)buf_data[0])[i]; \ > + DST_TYPE dst; \ > + if (src > DST_MAX) \ > + dst = DST_MAX; \ > + else if (src < DST_MIN) \ > + dst = DST_MIN; \ > + else \ > + dst = src; \ > + OCL_ASSERT(((DST_TYPE *)buf_data[1])[i] == dst); \ > + } \ > + OCL_UNMAP_BUFFER(0); \ > + OCL_UNMAP_BUFFER(1); \ > +} \ > +MAKE_UTEST_FROM_FUNCTION(builtin_convert_ ## SRC_TYPE ## _to_ ## DST_TYPE ## > _sat); > + > +DEF(char, uchar, -128, 127); > +DEF(char, short, -128, 127); > +DEF(char, ushort, -128, 127); > +DEF(char, int, -128, 127); > +DEF(char, uint, -128, 127); > +DEF(char, float, -128, 127); > +DEF(uchar, char, 0, 255); > +DEF(uchar, short, 0, 255); > +DEF(uchar, ushort, 0, 255); > +DEF(uchar, int, 0, 255); > +DEF(uchar, uint, 0, 255); > +DEF(uchar, float, 0, 255); > +DEF(short, ushort, -32768, 32767); > +DEF(short, int, -32768, 32767); > +DEF(short, uint, -32768, 32767); > +DEF(short, float, -32768, 32767); > +DEF(ushort, short, 0, 65535); > +DEF(ushort, int, 0, 65535); > +DEF(ushort, uint, 0, 65535); > +DEF(ushort, float, 0, 65535); > +DEF(int, float, -0x7FFFFFFF-1, 0x7FFFFFFF); > +DEF(uint, float, 0, 0xffffffffu); > +#undef DEF > -- > 1.8.1.2 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet