One hour ago, I had sent out version 2. http://lists.freedesktop.org/archives/beignet/2013-October/001694.html Warnings was fixed in version 2.
-----Original Message----- From: Yang, Rong R Sent: Thursday, October 10, 2013 11:10 AM To: Xing, Homer; beignet@lists.freedesktop.org Subject: RE: [Beignet] [PATCH] saturated conversion of native GPU data type, larger to narrower LGTM, test pass, but there are warnings: /home/champson/source/beignet/utests/builtin_convert_sat.cpp:51:1: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] -----Original Message----- From: beignet-bounces+rong.r.yang=intel....@lists.freedesktop.org [mailto:beignet-bounces+rong.r.yang=intel....@lists.freedesktop.org] On Behalf Of Homer Hsing Sent: Wednesday, October 09, 2013 3:49 PM To: beignet@lists.freedesktop.org Subject: [Beignet] [PATCH] saturated conversion of native GPU data type, larger to narrower This patch supports saturated conversion of native GPU data type (char/short/int/float), from a larger-range data type to a narrower-range data type. For instance, convert_uchar_sat(int) Several test cases are in this patch. Signed-off-by: Homer Hsing <homer.x...@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 13 +++++- backend/src/ir/instruction.cpp | 10 ++++- backend/src/ir/instruction.hpp | 2 + backend/src/ir/instruction.hxx | 1 + backend/src/llvm/llvm_gen_backend.cpp | 69 ++++++++++++++++++++++++++++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 28 ++++++++++++ backend/src/ocl_stdlib.tmpl.h | 28 ++++++++++-- kernels/builtin_convert_sat.cl | 30 +++++++++++++ utests/CMakeLists.txt | 1 + utests/builtin_convert_sat.cpp | 69 ++++++++++++++++++++++++++++++ 10 files changed, 244 insertions(+), 7 deletions(-) create mode 100644 kernels/builtin_convert_sat.cl create mode 100644 utests/builtin_convert_sat.cpp diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index bd52885..cddd76e 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2538,15 +2538,20 @@ namespace gbe const GenRegister dst = sel.selReg(insn.getDst(0), dstType); const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + if(insn.getOpcode() == ir::OP_SAT_CVT) { + sel.push(); + sel.curr.saturate = 1; + } + // We need two instructions to make the conversion if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) { GenRegister unpacked; if (dstFamily == FAMILY_WORD) { - const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W; + const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : + GEN_TYPE_W; unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD)); unpacked = GenRegister::retype(unpacked, type); } else { - const uint32_t type = TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B; + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : + GEN_TYPE_B; unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD)); unpacked = GenRegister::retype(unpacked, type); } @@ -2581,6 +2586,10 @@ namespace gbe } } else sel.MOV(dst, src); + + if(insn.getOpcode() == ir::OP_SAT_CVT) + sel.pop(); + return true; } DECL_CTOR(ConvertInstruction, 1, 1); diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index a973082..9e5b6f6 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -252,9 +252,10 @@ namespace ir { ConvertInstruction(Type dstType, Type srcType, Register dst, - Register src) + Register src, + bool saturated=false) { - this->opcode = OP_CVT; + this->opcode = saturated ? OP_SAT_CVT : OP_CVT; this->dst[0] = dst; this->src[0] = src; this->dstType = dstType; @@ -1469,6 +1470,11 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) return internal::ConvertInstruction(dstType, srcType, dst, src).convert(); } + // saturated convert + Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src) { + return internal::ConvertInstruction(dstType, srcType, dst, src, + true).convert(); } + // For all unary functions with given opcode Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace space, Tuple src) { return internal::AtomicInstruction(atomicOp, dst, space, src).convert(); diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 27a34d1..90c819b 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -623,6 +623,8 @@ namespace ir { Instruction GT(Type type, Register dst, Register src0, Register src1); /*! cvt.{dstType <- srcType} dst src */ Instruction CVT(Type dstType, Type srcType, Register dst, Register src); + /*! sat_cvt.{dstType <- srcType} dst src */ Instruction SAT_CVT(Type + dstType, Type srcType, Register dst, Register src); /*! atomic dst addr.space {src1 {src2}} */ Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, Tuple src); /*! bra labelIndex */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 1a9f867..cd60349 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -61,6 +61,7 @@ DECL_INSN(LT, CompareInstruction) DECL_INSN(GE, CompareInstruction) DECL_INSN(GT, CompareInstruction) DECL_INSN(CVT, ConvertInstruction) +DECL_INSN(SAT_CVT, ConvertInstruction) DECL_INSN(ATOMIC, AtomicInstruction) DECL_INSN(BRA, BranchInstruction) DECL_INSN(RET, BranchInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 3fe6085..493d152 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1910,6 +1910,28 @@ namespace gbe case GEN_OCL_I64RHADD: case GEN_OCL_I64_MAD_SAT: case GEN_OCL_I64_MAD_SATU: + case GEN_OCL_SAT_CONV_U8_TO_I8: + case GEN_OCL_SAT_CONV_I16_TO_I8: + case GEN_OCL_SAT_CONV_U16_TO_I8: + case GEN_OCL_SAT_CONV_I32_TO_I8: + case GEN_OCL_SAT_CONV_U32_TO_I8: + case GEN_OCL_SAT_CONV_F32_TO_I8: + case GEN_OCL_SAT_CONV_I8_TO_U8: + case GEN_OCL_SAT_CONV_I16_TO_U8: + case GEN_OCL_SAT_CONV_U16_TO_U8: + case GEN_OCL_SAT_CONV_I32_TO_U8: + case GEN_OCL_SAT_CONV_U32_TO_U8: + case GEN_OCL_SAT_CONV_F32_TO_U8: + case GEN_OCL_SAT_CONV_U16_TO_I16: + case GEN_OCL_SAT_CONV_I32_TO_I16: + case GEN_OCL_SAT_CONV_U32_TO_I16: + case GEN_OCL_SAT_CONV_F32_TO_I16: + case GEN_OCL_SAT_CONV_I16_TO_U16: + case GEN_OCL_SAT_CONV_I32_TO_U16: + case GEN_OCL_SAT_CONV_U32_TO_U16: + case GEN_OCL_SAT_CONV_F32_TO_U16: + case GEN_OCL_SAT_CONV_F32_TO_I32: + case GEN_OCL_SAT_CONV_F32_TO_U32: this->newRegister(&I); break; default: @@ -2415,6 +2437,53 @@ namespace gbe ctx.I64RHADD(ir::TYPE_U64, dst, src0, src1); break; } +#define DEF(DST_TYPE, SRC_TYPE) \ + { ctx.SAT_CVT(DST_TYPE, SRC_TYPE, getRegister(&I), getRegister(I.getOperand(0))); break; } + case GEN_OCL_SAT_CONV_U8_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_U8); + case GEN_OCL_SAT_CONV_I16_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_S16); + case GEN_OCL_SAT_CONV_U16_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_U16); + case GEN_OCL_SAT_CONV_I32_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_S32); + case GEN_OCL_SAT_CONV_U32_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_U32); + case GEN_OCL_SAT_CONV_F32_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_I8_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_S8); + case GEN_OCL_SAT_CONV_I16_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_S16); + case GEN_OCL_SAT_CONV_U16_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_U16); + case GEN_OCL_SAT_CONV_I32_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_S32); + case GEN_OCL_SAT_CONV_U32_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_U32); + case GEN_OCL_SAT_CONV_F32_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_U16_TO_I16: + DEF(ir::TYPE_S16, ir::TYPE_U16); + case GEN_OCL_SAT_CONV_I32_TO_I16: + DEF(ir::TYPE_S16, ir::TYPE_S32); + case GEN_OCL_SAT_CONV_U32_TO_I16: + DEF(ir::TYPE_S16, ir::TYPE_U32); + case GEN_OCL_SAT_CONV_F32_TO_I16: + DEF(ir::TYPE_S16, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_I16_TO_U16: + DEF(ir::TYPE_U16, ir::TYPE_S16); + case GEN_OCL_SAT_CONV_I32_TO_U16: + DEF(ir::TYPE_U16, ir::TYPE_S32); + case GEN_OCL_SAT_CONV_U32_TO_U16: + DEF(ir::TYPE_U16, ir::TYPE_U32); + case GEN_OCL_SAT_CONV_F32_TO_U16: + DEF(ir::TYPE_U16, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_F32_TO_I32: + DEF(ir::TYPE_S32, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_F32_TO_U32: + DEF(ir::TYPE_U32, ir::TYPE_FLOAT); #undef DEF default: break; } } diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 321fc4e..00dc8ab 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -146,3 +146,31 @@ DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell) // get sampler info DECL_LLVM_GEN_FUNCTION(GET_SAMPLER_INFO, __gen_ocl_get_sampler_info) + +// saturate convert +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8, _Z16convert_char_sath) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_I8, _Z16convert_char_sats) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I8, _Z16convert_char_satt) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I8, _Z16convert_char_sati) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I8, _Z16convert_char_satj) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I8, _Z16convert_char_satf) + +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I8_TO_U8, _Z17convert_uchar_satc) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U8, _Z17convert_uchar_sats) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_U8, _Z17convert_uchar_satt) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U8, _Z17convert_uchar_sati) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U8, _Z17convert_uchar_satj) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U8, _Z17convert_uchar_satf) + +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I16, _Z17convert_short_satt) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I16, _Z17convert_short_sati) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I16, _Z17convert_short_satj) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I16, _Z17convert_short_satf) + +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U16, _Z18convert_ushort_sats) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U16, _Z18convert_ushort_sati) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U16, _Z18convert_ushort_satj) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U16, _Z18convert_ushort_satf) + +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I32, _Z15convert_int_satf) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 170ec70..8dfea09 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -218,9 +218,31 @@ UDEF(uint); UDEF(ulong); #undef UDEF -uchar INLINE_OVERLOADABLE convert_uchar_sat(float x) { - return add_sat((uchar)x, (uchar)0); -} +#define DEF(DSTTYPE, SRCTYPE) \ + OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x); +DEF(char, uchar); DEF(char, short); DEF(char, ushort); DEF(char, int); +DEF(char, uint); DEF(char, float); DEF(uchar, char); DEF(uchar, short); +DEF(uchar, ushort); DEF(uchar, int); DEF(uchar, uint); DEF(uchar, +float); DEF(short, ushort); DEF(short, int); DEF(short, uint); +DEF(short, float); DEF(ushort, short); DEF(ushort, int); DEF(ushort, +uint); DEF(ushort, float); DEF(int, float); DEF(uint, float); #undef +DEF INLINE_OVERLOADABLE int isfinite(float x) { return __builtin_isfinite(x); } INLINE_OVERLOADABLE int isinf(float x) { return __builtin_isinf(x); } diff --git a/kernels/builtin_convert_sat.cl b/kernels/builtin_convert_sat.cl new file mode 100644 index 0000000..18d88ab --- /dev/null +++ b/kernels/builtin_convert_sat.cl @@ -0,0 +1,30 @@ +#define DEF(DSTTYPE, SRCTYPE) \ + kernel void builtin_convert_ ## SRCTYPE ## _to_ ## DSTTYPE ## +_sat(global SRCTYPE *src, global DSTTYPE *dst) { \ + int i = get_global_id(0); \ + dst[i] = convert_ ## DSTTYPE ## _sat(src[i]); \ } + +DEF(char, uchar); +DEF(char, short); +DEF(char, ushort); +DEF(char, int); +DEF(char, uint); +DEF(char, float); +DEF(uchar, char); +DEF(uchar, short); +DEF(uchar, ushort); +DEF(uchar, int); +DEF(uchar, uint); +DEF(uchar, float); +DEF(short, ushort); +DEF(short, int); +DEF(short, uint); +DEF(short, float); +DEF(ushort, short); +DEF(ushort, int); +DEF(ushort, uint); +DEF(ushort, float); +DEF(int, float); +DEF(uint, float); +#undef DEF + diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 9b93993..72bff84 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -126,6 +126,7 @@ set (utests_sources builtin_num_groups.cpp builtin_local_id.cpp builtin_acos_asin.cpp + builtin_convert_sat.cpp runtime_createcontext.cpp runtime_null_kernel_arg.cpp runtime_event.cpp diff --git a/utests/builtin_convert_sat.cpp b/utests/builtin_convert_sat.cpp new file mode 100644 index 0000000..0bf561c --- /dev/null +++ b/utests/builtin_convert_sat.cpp @@ -0,0 +1,69 @@ +#include <cstdint> +#include "utest_helper.hpp" + +typedef unsigned char uchar; +typedef unsigned short ushort; + +int64_t my_rand(void) { + int64_t x = rand() - RAND_MAX/2; + int64_t y = rand() - RAND_MAX/2; + return x * y; +} + +#define DEF(DST_TYPE, SRC_TYPE, DST_MIN, DST_MAX) \ void +builtin_convert_ ## SRC_TYPE ## _to_ ## DST_TYPE ## _sat(void) \ { \ + const int n = 128; \ + OCL_CREATE_KERNEL_FROM_FILE("builtin_convert_sat", "builtin_convert_" +# SRC_TYPE "_to_" # DST_TYPE "_sat"); \ + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(SRC_TYPE), NULL); \ + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(DST_TYPE), NULL); \ + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \ + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \ + globals[0] = n; \ + locals[0] = 16; \ + OCL_MAP_BUFFER(0); \ + for (int i = 0; i < n; i++) \ + ((SRC_TYPE *)buf_data[0])[i] = my_rand(); \ + OCL_UNMAP_BUFFER(0); \ + OCL_NDRANGE(1); \ + OCL_MAP_BUFFER(0); \ + OCL_MAP_BUFFER(1); \ + for (int i = 0; i < n; i++) { \ + SRC_TYPE src = ((SRC_TYPE *)buf_data[0])[i]; \ + DST_TYPE dst; \ + if (src > DST_MAX) \ + dst = DST_MAX; \ + else if (src < DST_MIN) \ + dst = DST_MIN; \ + else \ + dst = src; \ + OCL_ASSERT(((DST_TYPE *)buf_data[1])[i] == dst); \ + } \ + OCL_UNMAP_BUFFER(0); \ + OCL_UNMAP_BUFFER(1); \ +} \ +MAKE_UTEST_FROM_FUNCTION(builtin_convert_ ## SRC_TYPE ## _to_ ## +DST_TYPE ## _sat); + +DEF(char, uchar, -128, 127); +DEF(char, short, -128, 127); +DEF(char, ushort, -128, 127); +DEF(char, int, -128, 127); +DEF(char, uint, -128, 127); +DEF(char, float, -128, 127); +DEF(uchar, char, 0, 255); +DEF(uchar, short, 0, 255); +DEF(uchar, ushort, 0, 255); +DEF(uchar, int, 0, 255); +DEF(uchar, uint, 0, 255); +DEF(uchar, float, 0, 255); +DEF(short, ushort, -32768, 32767); +DEF(short, int, -32768, 32767); +DEF(short, uint, -32768, 32767); +DEF(short, float, -32768, 32767); +DEF(ushort, short, 0, 65535); +DEF(ushort, int, 0, 65535); +DEF(ushort, uint, 0, 65535); +DEF(ushort, float, 0, 65535); +DEF(int, float, -0x7FFFFFFF-1, 0x7FFFFFFF); DEF(uint, float, 0, +0xffffffffu); #undef DEF -- 1.8.1.2 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet