From: Pan Xiuli <[email protected]>

Add half type for mad encoder and libocl.
Signed-off-by: Pan Xiuli <[email protected]>
---
 backend/src/backend/gen8_encoder.cpp     | 13 +++++++++----
 backend/src/backend/gen_insn_compact.cpp |  2 ++
 backend/src/libocl/tmpl/ocl_math.tmpl.cl |  8 +++-----
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index 2a79e30..277260f 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -509,11 +509,19 @@ namespace gbe
      assert(dest.file == GEN_GENERAL_REGISTER_FILE);
      assert(dest.nr < 128);
      assert(dest.address_mode == GEN_ADDRESS_DIRECT);
-     assert(dest.type == GEN_TYPE_F);
+     assert(src0.type == GEN_TYPE_HF || src0.type == GEN_TYPE_F || src0.type == GEN_TYPE_DF);
+     assert(src0.type == dest.type);
+     assert(src0.type == src1.type);
+     assert(src0.type == src2.type);
+     int32_t dataType = src0.type == GEN_TYPE_DF ? 3 : (src0.type == GEN_TYPE_HF ? 4 : 0);
      //gen8_insn->bits1.da3src.dest_reg_file = 0;
      gen8_insn->bits1.da3src.dest_reg_nr = dest.nr;
      gen8_insn->bits1.da3src.dest_subreg_nr = dest.subnr / 4;
      gen8_insn->bits1.da3src.dest_writemask = 0xf;
+     gen8_insn->bits1.da3src.dest_type = dataType;
+     gen8_insn->bits1.da3src.src_type = dataType;
+     gen8_insn->bits1.da3src.src1_type = src1.type == GEN_TYPE_HF;
+     gen8_insn->bits1.da3src.src2_type = src2.type == GEN_TYPE_HF;
      this->setHeader(insn);
      gen8_insn->header.access_mode = GEN_ALIGN_16;
      gen8_insn->header.execution_size = execution_size;
@@ -521,7 +529,6 @@ namespace gbe
      assert(src0.file == GEN_GENERAL_REGISTER_FILE);
      assert(src0.address_mode == GEN_ADDRESS_DIRECT);
      assert(src0.nr < 128);
-     assert(src0.type == GEN_TYPE_F);
      gen8_insn->bits2.da3src.src0_swizzle = NO_SWIZZLE;
      gen8_insn->bits2.da3src.src0_subreg_nr = src0.subnr / 4 ;
      gen8_insn->bits2.da3src.src0_reg_nr = src0.nr;
@@ -532,7 +539,6 @@ namespace gbe
      assert(src1.file == GEN_GENERAL_REGISTER_FILE);
      assert(src1.address_mode == GEN_ADDRESS_DIRECT);
      assert(src1.nr < 128);
-     assert(src1.type == GEN_TYPE_F);
      gen8_insn->bits2.da3src.src1_swizzle = NO_SWIZZLE;
      gen8_insn->bits2.da3src.src1_subreg_nr_low = (src1.subnr / 4) & 0x3;
      gen8_insn->bits3.da3src.src1_subreg_nr_high = (src1.subnr / 4) >> 2;
@@ -544,7 +550,6 @@ namespace gbe
      assert(src2.file == GEN_GENERAL_REGISTER_FILE);
      assert(src2.address_mode == GEN_ADDRESS_DIRECT);
      assert(src2.nr < 128);
-     assert(src2.type == GEN_TYPE_F);
      gen8_insn->bits3.da3src.src2_swizzle = NO_SWIZZLE;
      gen8_insn->bits3.da3src.src2_subreg_nr = src2.subnr / 4;
      gen8_insn->bits3.da3src.src2_rep_ctrl = src2.vstride == GEN_VERTICAL_STRIDE_0;
diff --git a/backend/src/backend/gen_insn_compact.cpp b/backend/src/backend/gen_insn_compact.cpp
index 036d057..5de451c 100644
--- a/backend/src/backend/gen_insn_compact.cpp
+++ b/backend/src/backend/gen_insn_compact.cpp
@@ -788,6 +788,8 @@ namespace gbe {
       return false;
     if(opcode != GEN_OPCODE_MAD && opcode != GEN_OPCODE_LRP)
       return false;
+    if(src0.type != GEN_TYPE_F)
+      return false;
     assert(src0.file == GEN_GENERAL_REGISTER_FILE);
     assert(src0.address_mode == GEN_ADDRESS_DIRECT);
     assert(src0.nr < 128);
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 0d2a57d..9f10713 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -2494,7 +2494,8 @@ OVERLOADABLE float ldexp(float x, int n) {
   return __gen_ocl_internal_ldexp(x, n);
 }
 
-CONST float __gen_ocl_mad(float a, float b, float c) __asm("llvm.fma" ".f32");
+CONST OVERLOADABLE float __gen_ocl_mad(float a, float b, float c) __asm("llvm.fma" ".f32");
+CONST OVERLOADABLE half __gen_ocl_mad(half a, half b, half c) __asm("llvm.fma" ".f16");
 PURE CONST float __gen_ocl_fmax(float a, float b);
 PURE CONST float __gen_ocl_fmin(float a, float b);
 
@@ -3722,10 +3723,7 @@ OVERLOADABLE half exp2(half x) {
   return (half)exp2(_x);
 }
 OVERLOADABLE half mad(half a, half b, half c) {
-  float _a = (float)a;
-  float _b = (float)b;
-  float _c = (float)c;
-  return (half)mad(_a, _b, _c);
+  return __gen_ocl_mad(a,b,c);
 }
 OVERLOADABLE half sin(half x) {
   float _x = (float)x;
-- 
2.7.4

_______________________________________________
Beignet mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/beignet
