Passed piglit test cases: piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-mul_hi-1.0.generated.cl; piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-mul_hi-1.0.generated.cl
version 2: temp flag register is allocated by RA version 3: divide subnr of flag register by typesize version 4: fix a typo Signed-off-by: Homer Hsing <homer.x...@intel.com> --- backend/src/backend/gen_context.cpp | 79 ++++++++++++++++++++++ backend/src/backend/gen_context.hpp | 4 ++ .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + backend/src/backend/gen_insn_selection.cpp | 22 ++++++ backend/src/backend/gen_insn_selection.hpp | 4 +- backend/src/backend/gen_insn_selection.hxx | 1 + backend/src/ir/instruction.cpp | 1 + backend/src/ir/instruction.hpp | 2 + backend/src/ir/instruction.hxx | 1 + backend/src/llvm/llvm_gen_backend.cpp | 18 +++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 2 + backend/src/ocl_stdlib.tmpl.h | 6 +- 12 files changed, 137 insertions(+), 4 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 9ccf1bf..4ebd59a 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -444,6 +444,85 @@ namespace gbe p->pop(); } + void GenContext::I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2) { + addWithCarry(low1, low1, low2); + addWithCarry(high1, high1, high2); + p->ADD(high1, high1, low2); + } + + void GenContext::I64FullMult(GenRegister dst1, GenRegister dst2, GenRegister dst3, GenRegister dst4, GenRegister x_high, GenRegister x_low, GenRegister y_high, GenRegister y_low) { + GenRegister &e = dst1, &f = dst2, &g = dst3, &h = dst4, + &a = x_high, &b = x_low, &c = y_high, &d = y_low; + I32FullMult(e, h, b, d); + I32FullMult(f, g, a, d); + addWithCarry(g, g, e); + addWithCarry(f, f, e); + I32FullMult(e, d, b, c); + I64FullAdd(f, g, e, d); + I32FullMult(b, d, a, c); + I64FullAdd(e, f, b, d); + } + + void GenContext::I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg) { + p->SHR(sign, high, GenRegister::immud(31)); + p->push(); + p->curr.predicate = GEN_PREDICATE_NONE; + 
p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); + p->CMP(GEN_CONDITIONAL_NZ, sign, GenRegister::immud(0)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->NOT(high, high); + p->NOT(low, low); + p->MOV(tmp, GenRegister::immud(1)); + addWithCarry(low, low, tmp); + p->ADD(high, high, tmp); + p->pop(); + } + + void GenContext::emitI64MULHIInstruction(const SelectionInstruction &insn) { + GenRegister dest = ra->genReg(insn.dst(0)); + GenRegister x = ra->genReg(insn.src(0)); + GenRegister y = ra->genReg(insn.src(1)); + GenRegister a = ra->genReg(insn.dst(1)); + GenRegister b = ra->genReg(insn.dst(2)); + GenRegister c = ra->genReg(insn.dst(3)); + GenRegister d = ra->genReg(insn.dst(4)); + GenRegister e = ra->genReg(insn.dst(5)); + GenRegister f = ra->genReg(insn.dst(6)); + GenRegister g = ra->genReg(insn.dst(7)); + GenRegister h = ra->genReg(insn.dst(8)); + GenRegister i = ra->genReg(insn.dst(9)); + GenRegister flagReg = ra->genReg(insn.dst(10)); + loadTopHalf(a, x); + loadBottomHalf(b, x); + loadTopHalf(c, y); + loadBottomHalf(d, y); + if(x.type == GEN_TYPE_UL) { + I64FullMult(e, f, g, h, a, b, c, d); + } else { + I64ABS(e, a, b, i, flagReg); + I64ABS(f, c, d, i, flagReg); + p->XOR(i, e, f); + I64FullMult(e, f, g, h, a, b, c, d); + p->push(); + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); + p->CMP(GEN_CONDITIONAL_NZ, i, GenRegister::immud(0)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->NOT(e, e); + p->NOT(f, f); + p->NOT(g, g); + p->NOT(h, h); + p->MOV(i, GenRegister::immud(1)); + addWithCarry(h, h, i); + addWithCarry(g, g, i); + addWithCarry(f, f, i); + p->ADD(e, e, i); + p->pop(); + } + storeTopHalf(dest, e); + storeBottomHalf(dest, f); + } + void GenContext::emitI64HADDInstruction(const SelectionInstruction &insn) { GenRegister dest = ra->genReg(insn.dst(0)); GenRegister x = ra->genReg(insn.src(0)); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 
1de0b3d..6fe71c5 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -86,7 +86,10 @@ namespace gbe void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1); void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1); + void I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg); + void I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2); void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1); + void I64FullMult(GenRegister dst1, GenRegister dst2, GenRegister dst3, GenRegister dst4, GenRegister x_high, GenRegister x_low, GenRegister y_high, GenRegister y_low); void saveFlag(GenRegister dest, int flag, int subFlag); void UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp); @@ -97,6 +100,7 @@ namespace gbe void emitBinaryInstruction(const SelectionInstruction &insn); void emitBinaryWithTempInstruction(const SelectionInstruction &insn); void emitTernaryInstruction(const SelectionInstruction &insn); + void emitI64MULHIInstruction(const SelectionInstruction &insn); void emitI64HADDInstruction(const SelectionInstruction &insn); void emitI64RHADDInstruction(const SelectionInstruction &insn); void emitI64ShiftInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index a4ba90b..a420cfc 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -9,6 +9,7 @@ DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2) DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2) DECL_GEN7_SCHEDULE(I64RHADD, 20, 4, 2) DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2) +DECL_GEN7_SCHEDULE(I64MULHI, 20, 4, 2) DECL_GEN7_SCHEDULE(Compare, 20, 4, 2) DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2) DECL_GEN7_SCHEDULE(Jump, 14, 1, 1) diff --git 
a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 06d7382..2791a0e 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -471,6 +471,8 @@ namespace gbe #undef I64Shift /*! Convert 64-bit integer to 32-bit float */ void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]); + /*! High 64bit of x*y */ + void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]); /*! (x+y)>>1 without mod. overflow */ void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); /*! (x+y+1)>>1 without mod. overflow */ @@ -1087,6 +1089,15 @@ namespace gbe insn->dst(i + 1) = tmp[i]; } + void Selection::Opaque::I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64_MUL_HI, 11, 2); + insn->dst(0) = dst; + insn->src(0) = src0; + insn->src(1) = src1; + for(int i = 0; i < 10; i ++) + insn->dst(i + 1) = tmp[i]; + } + void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) { SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2); insn->dst(0) = dst; @@ -1680,6 +1691,17 @@ namespace gbe sel.MUL_HI(dst, src0, src1, temp); break; } + case OP_I64_MUL_HI: + { + GenRegister temp[10]; + for(int i=0; i<9; i++) { + temp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + temp[i].type = GEN_TYPE_UD; + } + temp[9] = sel.selReg(sel.reg(FAMILY_BOOL)); + sel.I64_MUL_HI(dst, src0, src1, temp); + break; + } case OP_MUL: if (type == TYPE_U32 || type == TYPE_S32) { sel.pop(); diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 73e81de..2422b2b 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -86,8 +86,8 @@ namespace gbe const GenRegister &src(uint32_t srcID) const { return regs[dstNum+srcID]; } /*! No more than 17 sources (used by typed writes on simd8 mode.) */ enum { MAX_SRC_NUM = 17 }; - /*! 
No more than 4 destinations (used by samples and untyped reads) */ - enum { MAX_DST_NUM = 4 }; + /*! No more than 11 destinations (used by samples and untyped reads) */ + enum { MAX_DST_NUM = 11 }; /*! State of the instruction (extra fields neeed for the encoding) */ GenInstructionState state; union { diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index c4cf652..86d1756 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -58,6 +58,7 @@ DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction) DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction) DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction) DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction) +DECL_SELECTION_IR(I64_MUL_HI, I64MULHIInstruction) DECL_SELECTION_IR(FBH, UnaryInstruction) DECL_SELECTION_IR(FBL, UnaryInstruction) DECL_SELECTION_IR(HADD, BinaryWithTempInstruction) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 1925d93..8130b8b 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1333,6 +1333,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) DECL_EMIT_FUNCTION(SUB) DECL_EMIT_FUNCTION(SUBSAT) DECL_EMIT_FUNCTION(MUL_HI) + DECL_EMIT_FUNCTION(I64_MUL_HI) DECL_EMIT_FUNCTION(UPSAMPLE_SHORT) DECL_EMIT_FUNCTION(UPSAMPLE_INT) DECL_EMIT_FUNCTION(UPSAMPLE_LONG) diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 2a06f76..f165595 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -519,6 +519,8 @@ namespace ir { Instruction SIN(Type type, Register dst, Register src); /*! mul_hi.type dst src */ Instruction MUL_HI(Type type, Register dst, Register src0, Register src1); + /*! i64_mul_hi.type dst src */ + Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1); /*! 
upsample_short.type dst src */ Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1); /*! upsample_int.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 7ead344..135dc82 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -73,6 +73,7 @@ DECL_INSN(SYNC, SyncInstruction) DECL_INSN(LABEL, LabelInstruction) DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction) DECL_INSN(MUL_HI, BinaryInstruction) +DECL_INSN(I64_MUL_HI, BinaryInstruction) DECL_INSN(FBH, UnaryInstruction) DECL_INSN(FBL, UnaryInstruction) DECL_INSN(HADD, BinaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 5284ce5..3371054 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1878,6 +1878,8 @@ namespace gbe } case GEN_OCL_MUL_HI_INT: case GEN_OCL_MUL_HI_UINT: + case GEN_OCL_MUL_HI_I64: + case GEN_OCL_MUL_HI_UI64: case GEN_OCL_UPSAMPLE_SHORT: case GEN_OCL_UPSAMPLE_INT: case GEN_OCL_UPSAMPLE_LONG: @@ -2255,6 +2257,22 @@ namespace gbe ctx.MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; } + case GEN_OCL_MUL_HI_I64: + { + GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + ctx.I64_MUL_HI(getType(ctx, I.getType()), dst, src0, src1); + break; + } + case GEN_OCL_MUL_HI_UI64: + { + GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + ctx.I64_MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1); + break; + } case GEN_OCL_UPSAMPLE_SHORT: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx 
b/backend/src/llvm/llvm_gen_ocl_function.hxx index 58df2b0..7b5a2d3 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -128,6 +128,8 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm) // integer built-in functions DECL_LLVM_GEN_FUNCTION(MUL_HI_INT, _Z16__gen_ocl_mul_hiii) DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj) +DECL_LLVM_GEN_FUNCTION(MUL_HI_I64, _Z16__gen_ocl_mul_hill) +DECL_LLVM_GEN_FUNCTION(MUL_HI_UI64, _Z16__gen_ocl_mul_himm) DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh) DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl) DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 9b76ba1..f21ba4f 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -343,6 +343,8 @@ INLINE_OVERLOADABLE ulong clz(ulong x) { OVERLOADABLE int __gen_ocl_mul_hi(int x, int y); OVERLOADABLE uint __gen_ocl_mul_hi(uint x, uint y); +OVERLOADABLE long __gen_ocl_mul_hi(long x, long y); +OVERLOADABLE ulong __gen_ocl_mul_hi(ulong x, ulong y); INLINE_OVERLOADABLE char mul_hi(char x, char y) { return (x * y) >> 8; } INLINE_OVERLOADABLE uchar mul_hi(uchar x, uchar y) { return (x * y) >> 8; } INLINE_OVERLOADABLE short mul_hi(short x, short y) { return (x * y) >> 16; } @@ -350,10 +352,10 @@ INLINE_OVERLOADABLE ushort mul_hi(ushort x, ushort y) { return (x * y) >> 16; } INLINE_OVERLOADABLE int mul_hi(int x, int y) { return __gen_ocl_mul_hi(x, y); } INLINE_OVERLOADABLE uint mul_hi(uint x, uint y) { return __gen_ocl_mul_hi(x, y); } INLINE_OVERLOADABLE long mul_hi(long x, long y) { - return 0; + return __gen_ocl_mul_hi(x, y); } INLINE_OVERLOADABLE ulong mul_hi(ulong x, ulong y) { - return 0; + return __gen_ocl_mul_hi(x, y); } #define DEF(type) INLINE_OVERLOADABLE type mad_hi(type a, type b, type c) { return mul_hi(a, b) + c; } -- 1.8.1.2 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/beignet