Signed-off-by: Boyan Ding <boyan.j.d...@gmail.com>
---
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 32 ++++++++++++++++++++++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h   |  1 +
 2 files changed, 33 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 732e1a93b4..7e4d175a99 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -68,6 +68,32 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
 }
 
 void
+NVC0LegalizeSSA::handleRCPRSQLib(Instruction *i, Value *src[])
+{
+   FlowInstruction *call;
+   Value *def[2];
+   int builtin;
+
+   def[0] = bld.mkMovToReg(0, src[0])->getDef(0);
+   def[1] = bld.mkMovToReg(1, src[1])->getDef(0);
+
+   if (i->op == OP_RCP)
+      builtin = NVC0_BUILTIN_RCP_F64;
+   else
+      builtin = NVC0_BUILTIN_RSQ_F64;
+
+   call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
+   bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), def[0], def[1]);
+   bld.mkClobber(FILE_GPR, 0x3fc, 2);
+   bld.mkClobber(FILE_PREDICATE, i->op == OP_RSQ ? 0x3 : 0x1, 0);
+
+   call->fixed = 1;
+   call->absolute = call->builtin = 1;
+   call->target.builtin = builtin;
+   delete_Instruction(prog, i);
+}
+
+void
 NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
 {
    assert(i->dType == TYPE_F64);
@@ -80,6 +106,12 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
    Value *src[2], *dst[2], *def = i->getDef(0);
    bld.mkSplit(src, 4, i->getSrc(0));
 
+   int chip = prog->getTarget()->getChipset();
+   if (chip >= NVISA_GK20A_CHIPSET && chip < NVISA_GM107_CHIPSET) {
+      handleRCPRSQLib(i, src);
+      return;
+   }
+
    // 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
    dst[0] = bld.loadImm(NULL, 0);
    dst[1] = bld.getSSA();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 7fae7e24b9..30bc0b48df 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -33,6 +33,7 @@ private:
 
    // we want to insert calls to the builtin library only after optimization
    void handleDIV(Instruction *); // integer division, modulus
+   void handleRCPRSQLib(Instruction *, Value *[]);
    void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
    void handleFTZ(Instruction *);
    void handleSET(CmpInstruction *);
-- 
2.12.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev