Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the GPU unhappy. Instead, manually handle the conversion between 64-bit and 32-bit values, and use `cvt` to convert between the original target (resp. source) and 32-bit value. This happens to be the behaviour of NVIDIA's driver.
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr> --- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 59 ++++++++++++++++++++++ .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + 2 files changed, 60 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 2719f2c..c419a68 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i) return true; } +bool +NVC0LoweringPass::handleCVT(Instruction *i) +{ + if (isFloatType(i->dType) || isFloatType(i->sType) || + isSignedIntType(i->dType) xor isSignedIntType(i->sType)) + return false; + + if (typeSizeof(i->sType) == 8) { + Value *toSplit = i->getSrc(0); + if (i->saturate) { + Value *minValue = bld.loadImm(bld.getSSA(8), 0ul); + Value *maxValue = bld.loadImm(bld.getSSA(8), UINT32_MAX); + if (isSignedType(i->sType)) { + minValue = bld.loadImm(bld.getSSA(8), INT32_MIN); + maxValue = bld.loadImm(bld.getSSA(8), INT32_MAX); + } + Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8), toSplit, + minValue); + toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes, + maxValue); + } + + Value *value32[2] = { bld.getSSA(), bld.getSSA() }; + bld.mkSplit(value32, 4, toSplit); + if (typeSizeof(i->dType) == 4) { + bld.mkMov(i->getDef(0), value32[0], i->dType); + delete_Instruction(prog, i); + return true; + } + + i->setSrc(0, bld.getSSA()); + i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32; + bld.mkMov(i->getSrc(0), value32[0], i->sType); + } else if (typeSizeof(i->dType) == 8) { + bld.setPosition(i, true); + Value *res = i->getDef(0); + Value *high32 = bld.loadImm(bld.getSSA(), + isSignedType(i->sType) ? UINT32_MAX : 0u); + Value *low32 = i->getSrc(0); + DataType resType = i->dType; + + if (typeSizeof(i->sType) <= 2) { + i->dType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32; + low32 = bld.getSSA(); + i->setDef(0, low32); + } else if (typeSizeof(i->sType) == 4) { + delete_Instruction(prog, i); + } + + Value *merged64 = bld.mkOp2v(OP_MERGE, resType, bld.getSSA(8), low32, + high32); + bld.mkMov(res, merged64, resType); + } + + return true; +} + // Generate a binary predicate if an instruction is predicated by // e.g. an f32 value. void @@ -1894,6 +1951,8 @@ NVC0LoweringPass::visit(Instruction *i) checkPredicate(i); switch (i->op) { + case OP_CVT: + return handleCVT(i); case OP_TEX: case OP_TXB: case OP_TXL: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 6eb8aff..9fc24d9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -96,6 +96,7 @@ protected: bool handleMOD(Instruction *); bool handleSQRT(Instruction *); bool handlePOW(Instruction *); + bool handleCVT(Instruction *); bool handleTEX(TexInstruction *); bool handleTXD(TexInstruction *); bool handleTXQ(TexInstruction *); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev