On 04:17 PM - Apr 17 2016, Ilia Mirkin wrote: > On Sun, Apr 17, 2016 at 4:07 PM, Pierre Moreau <pierre.mor...@free.fr> wrote: > > Ping :-) > > > > On 10:56 PM - Mar 19 2016, Pierre Moreau wrote: > >> Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the > >> GPU > >> unhappy. Instead, manually handle the conversion between 64-bit and 32-bit > >> values, and use `cvt` to convert between the original target (resp. source) > >> and 32-bit value. This happens to be the behaviour of NVIDIA's driver. > >> > >> Signed-off-by: Pierre Moreau <pierre.mor...@free.fr> > >> --- > >> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 59 > >> ++++++++++++++++++++++ > >> .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + > >> 2 files changed, 60 insertions(+) > >> > >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > >> index 2719f2c..c419a68 100644 > >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > >> @@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i) > >> return true; > >> } > >> > >> +bool > >> +NVC0LoweringPass::handleCVT(Instruction *i) > >> +{ > >> + if (isFloatType(i->dType) || isFloatType(i->sType) || > >> + isSignedIntType(i->dType) xor isSignedIntType(i->sType)) > > I know pre-C89 features are cool, but let's avoid using them. I know > characters like ^ were uncommon on the 1960's and 1970's teletypes, > but I think we're past those days now.
Yeah… Will fix that. > > >> + return false; > >> + > >> + if (typeSizeof(i->sType) == 8) { > >> + Value *toSplit = i->getSrc(0); > >> + if (i->saturate) { > >> + Value *minValue = bld.loadImm(bld.getSSA(8), 0ul); > >> + Value *maxValue = bld.loadImm(bld.getSSA(8), UINT32_MAX); > >> + if (isSignedType(i->sType)) { > >> + minValue = bld.loadImm(bld.getSSA(8), INT32_MIN); > >> + maxValue = bld.loadImm(bld.getSSA(8), INT32_MAX); > >> + } > >> + Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8), > >> toSplit, > >> + minValue); > >> + toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes, > >> + maxValue); > > Aren't you assuming that i->dType == 4 here? It could be an unsigned > <-> signed conversion, at 64-bit. So the clamp values would be I am assuming `i->dType <= 4`: remember the ^ from before! ;-) But, it could be a U64 <=> U64 or S64 <=> S64 conversion, which would then fail… > different. Handling ALL the cases is quite annoying... can you figure > out what the hw doesn't support and just handle that? I doubt it'll be > any slower, and definitely simpler. I don’t remember if I checked U64 <=> S64 conversions… Will need to refresh my memory and log which combinations fail. Thanks! Pierre > > -ilia > > >> + } > >> + > >> + Value *value32[2] = { bld.getSSA(), bld.getSSA() }; > >> + bld.mkSplit(value32, 4, toSplit); > >> + if (typeSizeof(i->dType) == 4) { > >> + bld.mkMov(i->getDef(0), value32[0], i->dType); > >> + delete_Instruction(prog, i); > >> + return true; > >> + } > >> + > >> + i->setSrc(0, bld.getSSA()); > >> + i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32; > >> + bld.mkMov(i->getSrc(0), value32[0], i->sType); > >> + } else if (typeSizeof(i->dType) == 8) { > >> + bld.setPosition(i, true); > >> + Value *res = i->getDef(0); > >> + Value *high32 = bld.loadImm(bld.getSSA(), > >> + isSignedType(i->sType) ? UINT32_MAX : > >> 0u); > >> + Value *low32 = i->getSrc(0); > >> + DataType resType = i->dType; > >> + > >> + if (typeSizeof(i->sType) <= 2) { > >> + i->dType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32; > >> + low32 = bld.getSSA(); > >> + i->setDef(0, low32); > >> + } else if (typeSizeof(i->sType) == 4) { > >> + delete_Instruction(prog, i); > >> + } > >> + > >> + Value *merged64 = bld.mkOp2v(OP_MERGE, resType, bld.getSSA(8), > >> low32, > >> + high32); > >> + bld.mkMov(res, merged64, resType); > >> + } > >> + > >> + return true; > >> +} > >> + > >> // Generate a binary predicate if an instruction is predicated by > >> // e.g. an f32 value. > >> void > >> @@ -1894,6 +1951,8 @@ NVC0LoweringPass::visit(Instruction *i) > >> checkPredicate(i); > >> > >> switch (i->op) { > >> + case OP_CVT: > >> + return handleCVT(i); > >> case OP_TEX: > >> case OP_TXB: > >> case OP_TXL: > >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > >> index 6eb8aff..9fc24d9 100644 > >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > >> @@ -96,6 +96,7 @@ protected: > >> bool handleMOD(Instruction *); > >> bool handleSQRT(Instruction *); > >> bool handlePOW(Instruction *); > >> + bool handleCVT(Instruction *); > >> bool handleTEX(TexInstruction *); > >> bool handleTXD(TexInstruction *); > >> bool handleTXQ(TexInstruction *); > >> -- > >> 2.7.4 > >> > >> _______________________________________________ > >> mesa-dev mailing list > >> mesa-dev@lists.freedesktop.org > >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev