On Thu, Dec 13, 2018 at 6:14 PM Karol Herbst <kher...@redhat.com> wrote: > > gives me a performance boost of 0.2% in pixmark_piano on my gk106, gm204 and > gp107. > > changes in shader-db: > > total instructions in shared programs : 7614782 -> 7614782 (0.00%) > total cvt instructions in shared programs : 139343 -> 95856 (-31.21%) > total gprs used in shared programs : 798045 -> 798045 (0.00%) > total shared used in shared programs : 639636 -> 639636 (0.00%) > total local used in shared programs : 24648 -> 24648 (0.00%) > total bytes used in shared programs : 81330696 -> 81330696 (0.00%) > > local shared gpr inst cvts bytes > helped 0 0 0 0 14037 0 > hurt 0 0 0 0 0 0 > > Signed-off-by: Karol Herbst <kher...@redhat.com> > --- > .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 55 +++++++++++++++++++ > .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + > 2 files changed, 56 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > index 295497be2f9..267fef0898d 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -719,6 +719,58 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb) > bb->remove(bb->getEntry()); > } > > +// replaces instructions which would end up as f2f or i2i with faster > +// alternatives: > +// - abs(a) -> add(0, abs a) > +// - neg(a) -> add(0, neg a) > +// - neg(abs a) -> add(0, neg abs a) > +// - sat(a) -> sat add(0, a) > +void > +NVC0LegalizePostRA::replaceCvt(Instruction *cvt) > +{ > + if (typeSizeof(cvt->sType) == 8)
!= 4. Otherwise you're just asking for trouble. > + return; > + if (cvt->sType != cvt->dType) > + return; > + // we could make it work, but in this case we have optimizations disabled > + // and we don't really care either way. > + if (cvt->src(0).getFile() == FILE_IMMEDIATE) > + return; > + > + switch (cvt->op) { > + case OP_ABS: > + if (cvt->src(0).mod) > + break; > + if (!isFloatType(cvt->sType)) Why? What's wrong with IABS -> IADD? Similar question for OP_NEG. > + break; > + cvt->op = OP_ADD; > + cvt->moveSources(0, 1); These two lines, along with moveSources, are always repeated. Would be nice to centralize this somehow. > + cvt->src(1).mod = NV50_IR_MOD_ABS; > + cvt->setSrc(0, rZero); > + cvt->src(0).mod = 0; > + break; > + case OP_NEG: > + if (cvt->src(0).mod && (cvt->src(0).mod.neg() || > !isFloatType(cvt->sType))) > + break; > + cvt->op = OP_ADD; > + cvt->moveSources(0, 1); > + cvt->src(1).mod = cvt->src(0).mod ? NV50_IR_MOD_NEG_ABS : > NV50_IR_MOD_NEG; > + cvt->setSrc(0, rZero); > + cvt->src(0).mod = 0; > + break; > + case OP_SAT: > + if (cvt->src(0).mod) > + break; > + cvt->op = OP_ADD; > + cvt->moveSources(0, 1); > + cvt->setSrc(0, rZero); > + cvt->saturate = true; > + break; > + default: > + break; > + } > +} > + > bool > NVC0LegalizePostRA::visit(BasicBlock *bb) > { > @@ -758,6 +810,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) > next = hi; > } > > + if (i->isCvt()) > + replaceCvt(i); > + > if (i->op != OP_MOV && i->op != OP_PFETCH) > replaceZero(i); > } > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > index e0f50ab0904..4679c56471b 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > @@ -81,6 +81,7 @@ private: > virtual bool visit(Function *); > virtual bool visit(BasicBlock *); > > + void replaceCvt(Instruction *); > void replaceZero(Instruction *); > bool 
tryReplaceContWithBra(BasicBlock *); > void propagateJoin(BasicBlock *); > -- > 2.19.2 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev