Am 22.06.2017 um 21:28 schrieb Karol Herbst: > If the exponent is a small integer immediate value, we can lower POW to > MULs instead to save a few instructions. Also MUL instructions execute > faster than what we lower POW in the default case to. > > score change for GpuTest /test=pixmark_piano /benchmark /no_scorebox > /msaa=0 /benchmark_duration_ms=60000 /width=1024 /height=640: > 1045 -> 1060 > > changes in shader-db: > total instructions in shared programs : 4350261 -> 4349451 (-0.02%) > total gprs used in shared programs : 525853 -> 525861 (0.00%) > total local used in shared programs : 30081 -> 30081 (0.00%) > total bytes used in shared programs : 39865176 -> 39857712 (-0.02%) > > local gpr inst bytes > helped 0 4 313 313 > hurt 0 12 1 1 > > Signed-off-by: Karol Herbst <karolher...@gmail.com> > --- > .../drivers/nouveau/codegen/nv50_ir_build_util.cpp | 49 > ++++++++++++++++++++++ > 1 file changed, 49 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp > index 5756e1b4d4..b31dcec1ab 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp > @@ -640,6 +640,55 @@ bool > BuildUtil::lowerPOW(Instruction *i) > { > LValue *val = getScratch(); > + ImmediateValue imm; > + > + if (i->src(1).getImmediate(imm)) { > + Value *src = i->getSrc(0); > + if (imm.isInteger(0)) { > + i->op = OP_MOV; > + i->setSrc(0, loadImm(NULL, 1)); > + i->setSrc(1, NULL); > + return true; > + } else > + if (imm.isInteger(1)) { > + i->op = OP_MOV; > + i->setSrc(1, NULL); > + return true; > + } else > + if (imm.isInteger(2)) { > + i->op = OP_MUL; > + i->setSrc(1, src); > + return true; > + } else > + if (imm.isInteger(3)) { > + mkOp2(OP_MUL, i->dType, val, src, src); > + i->op = OP_MUL; > + i->setSrc(1, val); > + return true; > + } else > + if (imm.isInteger(4)) { > + mkOp2(OP_MUL, i->dType, val, src, src); > + 
i->op = OP_MUL; > + i->setSrc(0, val); > + i->setSrc(1, val); > + return true; > + } else > + if (imm.isInteger(5)) { > + mkOp2(OP_MUL, i->dType, val, src, src); > + mkOp2(OP_MUL, i->dType, val, val, val); > + i->op = OP_MUL; > + i->setSrc(1, val); > + return true; > + } else > + if (imm.isInteger(8)) { > + mkOp2(OP_MUL, i->dType, val, src, src); > + mkOp2(OP_MUL, i->dType, val, val, val); > + i->op = OP_MUL; > + i->setSrc(0, val); > + i->setSrc(1, val); > + return true; > + } > + } > > mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); > mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; >
You could handle an integer exponent of 6 with just 3 muls too (x2 = x * x, x3 = x2 * x, x6 = x3 * x3) :-). More generally, though, this looks like it would be useful outside of nouveau too. Roland _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev