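Fold ADD3(d, a, b, c) to ADD(d, a, b + c) when b and c are immediates, and ADD3(d, a, b, c) to ADD(d, b, a + c) when a and c are immediates, as well. The effect is very modest because OP_ADD3 only supports integers, but it can reduce the number of instructions in some shaders.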
total instructions in shared programs :2594754 -> 2594686 (-0.00%) total gprs used in shared programs :366893 -> 366919 (0.01%) total local used in shared programs :31872 -> 31872 (0.00%) local gpr inst bytes helped 0 0 39 39 hurt 0 26 0 0 Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 6ba2af6..e5e6e8e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -374,6 +374,7 @@ private: void expr(Instruction *, ImmediateValue&, ImmediateValue&); void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&); void opnd(Instruction *, ImmediateValue&, int s); + void opnd2(Instruction *, ImmediateValue&, int, ImmediateValue&, int); void opnd3(Instruction *, ImmediateValue&); void unary(Instruction *, const ImmediateValue&); @@ -429,6 +430,13 @@ ConstantFolding::visit(BasicBlock *bb) opnd(i, src1, 1); if (i->srcExists(2) && i->src(2).getImmediate(src2)) opnd3(i, src2); + if (i->srcExists(2) && + i->src(0).getImmediate(src0) && i->src(2).getImmediate(src2)) + opnd2(i, src0, 0, src2, 2); + else + if (i->srcExists(2) && + i->src(1).getImmediate(src1) && i->src(2).getImmediate(src2)) + opnd2(i, src1, 1, src2, 2); } return true; } @@ -960,6 +968,60 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2) } void +ConstantFolding::opnd2(Instruction *i, ImmediateValue &imm0, int s0, + ImmediateValue &imm1, int s1) +{ + struct Storage *const a = &imm0.reg, *const b = &imm1.reg; + ImmediateValue src0, src1; + struct Storage res; + DataType type = i->dType; + + memset(&res.data, 0, sizeof(res.data)); + + switch (i->op) { + case OP_ADD3: + switch (i->dType) { + case TYPE_S32: + case TYPE_U32: res.data.u32 = a->data.u32 + 
b->data.u32; break; + default: + return; + } + break; + default: + return; + } + ++foldCount; + + i->op = OP_ADD; + + if (s0 == 0) { + i->setSrc(0, i->getSrc(1)); + i->src(0).mod = i->src(1).mod; + } + + i->setSrc(1, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); + i->setSrc(2, NULL); + + i->getSrc(1)->reg.data = res.data; + i->getSrc(1)->reg.type = type; + i->getSrc(1)->reg.size = typeSizeof(type); + + src1 = *i->getSrc(1)->asImm(); + + // Move the immediate into position 1, where we know it might be + // emittable. However it might not be anyways, as there may be other + // restrictions, so move it into a separate LValue. + bld.setPosition(i, false); + i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(1), type)->getDef(0)); + i->src(1).mod = Modifier(0); + + if (i->src(0).getImmediate(src0)) + expr(i, src0, src1); + else + opnd(i, src1, 1); +} + +void ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) { const int t = !s; -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev