changes for GpuTest /test=pixmark_piano /benchmark /no_scorebox /msaa=0
/benchmark_duration_ms=60000 /width=1024 /height=640:

score: 1026 -> 1044

changes for shader-db:

total instructions in shared programs : 2818606 -> 2811662 (-0.25%)
total gprs used in shared programs    : 379273 -> 379273 (0.00%)
total local used in shared programs   : 9505 -> 9505 (0.00%)
total bytes used in shared programs   : 25837192 -> 25773432 (-0.25%)

                local        gpr       inst      bytes
    helped           0           0        3084        3084
      hurt           0           0           0           0

v2: removed TODO
    reordered to show changes without RA modification
    removed stale debugging print() call

Signed-off-by: Karol Herbst <karolher...@gmail.com>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 64 +++++++++++++++++++---
 1 file changed, 57 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 1f47ba2..bcbc0c0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2954,19 +2954,18 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb)
 
 // Fold Immediate into MAD; must be done after register allocation due to
 // constraint SDST == SSRC2
-// TODO:
-// Does NVC0+ have other situations where this pass makes sense?
 class PostRaConstantFolding : public Pass
 {
 private:
    virtual bool visit(Instruction *);
-   void handleMAD(Instruction *);
+   void handleMADforNV50(Instruction *);
+   void handleMADforNVC0(Instruction *);
 };
 
 // Fold Immediate into MAD; must be done after register allocation due to
 // constraint SDST == SSRC2
 void
-PostRaConstantFolding::handleMAD(Instruction *i)
+PostRaConstantFolding::handleMADforNV50(Instruction *i)
 {
    if (i->def(0).getFile() != FILE_GPR ||
        i->src(0).getFile() != FILE_GPR ||
@@ -3019,12 +3018,64 @@ PostRaConstantFolding::handleMAD(Instruction *i)
    }
 }
 
+void
+PostRaConstantFolding::handleMADforNVC0(Instruction *i)
+{
+   if (i->def(0).getFile() != FILE_GPR ||
+       i->src(0).getFile() != FILE_GPR ||
+       i->src(1).getFile() != FILE_GPR ||
+       i->src(2).getFile() != FILE_GPR ||
+       i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
+      return;
+
+   int chipset = prog->getTarget()->getChipset();
+   if (i->getPredicate()) {
+      // prior gk110 we can't do that if we have a predicate
+      if (chipset < NVISA_GK20A_CHIPSET)
+         return;
+      // and gk110 can't handle a cc
+      if (chipset < NVISA_GM107_CHIPSET && i->cc)
+         return;
+   }
+
+   // TODO: gm107 can also do this for S32
+   if (i->dType != TYPE_F32)
+      return;
+
+   if ((i->src(2).mod | Modifier(NV50_IR_MOD_NEG)) != 
Modifier(NV50_IR_MOD_NEG))
+      return;
+
+   ImmediateValue val;
+   int s;
+
+   if (i->src(0).getImmediate(val))
+      s = 1;
+   else if (i->src(1).getImmediate(val))
+      s = 0;
+   else
+      return;
+
+   if ((i->src(s).mod | Modifier(NV50_IR_MOD_NEG)) != 
Modifier(NV50_IR_MOD_NEG))
+      return;
+
+   if (s == 1)
+      i->swapSources(0, 1);
+
+   Instruction *imm = i->getSrc(1)->getInsn();
+   i->setSrc(1, imm->getSrc(0));
+   if (imm->isDead(true))
+      delete_Instruction(prog, imm);
+}
+
 bool
 PostRaConstantFolding::visit(Instruction *i)
 {
    switch (i->op) {
    case OP_MAD:
-      handleMAD(i);
+      if (prog->getTarget()->getChipset() < 0xc0)
+         handleMADforNV50(i);
+      else
+         handleMADforNVC0(i);
       break;
    default:
       break;
@@ -3447,8 +3498,7 @@ bool
 Program::optimizePostRA(int level)
 {
    RUN_PASS(2, FlatteningPass, run);
-   if (getTarget()->getChipset() < 0xc0)
-      RUN_PASS(2, PostRaConstantFolding, run);
+   RUN_PASS(2, PostRaConstantFolding, run);
 
    return true;
 }
-- 
2.10.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to