On Tue, Jan 13, 2015 at 3:35 PM, Roy Spliet <[email protected]> wrote:
> Add a specific optimisation pass for NV50 to check whether SRC0 or SRC1 is
> a MOV dst, IMM. If so: fold the IMM in and try to drop the MOV. Must be
> done post-RA because it requires that SDST == SSRC2.
>
> V2: improve readability and add comments to clarify decisions
>
> Signed-off-by: Roy Spliet <[email protected]>
> ---
> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 60
> ++++++++++++++++++++++
> 1 file changed, 60 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 21d20ca..723c255 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -2259,6 +2259,63 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb)
>
> //
> =============================================================================
>
> +// Fold Immediate into MAD; must be done after register allocation due to
> +// constraint SDST == SSRC2
> +// TODO:
> +// Does NVC0+ have other situations where this pass makes sense?
> +class NV50PostRaConstantFolding : public Pass
> +{
> +private:
> + virtual bool visit(BasicBlock *);
> +};
> +
> +bool
> +NV50PostRaConstantFolding::visit(BasicBlock *bb)
> +{
> + Value *vtmp;
> + Instruction *def;
> +
> + for (Instruction *i = bb->getFirst(); i; i = i->next) {
> + switch (i->op) {
> + case OP_MAD:
> + if(i->def(0).getFile() != FILE_GPR ||
Style nit: "if (" -- please put a space between "if" and the opening parenthesis.
> + i->src(0).getFile() != FILE_GPR ||
> + i->src(1).getFile() != FILE_GPR ||
> + i->src(2).getFile() != FILE_GPR ||
> + i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
You could add one more condition to this bail-out check: "|| neither
src0's nor src1's def is an immediate load". There's a getImmediate()
that may do the trick.
Once you know which source is the immediate, you can pre-emptively do
the swap (there's a ->swapSources()) and avoid the loop.
> + break;
> +
> + for (int s = 0; s < 2; s++) {
> + def = i->getSrc(1)->getInsn();
> + if (def->op == OP_MOV && def->src(0).getFile() ==
> FILE_IMMEDIATE) {
> + vtmp = i->getSrc(1);
> + i->setSrc(1, def->getSrc(0));
> +
> + /* There's no post-RA dead code elimination, so do it here
> + * XXX: if we add more code-removing post-RA passes, we might
> + * want to create a post-RA dead-code elim pass */
> + if (vtmp->refCount() == 0)
> + delete_Instruction(bb->getProgram(), def);
> +
> + break;
> + }
> +
> + /* Swap inputs, IMM must be SRC1 */
> + vtmp = i->getSrc(0);
> + i->setSrc(0, i->getSrc(1));
> + i->setSrc(1, vtmp);
> + }
> + break;
> + default:
> + break;
> + }
> + }
> +
> + return true;
> +}
> +
> +//
> =============================================================================
> +
> // Common subexpression elimination. Stupid O^2 implementation.
> class LocalCSE : public Pass
> {
> @@ -2629,6 +2686,9 @@ bool
> Program::optimizePostRA(int level)
> {
> RUN_PASS(2, FlatteningPass, run);
> + if (getTarget()->getChipset() < 0xc0)
> + RUN_PASS(2, NV50PostRaConstantFolding, run);
> +
> return true;
> }
>
> --
> 2.1.0
>
>
>
> _______________________________________________
> Nouveau mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/nouveau
_______________________________________________
Nouveau mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/nouveau