Module: Mesa
Branch: master
Commit: 9909fe6bac53dc32c6599820387545f5019f8a85
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9909fe6bac53dc32c6599820387545f5019f8a85

Author: Arcady Goldmints-Orlov <[email protected]>
Date:   Mon Feb  8 17:03:20 2021 -0500

broadcom/compiler: Skip bool_to_cond where possible

This change keeps track of when a boolean temp is loaded into the flags
by a comparison instruction and uses that information to skip emitting
instructions to set the flags in ntq_emit_bool_to_cond when the flags
already have the right contents.

total instructions in shared programs: 11116502 -> 11112225 (-0.04%)
instructions in affected programs: 631691 -> 627414 (-0.68%)
helped: 1591
HURT: 754
helped stats (abs) min: 1 max: 94 x̄: 4.14 x̃: 3
helped stats (rel) min: 0.11% max: 13.46% x̄: 2.10% x̃: 1.58%
HURT stats (abs)   min: 1 max: 19 x̄: 3.07 x̃: 2
HURT stats (rel)   min: 0.13% max: 19.67% x̄: 1.88% x̃: 1.15%
95% mean confidence interval for instructions value: -2.02 -1.63
95% mean confidence interval for instructions %-change: -0.94% -0.71%
Instructions are helped.

total uniforms in shared programs: 3281555 -> 3281513 (<.01%)
uniforms in affected programs: 1754 -> 1712 (-2.39%)
helped: 10
HURT: 5
helped stats (abs) min: 1 max: 19 x̄: 7.90 x̃: 5
helped stats (rel) min: 0.56% max: 11.11% x̄: 7.37% x̃: 11.05%
HURT stats (abs)   min: 1 max: 15 x̄: 7.40 x̃: 3
HURT stats (rel)   min: 0.64% max: 9.55% x̄: 5.31% x̃: 3.41%
95% mean confidence interval for uniforms value: -8.57 2.97
95% mean confidence interval for uniforms %-change: -7.35% 1.07%
Inconclusive result (value mean confidence interval includes 0).

total max-temps in shared programs: 1758419 -> 1758174 (-0.01%)
max-temps in affected programs: 7006 -> 6761 (-3.50%)
helped: 290
HURT: 14
helped stats (abs) min: 1 max: 8 x̄: 1.13 x̃: 1
helped stats (rel) min: 0.79% max: 22.86% x̄: 6.61% x̃: 4.88%
HURT stats (abs)   min: 1 max: 13 x̄: 6.00 x̃: 3
HURT stats (rel)   min: 1.54% max: 54.17% x̄: 23.99% x̃: 9.12%
95% mean confidence interval for max-temps value: -1.03 -0.58
95% mean confidence interval for max-temps %-change: -6.24% -4.16%
Max-temps are helped.

total sfu-stalls in shared programs: 23676 -> 23610 (-0.28%)
sfu-stalls in affected programs: 1578 -> 1512 (-4.18%)
helped: 257
HURT: 252
helped stats (abs) min: 1 max: 3 x̄: 1.37 x̃: 1
helped stats (rel) min: 11.11% max: 100.00% x̄: 46.70% x̃: 40.00%
HURT stats (abs)   min: 1 max: 2 x̄: 1.14 x̃: 1
HURT stats (rel)   min: 0.00% max: 200.00% x̄: 41.65% x̃: 25.00%
95% mean confidence interval for sfu-stalls value: -0.25 -0.01
95% mean confidence interval for sfu-stalls %-change: -8.24% 2.33%
Inconclusive result (%-change mean confidence interval includes 0).

total inst-and-stalls in shared programs: 11140178 -> 11135835 (-0.04%)
inst-and-stalls in affected programs: 633972 -> 629629 (-0.69%)
helped: 1581
HURT: 755
helped stats (abs) min: 1 max: 94 x̄: 4.26 x̃: 3
helped stats (rel) min: 0.11% max: 13.46% x̄: 2.12% x̃: 1.59%
HURT stats (abs)   min: 1 max: 17 x̄: 3.17 x̃: 2
HURT stats (rel)   min: 0.05% max: 19.67% x̄: 1.93% x̃: 1.20%
95% mean confidence interval for inst-and-stalls value: -2.06 -1.66
95% mean confidence interval for inst-and-stalls %-change: -0.93% -0.70%
Inst-and-stalls are helped.

Reviewed-by: Iago Toral Quiroga <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8933>

---

 src/broadcom/compiler/nir_to_vir.c   | 22 ++++++++++++++++++----
 src/broadcom/compiler/v3d_compiler.h |  7 +++++++
 src/broadcom/compiler/vir.c          |  3 +++
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 804f8f73b34..7611bd00be5 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1137,6 +1137,11 @@ ntq_get_alu_parent(nir_src src)
 static enum v3d_qpu_cond
 ntq_emit_bool_to_cond(struct v3d_compile *c, nir_src src)
 {
+        struct qreg qsrc = ntq_get_src(c, src, 0);
+        /* skip if we already have src in the flags */
+        if (qsrc.file == QFILE_TEMP && c->flags_temp == qsrc.index)
+                return c->flags_cond;
+
         nir_alu_instr *compare = ntq_get_alu_parent(src);
         if (!compare)
                 goto out;
@@ -1146,6 +1151,7 @@ ntq_emit_bool_to_cond(struct v3d_compile *c, nir_src src)
                 return cond;
 
 out:
+
         vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), ntq_get_src(c, src, 0)),
                    V3D_QPU_PF_PUSHZ);
         return V3D_QPU_COND_IFNA;
@@ -1294,6 +1300,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                 result = vir_MOV(c, vir_SEL(c, cond,
                                             vir_uniform_f(c, 1.0),
                                             vir_uniform_f(c, 0.0)));
+                c->flags_temp = result.index;
+                c->flags_cond = cond;
                 break;
         }
 
@@ -1315,6 +1323,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                 result = vir_MOV(c, vir_SEL(c, cond,
                                             vir_uniform_ui(c, ~0),
                                             vir_uniform_ui(c, 0)));
+                c->flags_temp = result.index;
+                c->flags_cond = cond;
                 break;
         }
 
@@ -1397,6 +1407,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                 result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
                                             vir_uniform_ui(c, ~0),
                                             vir_uniform_ui(c, 0)));
+                c->flags_temp = result.index;
+                c->flags_cond = V3D_QPU_COND_IFA;
                 break;
 
         case nir_op_pack_half_2x16_split:
@@ -2672,10 +2684,12 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 
         case nir_intrinsic_load_helper_invocation:
                 vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
-                ntq_store_dest(c, &instr->dest, 0,
-                               vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
-                                                  vir_uniform_ui(c, ~0),
-                                                  vir_uniform_ui(c, 0))));
+                struct qreg qdest = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
+                                                       vir_uniform_ui(c, ~0),
+                                                       vir_uniform_ui(c, 0)));
+                c->flags_temp = qdest.index;
+                c->flags_cond = V3D_QPU_COND_IFA;
+                ntq_store_dest(c, &instr->dest, 0, qdest);
                 break;
 
         case nir_intrinsic_load_front_face:
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index a9ed28b1a7a..548940c7f92 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -735,6 +735,13 @@ struct v3d_compile {
         struct qblock *cur_block;
         struct qblock *loop_cont_block;
         struct qblock *loop_break_block;
+        /**
+         * Which temp, if any, do we currently have in the flags?
+         * This is set when processing a comparison instruction, and
+         * reset to -1 by anything else that touches the flags.
+         */
+        int32_t flags_temp;
+        enum v3d_qpu_cond flags_cond;
 
         uint64_t *qpu_insts;
         uint32_t qpu_inst_count;
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 1d9aa1f51f4..e6cf729f929 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -234,6 +234,7 @@ vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
 void
 vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf)
 {
+        c->flags_temp = -1;
         if (vir_is_add(inst)) {
                 inst->qpu.flags.apf = pf;
         } else {
@@ -245,6 +246,7 @@ vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf)
 void
 vir_set_uf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_uf uf)
 {
+        c->flags_temp = -1;
         if (vir_is_add(inst)) {
                 inst->qpu.flags.auf = uf;
         } else {
@@ -542,6 +544,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
                                             _mesa_key_pointer_equal);
 
         c->tmu.outstanding_regs = _mesa_pointer_set_create(c);
+        c->flags_temp = -1;
 
         return c;
 }

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to