Module: Mesa
Branch: master
Commit: 1d021539a24736b4f3e9149dc6b6e4963105cfc1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1d021539a24736b4f3e9149dc6b6e4963105cfc1

Author: Iago Toral Quiroga <[email protected]>
Date:   Fri Feb 26 12:23:12 2021 +0100

broadcom/compiler: track pipelineable ldvary sequences

If we have two (or more) smooth varyings like this:

nop t3; ldvary.rf0
fmul t5, t3, t0
fadd t6, t5, r5
nop t7; ldvary.rf0
fmul t9, t7, t0
fadd t10, t9, r5
nop t11; ldvary.rf0
fmul t13, t11, t0
fadd t14, t13, r5

We may be able to pipeline them like this:

nop                  ; nop               ; ldvary.r4
nop                  ; fmul  r0, r4, rf0 ; ldvary.r1
fadd  rf13, r0, r5   ; fmul  r2, r1, rf0 ; ldvary.r3
fadd  rf12, r2, r5   ; fmul  r4, r3, rf0 ; ldvary.r0

But in order to do this, we will need to manually tweak the
QPU scheduling.

This patch tracks information about ldvary sequences that are
good candidates for pipelining, and a follow-up patch will
use this information to pipeline them when we emit the QPU
code.

v2 (apinheiro):
  - Rename the v3d_compile fields to avoid confusion with the qinst fields.
  - Assert that a sequence's start instruction is not the same as the end.

Reviewed-by: Alejandro Piñeiro <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9304>

---

 src/broadcom/compiler/nir_to_vir.c   | 71 +++++++++++++++++++++++++++++++++---
 src/broadcom/compiler/v3d_compiler.h | 18 +++++++++
 2 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index da5aeed7511..9e7d308745b 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -921,6 +921,59 @@ emit_fragcoord_input(struct v3d_compile *c, int attr)
         c->inputs[attr * 4 + 3] = vir_RECIP(c, c->payload_w);
 }
 
+static struct qreg
+ldvary_sequence_inst(struct v3d_compile *c, struct qreg result)
+{
+        struct qinst *producer =
+                   (struct qinst *) c->cur_block->instructions.prev;
+        assert(producer);
+        producer->ldvary_pipelining = true;
+        c->ldvary_sequence_end_inst = producer;
+        return result;
+}
+
+static struct qreg
+emit_smooth_varying(struct v3d_compile *c,
+                    struct qinst *ldvary,
+                    struct qreg vary, struct qreg w, struct qreg r5)
+{
+        if (ldvary) {
+                c->ldvary_sequence_length++;
+                ldvary->ldvary_pipelining = true;
+                if (c->ldvary_sequence_length == 1) {
+                        ldvary->ldvary_pipelining_start = true;
+                        c->ldvary_sequence_start_inst = ldvary;
+                }
+        }
+        return ldvary_sequence_inst(c, vir_FADD(c,
+               ldvary_sequence_inst(c, vir_FMUL(c, vary, w)), r5));
+}
+
+static void
+break_smooth_varying_sequence(struct v3d_compile *c)
+{
+        if (!c->ldvary_sequence_start_inst) {
+                assert(!c->ldvary_sequence_end_inst);
+                assert(c->ldvary_sequence_length == 0);
+                return;
+        }
+
+        assert(c->ldvary_sequence_start_inst);
+        assert(c->ldvary_sequence_end_inst);
+        assert(c->ldvary_sequence_start_inst != c->ldvary_sequence_end_inst);
+
+        /* We need at least two smooth ldvary sequences to do some pipelining */
+        if (c->ldvary_sequence_length == 1)
+                c->ldvary_sequence_start_inst->ldvary_pipelining_start = false;
+
+        if (c->ldvary_sequence_length > 1)
+                c->ldvary_sequence_end_inst->ldvary_pipelining_end = true;
+
+        c->ldvary_sequence_length = 0;
+        c->ldvary_sequence_start_inst = NULL;
+        c->ldvary_sequence_end_inst = NULL;
+}
+
 static struct qreg
 emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
                       int8_t input_idx, uint8_t swizzle, int array_index)
@@ -928,10 +981,11 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
         struct qreg r3 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3);
         struct qreg r5 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R5);
 
+        struct qinst *ldvary = NULL;
         struct qreg vary;
         if (c->devinfo->ver >= 41) {
-                struct qinst *ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef,
-                                                    c->undef, c->undef);
+                ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef,
+                                      c->undef, c->undef);
                 ldvary->qpu.sig.ldvary = true;
                 vary = vir_emit_def(c, ldvary);
         } else {
@@ -955,7 +1009,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
          */
         if (!var) {
                 assert(input_idx < 0);
-                return vir_FADD(c, vir_FMUL(c, vary, c->payload_w), r5);
+                return emit_smooth_varying(c, ldvary, vary, c->payload_w, r5);
         }
 
         int i = c->num_inputs++;
@@ -969,19 +1023,22 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
         case INTERP_MODE_SMOOTH:
                 if (var->data.centroid) {
                         BITSET_SET(c->centroid_flags, i);
-                        result = vir_FADD(c, vir_FMUL(c, vary,
-                                                      c->payload_w_centroid), r5);
+                        result = emit_smooth_varying(c, ldvary, vary,
+                                                     c->payload_w_centroid, r5);
                 } else {
-                        result = vir_FADD(c, vir_FMUL(c, vary, c->payload_w), r5);
+                        result = emit_smooth_varying(c, ldvary, vary,
+                                                     c->payload_w, r5);
                 }
                 break;
 
         case INTERP_MODE_NOPERSPECTIVE:
+                break_smooth_varying_sequence(c);
                 BITSET_SET(c->noperspective_flags, i);
                 result = vir_FADD(c, vir_MOV(c, vary), r5);
                 break;
 
         case INTERP_MODE_FLAT:
+                break_smooth_varying_sequence(c);
                 BITSET_SET(c->flat_shade_flags, i);
                 vir_MOV_dest(c, c->undef, vary);
                 result = vir_MOV(c, r5);
@@ -2019,6 +2076,8 @@ ntq_setup_fs_inputs(struct v3d_compile *c)
                         }
                 }
         }
+
+        break_smooth_varying_sequence(c);
 }
 
 static void
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 12fbb64841f..4e7c61c3dea 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -162,6 +162,19 @@ struct qinst {
          * otherwise.
          */
         int uniform;
+
+        /* Set if this instruction participates in a pipelinable sequence of
+         * smooth varyings.
+         */
+        bool ldvary_pipelining;
+        /* Set if this is the ldvary instruction starting a pipelinable
+         * sequence of smooth varyings.
+         */
+        bool ldvary_pipelining_start;
+        /* Set if this is the fadd instruction ending a pipelinable
+         * sequence of smooth varyings.
+         */
+        bool ldvary_pipelining_end;
 };
 
 enum quniform_contents {
@@ -769,6 +782,11 @@ struct v3d_compile {
         uint32_t program_id;
         uint32_t variant_id;
 
+        /* Used to track pipelinable sequences of smooth varyings */
+        struct qinst *ldvary_sequence_start_inst;
+        struct qinst *ldvary_sequence_end_inst;
+        uint32_t ldvary_sequence_length;
+
         /* Set to compile program in in 1x, 2x, or 4x threaded mode, where
          * SIG_THREAD_SWITCH is used to hide texturing latency at the cost of
          * limiting ourselves to the part of the physical reg space.

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to