Module: Mesa Branch: master Commit: 1d021539a24736b4f3e9149dc6b6e4963105cfc1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1d021539a24736b4f3e9149dc6b6e4963105cfc1
Author: Iago Toral Quiroga <[email protected]> Date: Fri Feb 26 12:23:12 2021 +0100 broadcom/compiler: track pipelineable ldvary sequences If we have two (or more) smooth varyings like this: nop t3; ldvary.rf0 fmul t5, t3, t0 fadd t6, t5, r5 nop t7; ldvary.rf0 fmul t9, t7, t0 fadd t10, t9, r5 nop t11; ldvary.rf0 fmul t13, t11, t0 fadd t14, t13, r5 We may be able to pipeline them like this: nop ; nop ; ldvary.r4 nop ; fmul r0, r4, rf0 ; ldvary.r1 fadd rf13, r0, r5 ; fmul r2, r1, rf0 ; ldvary.r3 fadd rf12, r2, r5 ; fmul r4, r3, rf0 ; ldvary.r0 But in order to do this, we will need to manually tweak the QPU scheduling. This patch tracks information about ldvary sequences that are good candidates for pipelining, and a follow-up patch will use this information to pipeline them when we emit the QPU code. v2 (apinheiro): - Rename the v3d_compile fields to avoid confusion with the qinst fields. - Assert that a sequence's start instruction is not the same as the end. Reviewed-by: Alejandro Piñeiro <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9304> --- src/broadcom/compiler/nir_to_vir.c | 71 +++++++++++++++++++++++++++++++++--- src/broadcom/compiler/v3d_compiler.h | 18 +++++++++ 2 files changed, 83 insertions(+), 6 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index da5aeed7511..9e7d308745b 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -921,6 +921,59 @@ emit_fragcoord_input(struct v3d_compile *c, int attr) c->inputs[attr * 4 + 3] = vir_RECIP(c, c->payload_w); } +static struct qreg +ldvary_sequence_inst(struct v3d_compile *c, struct qreg result) +{ + struct qinst *producer = + (struct qinst *) c->cur_block->instructions.prev; + assert(producer); + producer->ldvary_pipelining = true; + c->ldvary_sequence_end_inst = producer; + return result; +} + +static struct qreg +emit_smooth_varying(struct v3d_compile *c, + struct qinst *ldvary, 
struct qreg vary, struct qreg w, struct qreg r5) +{ + if (ldvary) { + c->ldvary_sequence_length++; + ldvary->ldvary_pipelining = true; + if (c->ldvary_sequence_length == 1) { + ldvary->ldvary_pipelining_start = true; + c->ldvary_sequence_start_inst = ldvary; + } + } + return ldvary_sequence_inst(c, vir_FADD(c, + ldvary_sequence_inst(c, vir_FMUL(c, vary, w)), r5)); +} + +static void +break_smooth_varying_sequence(struct v3d_compile *c) +{ + if (!c->ldvary_sequence_start_inst) { + assert(!c->ldvary_sequence_end_inst); + assert(c->ldvary_sequence_length == 0); + return; + } + + assert(c->ldvary_sequence_start_inst); + assert(c->ldvary_sequence_end_inst); + assert(c->ldvary_sequence_start_inst != c->ldvary_sequence_end_inst); + + /* We need at least two smooth ldvary sequences to do some pipelining */ + if (c->ldvary_sequence_length == 1) + c->ldvary_sequence_start_inst->ldvary_pipelining_start = false; + + if (c->ldvary_sequence_length > 1) + c->ldvary_sequence_end_inst->ldvary_pipelining_end = true; + + c->ldvary_sequence_length = 0; + c->ldvary_sequence_start_inst = NULL; + c->ldvary_sequence_end_inst = NULL; +} + static struct qreg emit_fragment_varying(struct v3d_compile *c, nir_variable *var, int8_t input_idx, uint8_t swizzle, int array_index) @@ -928,10 +981,11 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, struct qreg r3 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3); struct qreg r5 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R5); + struct qinst *ldvary = NULL; struct qreg vary; if (c->devinfo->ver >= 41) { - struct qinst *ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef, - c->undef, c->undef); + ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef, + c->undef, c->undef); ldvary->qpu.sig.ldvary = true; vary = vir_emit_def(c, ldvary); } else { @@ -955,7 +1009,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, */ if (!var) { assert(input_idx < 0); - return vir_FADD(c, vir_FMUL(c, vary, c->payload_w), r5); + return emit_smooth_varying(c, ldvary, 
vary, c->payload_w, r5); } int i = c->num_inputs++; @@ -969,19 +1023,22 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, case INTERP_MODE_SMOOTH: if (var->data.centroid) { BITSET_SET(c->centroid_flags, i); - result = vir_FADD(c, vir_FMUL(c, vary, - c->payload_w_centroid), r5); + result = emit_smooth_varying(c, ldvary, vary, + c->payload_w_centroid, r5); } else { - result = vir_FADD(c, vir_FMUL(c, vary, c->payload_w), r5); + result = emit_smooth_varying(c, ldvary, vary, + c->payload_w, r5); } break; case INTERP_MODE_NOPERSPECTIVE: + break_smooth_varying_sequence(c); BITSET_SET(c->noperspective_flags, i); result = vir_FADD(c, vir_MOV(c, vary), r5); break; case INTERP_MODE_FLAT: + break_smooth_varying_sequence(c); BITSET_SET(c->flat_shade_flags, i); vir_MOV_dest(c, c->undef, vary); result = vir_MOV(c, r5); @@ -2019,6 +2076,8 @@ ntq_setup_fs_inputs(struct v3d_compile *c) } } } + + break_smooth_varying_sequence(c); } static void diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 12fbb64841f..4e7c61c3dea 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -162,6 +162,19 @@ struct qinst { * otherwise. */ int uniform; + + /* Set if this instruction participates in a pipelinable sequence of + * smooth varyings. + */ + bool ldvary_pipelining; + /* Set if this is the ldvary instruction starting a pipelinable + * sequence of smooth varyings. + */ + bool ldvary_pipelining_start; + /* Set if this is the fadd instruction ending a pipelinable + * sequence of smooth varyings. 
+ */ + bool ldvary_pipelining_end; }; enum quniform_contents { @@ -769,6 +782,11 @@ struct v3d_compile { uint32_t program_id; uint32_t variant_id; + /* Used to track pipelinable sequences of smooth varyings */ + struct qinst *ldvary_sequence_start_inst; + struct qinst *ldvary_sequence_end_inst; + uint32_t ldvary_sequence_length; + /* Set to compile program in in 1x, 2x, or 4x threaded mode, where * SIG_THREAD_SWITCH is used to hide texturing latency at the cost of * limiting ourselves to the part of the physical reg space. _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
