Module: Mesa
Branch: main
Commit: 34a12a27277c6fb84970aca34abee6a7d3ba54d3
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=34a12a27277c6fb84970aca34abee6a7d3ba54d3

Author: Pavel Ondračka <[email protected]>
Date: Fri Jul 14 10:05:35 2023 +0200

r300: cycles estimate for shader-db

To account for:
- macro MAD in vs
- NOPs needed before presubtract
- texture scheduling and a proper texture semaphore usage

The docs don't mention any other references to extra cycles, so
otherwise we assume 1 instruction = 1 cycle.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7573

Reviewed-by: Filip Gawin <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24152>

---

 .../drivers/r300/compiler/radeon_compiler.c | 28 ++++++++++++++++++----
 .../drivers/r300/compiler/radeon_compiler.h | 1 +
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c
index 44a63f3d23e..6f33bbe6275 100644
--- a/src/gallium/drivers/r300/compiler/radeon_compiler.c
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c
@@ -357,15 +357,24 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
 {
 	struct rc_instruction * tmp;
 	memset(s, 0, sizeof(*s));
+	unsigned ip = 0;
+	unsigned last_begintex = 0;
 
 	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
-							tmp = tmp->Next){
+							tmp = tmp->Next, ip++){
 		const struct rc_opcode_info * info;
 		rc_for_all_reads_mask(tmp, reg_count_callback, s);
 		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
 			info = rc_get_opcode_info(tmp->U.I.Opcode);
-			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
+			if (info->Opcode == RC_OPCODE_BEGIN_TEX) {
+				/* The R5xx docs mention ~30 cycles in section 8.3.1 */
+				s->num_cycles += 30;
+				last_begintex = ip;
 				continue;
+			}
+			if (info->Opcode == RC_OPCODE_MAD &&
+			    rc_inst_has_three_diff_temp_srcs(tmp))
+				s->num_cycles++;
 		} else {
 			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
 				s->num_presub_ops++;
@@ -385,6 +394,13 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
 				tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
 				s->num_omod_ops++;
 			}
+			if (tmp->U.P.Nop)
+				s->num_cycles++;
+			/* SemWait has effect only on R500, the more instructions we can put
+			 * between the tex block and the first texture semaphore, the better.
+			 */
+			if (tmp->U.P.SemWait && c->is_r500)
+				s->num_cycles -= ip - last_begintex;
 			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
 		}
 		if (info->IsFlowControl) {
@@ -400,6 +416,7 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
 		if (info->HasTexture)
 			s->num_tex_insts++;
 		s->num_insts++;
+		s->num_cycles++;
 	}
 	/* Increment here because the reg_count_callback store the max
 	 * temporary reg index in s->nun_temp_regs. */
@@ -416,11 +433,14 @@ static void print_stats(struct radeon_compiler * c)
 	 * only the FS has, because shader-db's report.py wants all shaders to
 	 * have the same set.
 	 */
-	util_debug_message(c->debug, SHADER_INFO, "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, %u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits",
+	util_debug_message(c->debug, SHADER_INFO,
+	                   "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol,"
+	                   "%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles",
 	                   c->type == RC_VERTEX_PROGRAM ? "VS" : "FS",
 	                   s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts,
 	                   s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
-	                   s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals);
+	                   s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals,
+	                   s.num_cycles);
 }
 
 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h
index cbedabb4fff..0e4321fae83 100644
--- a/src/gallium/drivers/r300/compiler/radeon_compiler.h
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h
@@ -147,6 +147,7 @@ struct radeon_compiler_pass {
 };
 
 struct rc_program_stats {
+	unsigned num_cycles;
 	unsigned num_consts;
 	unsigned num_insts;
 	unsigned num_fc_insts;
