Module: Mesa
Branch: main
Commit: 02784625754856504bdc15238dcdf31ad2c12f27
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=02784625754856504bdc15238dcdf31ad2c12f27

Author: Chia-I Wu <[email protected]>
Date:   Wed Feb 8 12:25:21 2023 -0800

turnip: skip unnecessary CP_REG_TEST for cond load/store

When no attachment allows conditional load/store, skip the unnecessary
CP_REG_TEST.

This is done to avoid a performance trap on a618 (gen1). CP_REG_TEST or
any command that reads a register is slow on a618. glmark2 score goes
from 830 to 1001.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8162
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21206>

---

 src/freedreno/vulkan/tu_cmd_buffer.c | 11 ++++++++---
 src/freedreno/vulkan/tu_pass.c       |  3 +++
 src/freedreno/vulkan/tu_pass.h       |  3 +++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 0618b0f24e3..6a551abd017 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -736,7 +736,8 @@ static void
 tu6_emit_cond_for_load_stores(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
                               uint32_t pipe, uint32_t slot, bool skip_wfm)
 {
-   if (cmd->state.tiling->binning_possible) {
+   if (cmd->state.tiling->binning_possible &&
+       cmd->state.pass->has_cond_load_store) {
       tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
       tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(pipe)) |
                      A6XX_CP_REG_TEST_0_BIT(slot) |
@@ -855,8 +856,10 @@ tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
    tu6_emit_blit_scissor(cmd, cs, true);
 
+   const bool cond_exec_allowed = cmd->state.tiling->binning &&
+                                  cmd->state.pass->has_cond_load_store;
    for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
-      tu_load_gmem_attachment(cmd, cs, i, cmd->state.tiling->binning, false);
+      tu_load_gmem_attachment(cmd, cs, i, cond_exec_allowed, false);
 }
 
 static void
@@ -873,9 +876,11 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 
    for (uint32_t a = 0; a < pass->attachment_count; ++a) {
       if (pass->attachments[a].gmem) {
+         const bool cond_exec_allowed = cmd->state.tiling->binning_possible &&
+                                        cmd->state.pass->has_cond_load_store;
          tu_store_gmem_attachment(cmd, cs, a, a,
                                   fb->layers, subpass->multiview_mask,
-                                  cmd->state.tiling->binning_possible);
+                                  cond_exec_allowed);
       }
    }
 
diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c
index e84604575db..0b06dc31a22 100644
--- a/src/freedreno/vulkan/tu_pass.c
+++ b/src/freedreno/vulkan/tu_pass.c
@@ -547,6 +547,9 @@ tu_render_pass_cond_config(struct tu_render_pass *pass)
          (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
       att->cond_store_allowed =
          (att->store || att->store_stencil) && !att->clear_mask;
+
+      pass->has_cond_load_store |=
+         att->cond_load_allowed | att->cond_store_allowed;
    }
 }
 
diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h
index a43288ef4b1..3c0b0785fec 100644
--- a/src/freedreno/vulkan/tu_pass.h
+++ b/src/freedreno/vulkan/tu_pass.h
@@ -108,7 +108,10 @@ struct tu_render_pass
    uint32_t sysmem_bandwidth_per_pixel;
 
    struct tu_subpass_attachment *subpass_attachments;
+
    struct tu_render_pass_attachment *attachments;
+   bool has_cond_load_store;
+
    struct tu_subpass_barrier end_barrier;
    struct tu_subpass subpasses[0];
 };
