The series is Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
On 12.10.2016 23:19, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com> The table was copied from the Vulkan driver. The comment lines are as long as the table for cosmetic reasons. --- src/gallium/drivers/radeonsi/si_shader.h | 1 - src/gallium/drivers/radeonsi/si_state_shaders.c | 53 +++++++++++++++---------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index f2618ac..b07210c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -432,21 +432,20 @@ struct si_shader { struct si_shader_part *prolog; struct si_shader_part *epilog; struct si_shader *gs_copy_shader; struct si_pm4_state *pm4; struct r600_resource *bo; struct r600_resource *scratch_bo; union si_shader_key key; bool is_binary_shared; - unsigned z_order; /* The following data is all that's needed for binary shaders. */ struct radeon_shader_binary binary; struct si_shader_config config; struct si_shader_info info; /* Shader key + LLVM IR + disassembly + statistics. * Generated for debug contexts only. */ char *shader_log; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index be5c659..d339b84 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -800,34 +800,20 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B028_DX10_CLAMP(1) | S_00B028_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); - - /* DON'T USE EARLY_Z_THEN_RE_Z !!! - * - * It decreases performance by 15% in DiRT: Showdown on Ultra settings. 
- And it has pretty complex shaders. - * - * Shaders with side effects that must execute independently of the - * depth test require LATE_Z. - */ - if (info->writes_memory && - !info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) - shader->z_order = V_02880C_LATE_Z; - else - shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z; } static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader *shader) { switch (shader->selector->type) { case PIPE_SHADER_VERTEX: if (shader->key.vs.as_ls) si_shader_ls(shader); else if (shader->key.vs.as_es) @@ -1364,26 +1350,52 @@ static void *si_create_shader_selector(struct pipe_context *ctx, case TGSI_FS_DEPTH_LAYOUT_GREATER: sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z); break; case TGSI_FS_DEPTH_LAYOUT_LESS: sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z); break; } - if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) - sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1); + /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as follows: + * + * | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP + * --|-----------|------------|------------|--------------------|-------------------|------------- + * 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0 + * 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0 + * 2 | false | true | n/a | LateZ | 1 | 0 + * 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0 + * 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1 + * + * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register. + * In case 2, NOOP_CULL is a don't care field. In cases 2, 3 and 4, ReZ doesn't make sense. + * + * Don't use ReZ without profiling !!! + * + * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex + * shaders. 
+ */ + if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) { + /* Cases 3, 4. */ + sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) | + S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) | + S_02880C_EXEC_ON_NOOP(sel->info.writes_memory); + } else if (sel->info.writes_memory) { + /* Case 2. */ + sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | + S_02880C_EXEC_ON_HIER_FAIL(1); + } else { + /* Case 1. */ + sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + } - if (sel->info.writes_memory) - sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) | - S_02880C_EXEC_ON_NOOP(1); pipe_mutex_init(sel->mutex); util_queue_fence_init(&sel->ready); if ((sctx->b.debug.debug_message && !sctx->b.debug.async) || sctx->is_debug || r600_can_dump_shader(&sscreen->b, sel->info.processor) || !util_queue_is_initialized(&sscreen->shader_compiler_queue)) si_init_shader_selector_async(sel, -1); else util_queue_add_job(&sscreen->shader_compiler_queue, sel, @@ -2206,22 +2218,21 @@ bool si_update_shaders(struct si_context *sctx) if (sctx->ps_shader.cso) { unsigned db_shader_control; r = si_shader_select(ctx, &sctx->ps_shader); if (r) return false; si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); db_shader_control = sctx->ps_shader.cso->db_shader_control | - S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS) | - S_02880C_Z_ORDER(sctx->ps_shader.current->z_order); + S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS); if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || sctx->sprite_coord_enable != rs->sprite_coord_enable || sctx->flatshade != rs->flatshade) { sctx->sprite_coord_enable = rs->sprite_coord_enable; sctx->flatshade = rs->flatshade; si_mark_atom_dirty(sctx, &sctx->spi_map); } if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev