The series is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 12.10.2016 23:19, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

The table was copied from the Vulkan driver. The comment lines are as long
as the table for cosmetic reasons.
---
 src/gallium/drivers/radeonsi/si_shader.h        |  1 -
 src/gallium/drivers/radeonsi/si_state_shaders.c | 53 +++++++++++++++----------
 2 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index f2618ac..b07210c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -432,21 +432,20 @@ struct si_shader {

        struct si_shader_part           *prolog;
        struct si_shader_part           *epilog;

        struct si_shader                *gs_copy_shader;
        struct si_pm4_state             *pm4;
        struct r600_resource            *bo;
        struct r600_resource            *scratch_bo;
        union si_shader_key             key;
        bool                            is_binary_shared;
-       unsigned                        z_order;

        /* The following data is all that's needed for binary shaders. */
        struct radeon_shader_binary     binary;
        struct si_shader_config         config;
        struct si_shader_info           info;

        /* Shader key + LLVM IR + disassembly + statistics.
         * Generated for debug contexts only.
         */
        char                            *shader_log;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index be5c659..d339b84 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -800,34 +800,20 @@ static void si_shader_ps(struct si_shader *shader)

        si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
                       S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
                       S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
                       S_00B028_DX10_CLAMP(1) |
                       S_00B028_FLOAT_MODE(shader->config.float_mode));
        si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
                       S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
                       S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
                       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
-
-       /* DON'T USE EARLY_Z_THEN_RE_Z !!!
-        *
-        * It decreases performance by 15% in DiRT: Showdown on Ultra settings.
-        * And it has pretty complex shaders.
-        *
-        * Shaders with side effects that must execute independently of the
-        * depth test require LATE_Z.
-        */
-       if (info->writes_memory &&
-           !info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
-               shader->z_order = V_02880C_LATE_Z;
-       else
-               shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
 }

 static void si_shader_init_pm4_state(struct si_screen *sscreen,
                                      struct si_shader *shader)
 {
        switch (shader->selector->type) {
        case PIPE_SHADER_VERTEX:
                if (shader->key.vs.as_ls)
                        si_shader_ls(shader);
                else if (shader->key.vs.as_es)
@@ -1364,26 +1350,52 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
        case TGSI_FS_DEPTH_LAYOUT_GREATER:
                sel->db_shader_control |=
                        S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
                break;
        case TGSI_FS_DEPTH_LAYOUT_LESS:
                sel->db_shader_control |=
                        S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
                break;
        }

-       if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
-               sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1);
+       /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as following:
+        *
+        *   | early Z/S | writes_mem | allow_ReZ? |      Z_ORDER       | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP
+        * --|-----------|------------|------------|--------------------|-------------------|-------------
+        * 1a|   false   |   false    |   true     | EarlyZ_Then_ReZ    |         0         |     0
+        * 1b|   false   |   false    |   false    | EarlyZ_Then_LateZ  |         0         |     0
+        * 2 |   false   |   true     |   n/a      |       LateZ        |         1         |     0
+        * 3 |   true    |   false    |   n/a      | EarlyZ_Then_LateZ  |         0         |     0
+        * 4 |   true    |   true     |   n/a      | EarlyZ_Then_LateZ  |         0         |     1
+        *
+        * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register.
+        * In case 2, NOOP_CULL is a don't care field. In case 2, 3 and 4, ReZ doesn't make sense.
+        *
+        * Don't use ReZ without profiling !!!
+        *
+        * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex
+        * shaders.
+        */
+       if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) {
+               /* Cases 3, 4. */
+               sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
+                                         S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
+                                         S_02880C_EXEC_ON_NOOP(sel->info.writes_memory);
+       } else if (sel->info.writes_memory) {
+               /* Case 2. */
+               sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) |
+                                         S_02880C_EXEC_ON_HIER_FAIL(1);
+       } else {
+               /* Case 1. */
+               sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
+       }

-       if (sel->info.writes_memory)
-               sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) |
-                                         S_02880C_EXEC_ON_NOOP(1);
        pipe_mutex_init(sel->mutex);
        util_queue_fence_init(&sel->ready);

        if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
            sctx->is_debug ||
            r600_can_dump_shader(&sscreen->b, sel->info.processor) ||
            !util_queue_is_initialized(&sscreen->shader_compiler_queue))
                si_init_shader_selector_async(sel, -1);
        else
                util_queue_add_job(&sscreen->shader_compiler_queue, sel,
@@ -2206,22 +2218,21 @@ bool si_update_shaders(struct si_context *sctx)
        if (sctx->ps_shader.cso) {
                unsigned db_shader_control;

                r = si_shader_select(ctx, &sctx->ps_shader);
                if (r)
                        return false;
                si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);

                db_shader_control =
                        sctx->ps_shader.cso->db_shader_control |
-                       S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS) |
-                       S_02880C_Z_ORDER(sctx->ps_shader.current->z_order);
+                       S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);

                if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
                    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
                    sctx->flatshade != rs->flatshade) {
                        sctx->sprite_coord_enable = rs->sprite_coord_enable;
                        sctx->flatshade = rs->flatshade;
                        si_mark_atom_dirty(sctx, &sctx->spi_map);
                }

                if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to