From: Marek Olšák <[email protected]>

---
 src/gallium/drivers/radeonsi/si_pipe.h          |  3 ++-
 src/gallium/drivers/radeonsi/si_state_draw.c    | 31 ++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_state_shaders.c | 36 ++++++++++++++++++++-----
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 0978831..918aa0f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -355,21 +355,22 @@ struct si_context {
 
        /* Scratch buffer */
        struct r600_atom        scratch_state;
        struct r600_resource    *scratch_buffer;
        unsigned                scratch_waves;
        unsigned                spi_tmpring_size;
 
        struct r600_resource    *compute_scratch_buffer;
 
        /* Emitted derived tessellation state. */
-       struct si_shader        *last_ls; /* local shader (VS) */
+       /* Local shader (VS), or HS if LS-HS are merged. */
+       struct si_shader        *last_ls;
        struct si_shader_selector *last_tcs;
        int                     last_num_tcs_input_cp;
        int                     last_tes_sh_base;
        unsigned                last_num_patches;
 
        /* Debug state. */
        bool                    is_debug;
        struct radeon_saved_cs  last_gfx;
        struct r600_resource    *last_trace_buf;
        struct r600_resource    *trace_buf;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c24d607..4feadbe 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -89,51 +89,65 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
  * LS.LDS_SIZE is shared by all 3 shader stages.
  *
  * The information about LDS and other non-compile-time parameters is then
  * written to userdata SGPRs.
  */
 static void si_emit_derived_tess_state(struct si_context *sctx,
                                       const struct pipe_draw_info *info,
                                       unsigned *num_patches)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-       struct si_shader_ctx_state *ls = &sctx->vs_shader;
+       struct si_shader *ls_current;
+       struct si_shader_selector *ls;
        /* The TES pointer will only be used for sctx->last_tcs.
         * It would be wrong to think that TCS = TES. */
        struct si_shader_selector *tcs =
                sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
        unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
        unsigned num_tcs_input_cp = info->vertices_per_patch;
        unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
        unsigned num_tcs_patch_outputs;
        unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
        unsigned input_patch_size, output_patch_size, output_patch0_offset;
        unsigned perpatch_output_offset, lds_size;
        unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
        unsigned offchip_layout, hardware_lds_size, ls_hs_config;
 
-       if (sctx->last_ls == ls->current &&
+       /* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */
+       if (sctx->b.chip_class >= GFX9) {
+               if (sctx->tcs_shader.cso)
+                       ls_current = sctx->tcs_shader.current;
+               else
+                       ls_current = sctx->fixed_func_tcs_shader.current;
+
+               ls = ls_current->key.part.tcs.ls;
+       } else {
+               ls_current = sctx->vs_shader.current;
+               ls = sctx->vs_shader.cso;
+       }
+
+       if (sctx->last_ls == ls_current &&
            sctx->last_tcs == tcs &&
            sctx->last_tes_sh_base == tes_sh_base &&
            sctx->last_num_tcs_input_cp == num_tcs_input_cp) {
                *num_patches = sctx->last_num_patches;
                return;
        }
 
-       sctx->last_ls = ls->current;
+       sctx->last_ls = ls_current;
        sctx->last_tcs = tcs;
        sctx->last_tes_sh_base = tes_sh_base;
        sctx->last_num_tcs_input_cp = num_tcs_input_cp;
 
        /* This calculates how shader inputs and outputs among VS, TCS, and TES
         * are laid out in LDS. */
-       num_tcs_inputs = util_last_bit64(ls->cso->outputs_written);
+       num_tcs_inputs = util_last_bit64(ls->outputs_written);
 
        if (sctx->tcs_shader.cso) {
                num_tcs_outputs = util_last_bit64(tcs->outputs_written);
                num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
                num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
        } else {
                /* No TCS. Route varyings from LS to TES. */
                num_tcs_outputs = num_tcs_inputs;
                num_tcs_output_cp = num_tcs_input_cp;
                num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
@@ -210,33 +224,36 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
                assert(lds_size <= 32768);
                lds_size = align(lds_size, 256) / 256;
        }
 
        /* Set SI_SGPR_VS_STATE_BITS. */
        sctx->current_vs_state &= C_VS_STATE_LS_OUT_PATCH_SIZE &
                                  C_VS_STATE_LS_OUT_VERTEX_SIZE;
        sctx->current_vs_state |= tcs_in_layout;
 
        if (sctx->b.chip_class >= GFX9) {
-               // TODO
+               unsigned hs_rsrc2 = ls_current->config.rsrc2 |
+                                   S_00B42C_LDS_SIZE(lds_size);
+
+               radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
        } else {
-               unsigned ls_rsrc2 = ls->current->config.rsrc2;
+               unsigned ls_rsrc2 = ls_current->config.rsrc2;
 
                si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
                ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
 
                /* Due to a hw bug, RSRC2_LS must be written twice with another
                 * LS register written in between. */
                if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
                        radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
                radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
-               radeon_emit(cs, ls->current->config.rsrc1);
+               radeon_emit(cs, ls_current->config.rsrc1);
                radeon_emit(cs, ls_rsrc2);
 
                /* Set userdata SGPRs for TCS. */
                radeon_set_sh_reg_seq(cs,
                        R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
                radeon_emit(cs, offchip_layout);
                radeon_emit(cs, tcs_out_offsets);
                radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
                radeon_emit(cs, tcs_in_layout);
        }
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4ac6182..943f7b9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -466,39 +466,61 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
                           S_00B528_DX10_CLAMP(1) |
                           S_00B528_FLOAT_MODE(shader->config.float_mode);
        shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_VS_NUM_USER_SGPR) |
                           S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
 
 static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
        uint64_t va;
+       unsigned ls_vgpr_comp_cnt = 0;
 
        pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
                return;
 
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
-       si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
-       si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
+       if (sscreen->b.chip_class >= GFX9) {
+               si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
+               si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40);
+
+               /* We need at least 2 components for LS.
+                * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
+               ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
+
+               shader->config.rsrc2 =
+                       S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+                       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
+       } else {
+               si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
+               si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
+
+               shader->config.rsrc2 =
+                       S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+                       S_00B42C_OC_LDS_EN(1) |
+                       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
+       }
+
        si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
                       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
                       S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
                       S_00B428_DX10_CLAMP(1) |
-                      S_00B428_FLOAT_MODE(shader->config.float_mode));
-       si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
-                      S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
-                      S_00B42C_OC_LDS_EN(sscreen->b.chip_class <= VI) |
-                      S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
+                      S_00B428_FLOAT_MODE(shader->config.float_mode) |
+                      S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt));
+
+       if (sscreen->b.chip_class <= VI) {
+               si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
+                              shader->config.rsrc2);
+       }
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
        unsigned num_user_sgprs;
        unsigned vgpr_comp_cnt;
        uint64_t va;
        unsigned oc_lds_en;
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to