Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
Reviewed-by: Marek Olšák <marek.ol...@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c        | 28 ++++++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_shader.h        |  3 ++-
 src/gallium/drivers/radeonsi/si_state_shaders.c |  9 ++++++++
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 3df7820..1f162b5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -91,6 +91,12 @@ struct si_shader_context
        int param_tes_rel_patch_id;
        int param_tes_patch_id;
        int param_es2gs_offset;
+       int param_oc_lds;
+
+       /* Sets a bit if the dynamic HS control word was 0x80000000. The bit is
+        * 0x800000 for VS, 0x1 for ES.
+        */
+       int param_tess_offchip;
 
        LLVMTargetMachineRef tm;
 
@@ -2326,14 +2332,14 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
                tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
                                          SI_PARAM_TESS_FACTOR_OFFSET);
                ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
-                                          SI_TCS_NUM_USER_SGPR, "");
+                                          SI_TCS_NUM_USER_SGPR + 1, "");
 
                /* VGPRs */
                rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
                invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
                tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
 
-               vgpr = SI_TCS_NUM_USER_SGPR + 1;
+               vgpr = SI_TCS_NUM_USER_SGPR + 2;
                ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
                ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
                ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
@@ -4945,7 +4951,11 @@ static void declare_streamout_params(struct si_shader_context *ctx,
 
        /* Streamout SGPRs. */
        if (so->num_outputs) {
-               params[ctx->param_streamout_config = (*num_params)++] = i32;
+               if (ctx->type != PIPE_SHADER_TESS_EVAL)
+                       params[ctx->param_streamout_config = (*num_params)++] = i32;
+               else
+                       ctx->param_streamout_config = ctx->param_tess_offchip;
+
                params[ctx->param_streamout_write_index = (*num_params)++] = i32;
        }
        /* A streamout buffer offset is loaded if the stride is non-zero. */
@@ -5065,6 +5075,7 @@ static void create_function(struct si_shader_context *ctx)
                params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32;
                params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32;
                params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32;
+               params[ctx->param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx->i32;
                params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx->i32;
                last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
 
@@ -5074,8 +5085,10 @@ static void create_function(struct si_shader_context *ctx)
                num_params = SI_PARAM_REL_IDS+1;
 
                if (!ctx->is_monolithic) {
-                       /* PARAM_TESS_FACTOR_OFFSET is after user SGPRs. */
-                       for (i = 0; i <= SI_TCS_NUM_USER_SGPR; i++)
+                       /* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are
+                        * placed after the user SGPRs.
+                        */
+                       for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++)
                                returns[num_returns++] = ctx->i32; /* SGPRs */
 
                        for (i = 0; i < 3; i++)
@@ -5089,10 +5102,14 @@ static void create_function(struct si_shader_context *ctx)
                num_params = SI_PARAM_TCS_OUT_LAYOUT+1;
 
                if (shader->key.tes.as_es) {
+                       params[ctx->param_oc_lds = num_params++] = ctx->i32;
+                       params[ctx->param_tess_offchip = num_params++] = ctx->i32;
                        params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
                } else {
+                       params[ctx->param_tess_offchip = num_params++] = ctx->i32;
                        declare_streamout_params(ctx, &shader->selector->so,
                                                 params, ctx->i32, &num_params);
+                       params[ctx->param_oc_lds = num_params++] = ctx->i32;
                }
                last_sgpr = num_params - 1;
 
@@ -6640,6 +6657,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen,
        params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
        params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
        params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
+       params[ctx.param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx.i32;
        params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx.i32;
        last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
        num_params = last_sgpr + 1;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 0ab816c..61ddcd1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -166,7 +166,8 @@ enum {
        SI_PARAM_LS_OUT_LAYOUT,  /* same value as TCS_IN_LAYOUT, LS only */
 
        /* TCS only parameters. */
-       SI_PARAM_TESS_FACTOR_OFFSET = SI_PARAM_TCS_IN_LAYOUT + 1,
+       SI_PARAM_TCS_OC_LDS = SI_PARAM_TCS_IN_LAYOUT + 1,
+       SI_PARAM_TESS_FACTOR_OFFSET,
        SI_PARAM_PATCH_ID,
        SI_PARAM_REL_IDS,
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index d8ae2b2..968fc88 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -355,6 +355,7 @@ static void si_shader_hs(struct si_shader *shader)
                       S_00B428_FLOAT_MODE(shader->config.float_mode));
        si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
                       S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+                      S_00B42C_OC_LDS_EN(1) |
                       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 }
 
@@ -364,6 +365,7 @@ static void si_shader_es(struct si_shader *shader)
        unsigned num_user_sgprs;
        unsigned vgpr_comp_cnt;
        uint64_t va;
+       unsigned oc_lds_en;
 
        pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
 
@@ -382,6 +384,8 @@ static void si_shader_es(struct si_shader *shader)
        } else
                unreachable("invalid shader selector type");
 
+       oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
+
        si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
                       shader->selector->esgs_itemsize / 4);
        si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
@@ -394,6 +398,7 @@ static void si_shader_es(struct si_shader *shader)
                       S_00B328_FLOAT_MODE(shader->config.float_mode));
        si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
                       S_00B32C_USER_SGPR(num_user_sgprs) |
+                      S_00B32C_OC_LDS_EN(oc_lds_en) |
                       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 
        if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
@@ -490,6 +495,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
        unsigned num_user_sgprs;
        unsigned nparams, vgpr_comp_cnt;
        uint64_t va;
+       unsigned oc_lds_en;
        unsigned window_space =
           shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
        bool enable_prim_id = si_vs_exports_prim_id(shader);
@@ -547,6 +553,8 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
                                                   V_02870C_SPI_SHADER_4COMP :
                                                   V_02870C_SPI_SHADER_NONE));
 
+       oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
+
        si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
        si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
        si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
@@ -557,6 +565,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
                       S_00B128_FLOAT_MODE(shader->config.float_mode));
        si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
                       S_00B12C_USER_SGPR(num_user_sgprs) |
+                      S_00B12C_OC_LDS_EN(oc_lds_en) |
                       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
                       S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
                       S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
-- 
2.8.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to