Module: Mesa
Branch: main
Commit: 5632d8d1a777d39c7882dcb011aab4619bcff01a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5632d8d1a777d39c7882dcb011aab4619bcff01a

Author: Marek Olšák <[email protected]>
Date:   Wed Jun  7 23:48:13 2023 -0400

radeonsi: replace tcs_out_lds_layout with nearly identical tes_offchip_addr

In TCS, tcs_out_lds_layout is effectively renamed to tes_offchip_addr: TCS
now uses the same variable as TES, with the same bit layout. The only
difference in the old bit layout was that TCS had to mask out the low bits;
this commit removes that masking as well.

The enums are renamed to *_SGPR_TCS_OFFCHIP_ADDR so as not to conflict
with *_SGPR_TES_OFFCHIP_ADDR, which are in different user data SGPRs.

Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23517>

---

 src/gallium/drivers/radeonsi/si_nir_lower_abi.c    | 10 +------
 src/gallium/drivers/radeonsi/si_pipe.h             |  1 -
 src/gallium/drivers/radeonsi/si_shader.c           |  6 ++---
 src/gallium/drivers/radeonsi/si_shader.h           |  4 +--
 src/gallium/drivers/radeonsi/si_shader_internal.h  |  6 +----
 src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 31 +++++++++-------------
 src/gallium/drivers/radeonsi/si_state_draw.cpp     |  5 ++--
 7 files changed, 21 insertions(+), 42 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c 
b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
index 3beb5746971..73d654a3df8 100644
--- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
+++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
@@ -162,15 +162,7 @@ fetch_framebuffer(nir_builder *b, struct si_shader_args 
*args,
 static nir_ssa_def *build_tess_ring_desc(nir_builder *b, struct si_screen 
*screen,
                                          struct si_shader_args *args)
 {
-   nir_ssa_def *addr;
-   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
-      addr = ac_nir_load_arg(b, &args->ac, args->tcs_out_lds_layout);
-      /* TCS only receives high 13 bits of the address. */
-      addr = nir_iand_imm(b, addr, 0xfff80000);
-   } else {
-      assert(b->shader->info.stage == MESA_SHADER_TESS_EVAL);
-      addr = ac_nir_load_arg(b, &args->ac, args->tes_offchip_addr);
-   }
+   nir_ssa_def *addr = ac_nir_load_arg(b, &args->ac, args->tes_offchip_addr);
 
    uint32_t rsrc3 =
       S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 34902f93a24..20860ec4c90 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1183,7 +1183,6 @@ struct si_context {
    unsigned last_tes_sh_base;
    bool last_tess_uses_primid;
    unsigned num_patches_per_workgroup;
-   unsigned tcs_out_layout;
    unsigned tcs_out_offsets;
    unsigned tcs_offchip_layout;
    unsigned tes_offchip_ring_va_sgpr;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 38d29da9185..2b7b00de1c0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -413,7 +413,7 @@ void si_init_shader_args(struct si_shader *shader, struct 
si_shader_args *args)
       declare_per_stage_desc_pointers(args, shader, true);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->tcs_offchip_layout);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->tcs_out_lds_offsets);
-      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->tcs_out_lds_layout);
+      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->tes_offchip_addr);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->ac.tess_offchip_offset);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->ac.tcs_factor_offset);
@@ -464,7 +464,7 @@ void si_init_shader_args(struct si_shader *shader, struct 
si_shader_args *args)
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->ac.start_instance);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->tcs_offchip_layout);
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->tcs_out_lds_offsets);
-      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->tcs_out_lds_layout);
+      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&args->tes_offchip_addr);
       if (stage == MESA_SHADER_VERTEX)
          declare_vb_descriptor_input_sgprs(args, shader);
 
@@ -505,7 +505,7 @@ void si_init_shader_args(struct si_shader *shader, struct 
si_shader_args *args)
              * param_tcs_offchip_layout, and internal_bindings
              * should be passed to the epilog.
              */
-            for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++)
+            for (i = 0; i <= 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR; i++)
                ac_add_return(&args->ac, AC_ARG_SGPR);
             for (i = 0; i < 11; i++)
                ac_add_return(&args->ac, AC_ARG_VGPR);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 4c5c1438ee2..31a659301dd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -170,14 +170,14 @@ enum
    /* GFX6-8: TCS only */
    GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
    GFX6_SGPR_TCS_OUT_OFFSETS,
-   GFX6_SGPR_TCS_OUT_LAYOUT,
+   GFX6_SGPR_TCS_OFFCHIP_ADDR,
    GFX6_SGPR_TCS_IN_LAYOUT,
    GFX6_TCS_NUM_USER_SGPR,
 
    /* GFX9: Merged LS-HS (VS-TCS) only. */
    GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
    GFX9_SGPR_TCS_OUT_OFFSETS,
-   GFX9_SGPR_TCS_OUT_LAYOUT,
+   GFX9_SGPR_TCS_OFFCHIP_ADDR,
    GFX9_TCS_NUM_USER_SGPR,
 
    /* GS limits */
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 4ed2256a2e2..8e8166da653 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -60,12 +60,8 @@ struct si_shader_args {
     *           = 1M, clamped to 32K(LDS limit) / 4 = 8K
     */
    struct ac_arg tcs_out_lds_offsets;
-   /* Layout of TCS outputs / TES inputs:
-    *   [19:31] = high 13 bits of the 32-bit address of tessellation ring 
buffers
-    */
-   struct ac_arg tcs_out_lds_layout;
 
-   /* API TES */
+   /* API TCS & TES */
    struct ac_arg tes_offchip_addr;
    /* PS */
    struct ac_arg pos_fixed_pt;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c 
b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index 5be96474202..626dabd3a9a 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -169,23 +169,16 @@ static LLVMValueRef lshs_lds_load(struct 
si_shader_context *ctx, LLVMTypeRef typ
 
 enum si_tess_ring
 {
-   TCS_FACTOR_RING,
-   TESS_OFFCHIP_RING_TCS,
-   TESS_OFFCHIP_RING_TES,
+   TESS_FACTOR_RING,
+   TESS_OFFCHIP_RING,
 };
 
 static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, 
enum si_tess_ring ring)
 {
    LLVMBuilderRef builder = ctx->ac.builder;
-   LLVMValueRef addr = ac_get_arg(
-      &ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->args->tes_offchip_addr : 
ctx->args->tcs_out_lds_layout);
+   LLVMValueRef addr = ac_get_arg(&ctx->ac, ctx->args->tes_offchip_addr);
 
-   /* TCS only receives high 13 bits of the address. */
-   if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {
-      addr = LLVMBuildAnd(builder, addr, LLVMConstInt(ctx->ac.i32, 0xfff80000, 
0), "");
-   }
-
-   if (ring == TCS_FACTOR_RING) {
+   if (ring == TESS_FACTOR_RING) {
       unsigned tf_offset = ctx->screen->hs.tess_offchip_ring_size;
       addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 
0), "");
    }
@@ -335,7 +328,7 @@ static void si_write_tess_factors(struct si_shader_context 
*ctx, union si_shader
       vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
 
    /* Get the buffer. */
-   buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);
+   buffer = get_tess_ring_descriptor(ctx, TESS_FACTOR_RING);
 
    /* Get the offset. */
    tf_base = ac_get_arg(&ctx->ac, ctx->args->ac.tcs_factor_offset);
@@ -372,7 +365,7 @@ static void si_write_tess_factors(struct si_shader_context 
*ctx, union si_shader
       LLVMValueRef tf_inner_offset;
       unsigned param_outer, param_inner;
 
-      buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
+      buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING);
       base = ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset);
 
       param_outer = 
si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);
@@ -433,14 +426,14 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
    if (ctx->screen->info.gfx_level >= GFX9) {
       ret =
          si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + 
GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
-      ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_layout, 8 + 
GFX9_SGPR_TCS_OUT_LAYOUT);
+      ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + 
GFX9_SGPR_TCS_OFFCHIP_ADDR);
       /* Tess offchip and tess factor offsets are at the beginning. */
       ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, 
2);
       ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, 4);
-      vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1;
+      vgpr = 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR + 1;
    } else {
       ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 
GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
-      ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_layout, 
GFX6_SGPR_TCS_OUT_LAYOUT);
+      ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 
GFX6_SGPR_TCS_OFFCHIP_ADDR);
       /* Tess offchip and tess factor offsets are after user SGPRs. */
       ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, 
GFX6_TCS_NUM_USER_SGPR);
       ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, 
GFX6_TCS_NUM_USER_SGPR + 1);
@@ -513,7 +506,7 @@ static void si_set_ls_return_value_for_tcs(struct 
si_shader_context *ctx)
 
    ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + 
GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
    ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_offsets, 8 + 
GFX9_SGPR_TCS_OUT_OFFSETS);
-   ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_layout, 8 + 
GFX9_SGPR_TCS_OUT_LAYOUT);
+   ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + 
GFX9_SGPR_TCS_OFFCHIP_ADDR);
 
    unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
    ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
@@ -581,7 +574,7 @@ void si_llvm_build_tcs_epilog(struct si_shader_context 
*ctx, union si_shader_par
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&ctx->args->tcs_offchip_layout);
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
-      ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&ctx->args->tcs_out_lds_layout);
+      ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&ctx->args->tes_offchip_addr);
    } else {
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
@@ -589,7 +582,7 @@ void si_llvm_build_tcs_epilog(struct si_shader_context 
*ctx, union si_shader_par
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&ctx->args->tcs_offchip_layout);
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
-      ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&ctx->args->tcs_out_lds_layout);
+      ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&ctx->args->tes_offchip_addr);
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&ctx->args->ac.tess_offchip_offset);
       ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, 
&ctx->args->ac.tcs_factor_offset);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp 
b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index e2150847743..5f388510331 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -797,7 +797,6 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
    assert((ring_va & u_bit_consecutive(0, 19)) == 0);
 
    sctx->tes_offchip_ring_va_sgpr = ring_va;
-   sctx->tcs_out_layout = ring_va;
    sctx->tcs_out_offsets = ((perpatch_output_offset / 4) << 16);
    sctx->tcs_offchip_layout =
       (num_patches - 1) | ((num_tcs_output_cp - 1) << 6) | ((num_tcs_input_cp 
- 1) << 11) |
@@ -863,7 +862,7 @@ static void si_emit_tess_io_layout_state(struct si_context 
*sctx)
          R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 
4, 3);
       radeon_emit(sctx->tcs_offchip_layout);
       radeon_emit(sctx->tcs_out_offsets);
-      radeon_emit(sctx->tcs_out_layout);
+      radeon_emit(sctx->tes_offchip_ring_va_sgpr);
    } else {
       /* Due to a hw bug, RSRC2_LS must be written twice with another
        * LS register written in between. */
@@ -878,7 +877,7 @@ static void si_emit_tess_io_layout_state(struct si_context 
*sctx)
          R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 
4, 4);
       radeon_emit(sctx->tcs_offchip_layout);
       radeon_emit(sctx->tcs_out_offsets);
-      radeon_emit(sctx->tcs_out_layout);
+      radeon_emit(sctx->tes_offchip_ring_va_sgpr);
       radeon_emit(sctx->current_vs_state);
    }
 

Reply via email to