From: Marek Olšák <marek.ol...@amd.com>

If 32-bit pointers are supported, both pointers can be moved into s[0:1]
and then ESGS has exactly the same user data SGPR declarations as VS.

If 32-bit pointers are not supported, only one pointer can be moved into
s[0:1]. In that case, the 2nd pointer is moved before TCS constants,
so that the location is the same in HS and GS.
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 83 +++++++++++++++++------
 src/gallium/drivers/radeonsi/si_shader.c      | 94 ++++++++++++++++++---------
 src/gallium/drivers/radeonsi/si_shader.h      | 37 ++++-------
 3 files changed, 140 insertions(+), 74 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 0bad3e1..5083027 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2107,20 +2107,38 @@ static void si_emit_consecutive_shader_pointers(struct 
si_context *sctx,
                u_bit_scan_consecutive_range(&mask, &start, &count);
 
                struct si_descriptors *descs = &sctx->descriptors[start];
 
                si_emit_shader_pointer_head(cs, descs, sh_base, count);
                for (int i = 0; i < count; i++)
                        si_emit_shader_pointer_body(sctx->screen, cs, descs + 
i);
        }
 }
 
+static void si_emit_disjoint_shader_pointers(struct si_context *sctx,
+                                            unsigned pointer_mask,
+                                            unsigned sh_base)
+{
+       if (!sh_base)
+               return;
+
+       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+       unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
+
+       while (mask) {
+               struct si_descriptors *descs = 
&sctx->descriptors[u_bit_scan(&mask)];
+
+               si_emit_shader_pointer_head(cs, descs, sh_base, 1);
+               si_emit_shader_pointer_body(sctx->screen, cs, descs);
+       }
+}
+
 static void si_emit_global_shader_pointers(struct si_context *sctx,
                                           struct si_descriptors *descs)
 {
        if (sctx->b.chip_class == GFX9) {
                /* Broadcast it to all shader stages. */
                si_emit_shader_pointer(sctx, descs,
                                       R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
                return;
        }
 
@@ -2143,28 +2161,35 @@ void si_emit_graphics_shader_pointers(struct si_context 
*sctx,
 {
        uint32_t *sh_base = sctx->shader_pointers.sh_base;
 
        if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
                si_emit_global_shader_pointers(sctx,
                                               
&sctx->descriptors[SI_DESCS_RW_BUFFERS]);
        }
 
        si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
                                            sh_base[PIPE_SHADER_VERTEX]);
-       si_emit_consecutive_shader_pointers(sctx, 
SI_DESCS_SHADER_MASK(TESS_CTRL),
-                                           sh_base[PIPE_SHADER_TESS_CTRL]);
        si_emit_consecutive_shader_pointers(sctx, 
SI_DESCS_SHADER_MASK(TESS_EVAL),
                                            sh_base[PIPE_SHADER_TESS_EVAL]);
-       si_emit_consecutive_shader_pointers(sctx, 
SI_DESCS_SHADER_MASK(GEOMETRY),
-                                           sh_base[PIPE_SHADER_GEOMETRY]);
        si_emit_consecutive_shader_pointers(sctx, 
SI_DESCS_SHADER_MASK(FRAGMENT),
                                            sh_base[PIPE_SHADER_FRAGMENT]);
+       if (HAVE_32BIT_POINTERS || sctx->b.chip_class <= VI) {
+               si_emit_consecutive_shader_pointers(sctx, 
SI_DESCS_SHADER_MASK(TESS_CTRL),
+                                                   
sh_base[PIPE_SHADER_TESS_CTRL]);
+               si_emit_consecutive_shader_pointers(sctx, 
SI_DESCS_SHADER_MASK(GEOMETRY),
+                                                   
sh_base[PIPE_SHADER_GEOMETRY]);
+       } else {
+               si_emit_disjoint_shader_pointers(sctx, 
SI_DESCS_SHADER_MASK(TESS_CTRL),
+                                                
sh_base[PIPE_SHADER_TESS_CTRL]);
+               si_emit_disjoint_shader_pointers(sctx, 
SI_DESCS_SHADER_MASK(GEOMETRY),
+                                                sh_base[PIPE_SHADER_GEOMETRY]);
+       }
 
        sctx->shader_pointers_dirty &=
                ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
 
        if (sctx->vertex_buffer_pointer_dirty) {
                si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
                                       sh_base[PIPE_SHADER_VERTEX]);
                sctx->vertex_buffer_pointer_dirty = false;
        }
 
@@ -2626,54 +2651,70 @@ void si_all_resident_buffers_begin_new_cs(struct 
si_context *sctx)
                                        num_resident_img_handles;
 }
 
 /* INIT/DEINIT/UPLOAD */
 
 void si_init_all_descriptors(struct si_context *sctx)
 {
        int i;
 
 #if !HAVE_32BIT_POINTERS
-       STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
-       STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
+       STATIC_ASSERT(GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES % 2 == 0);
 #endif
 
        for (i = 0; i < SI_NUM_SHADERS; i++) {
-               bool gfx9_tcs = false;
-               bool gfx9_gs = false;
+               bool is_2nd = sctx->b.chip_class >= GFX9 &&
+                                    (i == PIPE_SHADER_TESS_CTRL ||
+                                     i == PIPE_SHADER_GEOMETRY);
                unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + 
SI_NUM_SAMPLERS;
                unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + 
SI_NUM_CONST_BUFFERS;
+               int rel_dw_offset;
                struct si_descriptors *desc;
 
-               if (sctx->b.chip_class >= GFX9) {
-                       gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
-                       gfx9_gs = i == PIPE_SHADER_GEOMETRY;
+               if (is_2nd) {
+                       if (i == PIPE_SHADER_TESS_CTRL) {
+                               rel_dw_offset = 
(R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS -
+                                                
R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
+                       } else { /* PIPE_SHADER_GEOMETRY */
+                               rel_dw_offset = 
(R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS -
+                                                
R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
+                       }
+               } else {
+                       rel_dw_offset = SI_SGPR_CONST_AND_SHADER_BUFFERS;
                }
-
                desc = si_const_and_shader_buffer_descriptors(sctx, i);
                si_init_buffer_resources(&sctx->const_and_shader_buffers[i], 
desc,
-                                        num_buffer_slots,
-                                        gfx9_tcs ? 
GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
-                                        gfx9_gs ? 
GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
-                                                  
SI_SGPR_CONST_AND_SHADER_BUFFERS,
+                                        num_buffer_slots, rel_dw_offset,
                                         RADEON_USAGE_READWRITE,
                                         RADEON_USAGE_READ,
                                         RADEON_PRIO_SHADER_RW_BUFFER,
                                         RADEON_PRIO_CONST_BUFFER);
                desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
 
+               if (is_2nd) {
+#if HAVE_32BIT_POINTERS
+                       if (i == PIPE_SHADER_TESS_CTRL) {
+                               rel_dw_offset = 
(R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS -
+                                                
R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
+                       } else { /* PIPE_SHADER_GEOMETRY */
+                               rel_dw_offset = 
(R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS -
+                                                
R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
+                       }
+#else
+                       rel_dw_offset = GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES;
+#endif
+               } else {
+                       rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES;
+               }
+
                desc = si_sampler_and_image_descriptors(sctx, i);
-               si_init_descriptors(desc,
-                                   gfx9_tcs ? 
GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
-                                   gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
-                                             SI_SGPR_SAMPLERS_AND_IMAGES,
-                                   16, num_sampler_slots);
+               si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots);
 
                int j;
                for (j = 0; j < SI_NUM_IMAGES; j++)
                        memcpy(desc->list + j * 8, null_image_descriptor, 8 * 
4);
                for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
                        memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 
4);
        }
 
        si_init_buffer_resources(&sctx->rw_buffers,
                                 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index cc57ba3..0bf5228 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3342,81 +3342,85 @@ static void si_llvm_emit_tcs_epilogue(struct 
ac_shader_abi *abi,
                ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, 
"");
        }
        ctx->return_value = ret;
 }
 
 /* Pass TCS inputs from LS to TCS on GFX9. */
 static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
 {
        LLVMValueRef ret = ctx->return_value;
 
+       ret = si_insert_input_ptr(ctx, ret, 0, 0);
+       if (HAVE_32BIT_POINTERS)
+               ret = si_insert_input_ptr(ctx, ret, 1, 1);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
        ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
        ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 
5);
 
        ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers,
                                  8 + SI_SGPR_RW_BUFFERS);
        ret = si_insert_input_ptr(ctx, ret,
                                  ctx->param_bindless_samplers_and_images,
                                  8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
 
        ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
                                  8 + SI_SGPR_VS_STATE_BITS);
+
+#if !HAVE_32BIT_POINTERS
+       ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1,
+                                 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
+#endif
+
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
                                  8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
                                  8 + GFX9_SGPR_TCS_OUT_OFFSETS);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout,
                                  8 + GFX9_SGPR_TCS_OUT_LAYOUT);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
                                  8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k,
                                  8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
 
-       unsigned desc_param = ctx->param_tcs_factor_addr_base64k +
-                             (HAVE_32BIT_POINTERS ? 1 : 2);
-       ret = si_insert_input_ptr(ctx, ret, desc_param,
-                                 8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
-       ret = si_insert_input_ptr(ctx, ret, desc_param + 1,
-                                 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
-
        unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
        ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
                                   ac_to_float(&ctx->ac, ctx->abi.tcs_patch_id),
                                   vgpr++, "");
        ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
                                   ac_to_float(&ctx->ac, ctx->abi.tcs_rel_ids),
                                   vgpr++, "");
        ctx->return_value = ret;
 }
 
 /* Pass GS inputs from ES to GS on GFX9. */
 static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
 {
        LLVMValueRef ret = ctx->return_value;
 
+       ret = si_insert_input_ptr(ctx, ret, 0, 0);
+       if (HAVE_32BIT_POINTERS)
+               ret = si_insert_input_ptr(ctx, ret, 1, 1);
        ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
        ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
        ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 
5);
 
        ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers,
                                  8 + SI_SGPR_RW_BUFFERS);
        ret = si_insert_input_ptr(ctx, ret,
                                  ctx->param_bindless_samplers_and_images,
                                  8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
 
-       unsigned desc_param = ctx->param_vs_state_bits + 1;
-       ret = si_insert_input_ptr(ctx, ret, desc_param,
-                                 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
-       ret = si_insert_input_ptr(ctx, ret, desc_param + 1,
-                                 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
+#if !HAVE_32BIT_POINTERS
+       ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1,
+                                 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
+#endif
 
        unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
        for (unsigned i = 0; i < 5; i++) {
                unsigned param = ctx->param_gs_vtx01_offset + i;
                ret = si_insert_input_ret_float(ctx, ret, param, vgpr++);
        }
        ctx->return_value = ret;
 }
 
 static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi,
@@ -4480,44 +4484,58 @@ static unsigned si_get_max_workgroup_size(const struct 
si_shader *shader)
 
        if (!max_work_group_size) {
                /* This is a variable group size compute shader,
                 * compile it for the maximum possible group size.
                 */
                max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
        }
        return max_work_group_size;
 }
 
-static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
-                                           struct si_function_info *fninfo,
-                                           bool assign_params)
+static void declare_const_and_shader_buffers(struct si_shader_context *ctx,
+                                            struct si_function_info *fninfo,
+                                            bool assign_params)
 {
        LLVMTypeRef const_shader_buf_type;
 
        if (ctx->shader->selector->info.const_buffers_declared == 1 &&
            ctx->shader->selector->info.shader_buffers_declared == 0)
                const_shader_buf_type = ctx->f32;
        else
                const_shader_buf_type = ctx->v4i32;
 
        unsigned const_and_shader_buffers =
                add_arg(fninfo, ARG_SGPR,
                        ac_array_in_const32_addr_space(const_shader_buf_type));
 
+       if (assign_params)
+               ctx->param_const_and_shader_buffers = const_and_shader_buffers;
+}
+
+static void declare_samplers_and_images(struct si_shader_context *ctx,
+                                       struct si_function_info *fninfo,
+                                       bool assign_params)
+{
        unsigned samplers_and_images =
                add_arg(fninfo, ARG_SGPR,
                        ac_array_in_const32_addr_space(ctx->v8i32));
 
-       if (assign_params) {
-               ctx->param_const_and_shader_buffers = const_and_shader_buffers;
+       if (assign_params)
                ctx->param_samplers_and_images = samplers_and_images;
-       }
+}
+
+static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
+                                           struct si_function_info *fninfo,
+                                           bool assign_params)
+{
+       declare_const_and_shader_buffers(ctx, fninfo, assign_params);
+       declare_samplers_and_images(ctx, fninfo, assign_params);
 }
 
 static void declare_global_desc_pointers(struct si_shader_context *ctx,
                                         struct si_function_info *fninfo)
 {
        ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
                ac_array_in_const32_addr_space(ctx->v4i32));
        ctx->param_bindless_samplers_and_images = add_arg(fninfo, ARG_SGPR,
                ac_array_in_const32_addr_space(ctx->v8i32));
 }
@@ -4668,44 +4686,49 @@ static void create_function(struct si_shader_context 
*ctx)
                 * placed after the user SGPRs.
                 */
                for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
                        returns[num_returns++] = ctx->i32; /* SGPRs */
                for (i = 0; i < 11; i++)
                        returns[num_returns++] = ctx->f32; /* VGPRs */
                break;
 
        case SI_SHADER_MERGED_VERTEX_TESSCTRL:
                /* Merged stages have 8 system SGPRs at the beginning. */
-               add_arg(&fninfo, ARG_SGPR, ctx->i32); /* 
SPI_SHADER_USER_DATA_ADDR_LO_HS */
-               add_arg(&fninfo, ARG_SGPR, ctx->i32); /* 
SPI_SHADER_USER_DATA_ADDR_HI_HS */
+               /* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
+               if (HAVE_32BIT_POINTERS) {
+                       declare_per_stage_desc_pointers(ctx, &fninfo,
+                                                       ctx->type == 
PIPE_SHADER_TESS_CTRL);
+               } else {
+                       declare_const_and_shader_buffers(ctx, &fninfo,
+                                                        ctx->type == 
PIPE_SHADER_TESS_CTRL);
+               }
                ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
                add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
 
                declare_global_desc_pointers(ctx, &fninfo);
                declare_per_stage_desc_pointers(ctx, &fninfo,
                                                ctx->type == 
PIPE_SHADER_VERTEX);
                declare_vs_specific_input_sgprs(ctx, &fninfo);
 
+               if (!HAVE_32BIT_POINTERS) {
+                       declare_samplers_and_images(ctx, &fninfo,
+                                                   ctx->type == 
PIPE_SHADER_TESS_CTRL);
+               }
                ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, 
ARG_SGPR, ctx->i32);
                ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
-               if (!HAVE_32BIT_POINTERS)
-                       add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-
-               declare_per_stage_desc_pointers(ctx, &fninfo,
-                                               ctx->type == 
PIPE_SHADER_TESS_CTRL);
 
                /* VGPRs (first TCS, then VS) */
                add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, 
&ctx->abi.tcs_patch_id);
                add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, 
&ctx->abi.tcs_rel_ids);
 
                if (ctx->type == PIPE_SHADER_VERTEX) {
                        declare_vs_input_vgprs(ctx, &fninfo,
                                               &num_prolog_vgprs);
 
                        /* LS return values are inputs to the TCS main shader 
part. */
@@ -4722,22 +4745,28 @@ static void create_function(struct si_shader_context 
*ctx)
                         */
                        for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; 
i++)
                                returns[num_returns++] = ctx->i32; /* SGPRs */
                        for (i = 0; i < 11; i++)
                                returns[num_returns++] = ctx->f32; /* VGPRs */
                }
                break;
 
        case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
                /* Merged stages have 8 system SGPRs at the beginning. */
-               add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused 
(SPI_SHADER_USER_DATA_ADDR_LO_GS) */
-               add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused 
(SPI_SHADER_USER_DATA_ADDR_HI_GS) */
+               /* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
+               if (HAVE_32BIT_POINTERS) {
+                       declare_per_stage_desc_pointers(ctx, &fninfo,
+                                                       ctx->type == 
PIPE_SHADER_GEOMETRY);
+               } else {
+                       declare_const_and_shader_buffers(ctx, &fninfo,
+                                                        ctx->type == 
PIPE_SHADER_GEOMETRY);
+               }
                ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused 
(SPI_SHADER_PGM_LO/HI_GS << 8) */
                add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused 
(SPI_SHADER_PGM_LO/HI_GS >> 24) */
 
                declare_global_desc_pointers(ctx, &fninfo);
                declare_per_stage_desc_pointers(ctx, &fninfo,
                                                (ctx->type == 
PIPE_SHADER_VERTEX ||
@@ -4749,22 +4778,24 @@ static void create_function(struct si_shader_context 
*ctx)
                         * Declare as many input SGPRs as the VS has. */
                        ctx->param_tcs_offchip_layout = add_arg(&fninfo, 
ARG_SGPR, ctx->i32);
                        ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, 
ARG_SGPR, ctx->i32);
                        add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
                        add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
                        if (!HAVE_32BIT_POINTERS)
                                add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused 
*/
                        ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, 
ctx->i32); /* unused */
                }
 
-               declare_per_stage_desc_pointers(ctx, &fninfo,
-                                               ctx->type == 
PIPE_SHADER_GEOMETRY);
+               if (!HAVE_32BIT_POINTERS) {
+                       declare_samplers_and_images(ctx, &fninfo,
+                                                   ctx->type == 
PIPE_SHADER_GEOMETRY);
+               }
 
                /* VGPRs (first GS, then VS/TES) */
                ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, 
ctx->i32);
                ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR, 
ctx->i32);
                add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, 
&ctx->abi.gs_prim_id);
                add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, 
&ctx->abi.gs_invocation_id);
                ctx->param_gs_vtx45_offset = add_arg(&fninfo, ARG_VGPR, 
ctx->i32);
 
                if (ctx->type == PIPE_SHADER_VERTEX) {
                        declare_vs_input_vgprs(ctx, &fninfo,
@@ -7257,36 +7288,39 @@ static bool si_shader_select_vs_parts(struct si_screen 
*sscreen,
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
                                         union si_shader_part_key *key)
 {
        struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
        struct si_function_info fninfo;
        LLVMValueRef func;
 
        si_init_function_info(&fninfo);
 
        if (ctx->screen->info.chip_class >= GFX9) {
-               add_arg(&fninfo, ARG_SGPR, ctx->i64);
+               add_arg(&fninfo, ARG_SGPR, ctx->i32);
+               add_arg(&fninfo, ARG_SGPR, ctx->i32);
                ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */
                ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
+               if (!HAVE_32BIT_POINTERS)
+                       add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, 
ARG_SGPR, ctx->i32);
                ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, 
ctx->i32);
        } else {
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index ef4472b..e0d6f70 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -188,52 +188,43 @@ enum {
 
        /* GFX6-8: TCS only */
        GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
        GFX6_SGPR_TCS_OUT_OFFSETS,
        GFX6_SGPR_TCS_OUT_LAYOUT,
        GFX6_SGPR_TCS_IN_LAYOUT,
        GFX6_SGPR_TCS_OFFCHIP_ADDR_BASE64K,
        GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K,
        GFX6_TCS_NUM_USER_SGPR,
 
+       /* GFX9: Merged shaders. */
+#if HAVE_32BIT_POINTERS
+       /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */
+       /* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */
+       GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
+#else
+       /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI 
(SGPR[0:1]). */
+       GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR,
+       GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI,
+       GFX9_MERGED_NUM_USER_SGPR,
+#endif
+
        /* GFX9: Merged LS-HS (VS-TCS) only. */
-       GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
+       GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR,
        GFX9_SGPR_TCS_OUT_OFFSETS,
        GFX9_SGPR_TCS_OUT_LAYOUT,
        GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K,
        GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K,
-#if !HAVE_32BIT_POINTERS
-       GFX9_SGPR_unused_to_align_the_next_pointer,
-#endif
-       GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS,
-#if !HAVE_32BIT_POINTERS
-       GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI,
-#endif
-       GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES,
-#if !HAVE_32BIT_POINTERS
-       GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES_HI,
-#endif
        GFX9_TCS_NUM_USER_SGPR,
 
-       /* GFX9: Merged ES-GS (VS-GS or TES-GS). */
-       GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR,
-#if !HAVE_32BIT_POINTERS
-       GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI,
-#endif
-       GFX9_SGPR_GS_SAMPLERS_AND_IMAGES,
-#if !HAVE_32BIT_POINTERS
-       GFX9_SGPR_GS_SAMPLERS_AND_IMAGES_HI,
-#endif
-       GFX9_GS_NUM_USER_SGPR,
-
        /* GS limits */
        GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
+       GFX9_GS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
        SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS + (HAVE_32BIT_POINTERS ? 1 
: 2),
 
        /* PS only */
        SI_SGPR_ALPHA_REF       = SI_NUM_RESOURCE_SGPRS,
        SI_PS_NUM_USER_SGPR,
 };
 
 /* LLVM function parameter indices */
 enum {
        SI_NUM_RESOURCE_PARAMS = 4,
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to