From: Marek Olšák <marek.ol...@amd.com> If 32-bit pointers are supported, both pointers can be moved into s[0:1] and then ESGS has exactly the same user data SGPR declarations as VS.
If 32-bit pointers are not supported, only one pointer can be moved into s[0:1]. In that case, the 2nd pointer is moved before TCS constants, so that the location is the same in HS and GS. --- src/gallium/drivers/radeonsi/si_descriptors.c | 83 +++++++++++++++++------ src/gallium/drivers/radeonsi/si_shader.c | 94 ++++++++++++++++++--------- src/gallium/drivers/radeonsi/si_shader.h | 37 ++++------- 3 files changed, 140 insertions(+), 74 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 0bad3e1..5083027 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2107,20 +2107,38 @@ static void si_emit_consecutive_shader_pointers(struct si_context *sctx, u_bit_scan_consecutive_range(&mask, &start, &count); struct si_descriptors *descs = &sctx->descriptors[start]; si_emit_shader_pointer_head(cs, descs, sh_base, count); for (int i = 0; i < count; i++) si_emit_shader_pointer_body(sctx->screen, cs, descs + i); } } +static void si_emit_disjoint_shader_pointers(struct si_context *sctx, + unsigned pointer_mask, + unsigned sh_base) +{ + if (!sh_base) + return; + + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + unsigned mask = sctx->shader_pointers_dirty & pointer_mask; + + while (mask) { + struct si_descriptors *descs = &sctx->descriptors[u_bit_scan(&mask)]; + + si_emit_shader_pointer_head(cs, descs, sh_base, 1); + si_emit_shader_pointer_body(sctx->screen, cs, descs); + } +} + static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs) { if (sctx->b.chip_class == GFX9) { /* Broadcast it to all shader stages. */ si_emit_shader_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0); return; } @@ -2143,28 +2161,35 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx, { uint32_t *sh_base = sctx->shader_pointers.sh_base; if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) { si_emit_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_RW_BUFFERS]); } si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX), sh_base[PIPE_SHADER_VERTEX]); - si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), - sh_base[PIPE_SHADER_TESS_CTRL]); si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL), sh_base[PIPE_SHADER_TESS_EVAL]); - si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), - sh_base[PIPE_SHADER_GEOMETRY]); si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT), sh_base[PIPE_SHADER_FRAGMENT]); + if (HAVE_32BIT_POINTERS || sctx->b.chip_class <= VI) { + si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), + sh_base[PIPE_SHADER_TESS_CTRL]); + si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), + sh_base[PIPE_SHADER_GEOMETRY]); + } else { + si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), + sh_base[PIPE_SHADER_TESS_CTRL]); + si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), + sh_base[PIPE_SHADER_GEOMETRY]); + } sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE); if (sctx->vertex_buffer_pointer_dirty) { si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX]); sctx->vertex_buffer_pointer_dirty = false; } @@ -2626,54 +2651,70 @@ void si_all_resident_buffers_begin_new_cs(struct si_context *sctx) num_resident_img_handles; } /* INIT/DEINIT/UPLOAD */ void si_init_all_descriptors(struct si_context *sctx) { int i; #if !HAVE_32BIT_POINTERS - STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0); - STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0); + STATIC_ASSERT(GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES % 2 == 0); #endif for (i = 0; i < SI_NUM_SHADERS; i++) { - bool gfx9_tcs = false; - bool gfx9_gs = false; + bool is_2nd = sctx->b.chip_class >= GFX9 && + (i == PIPE_SHADER_TESS_CTRL || + i == PIPE_SHADER_GEOMETRY); unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS; unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS; + int rel_dw_offset; struct si_descriptors *desc; - if (sctx->b.chip_class >= GFX9) { - gfx9_tcs = i == PIPE_SHADER_TESS_CTRL; - gfx9_gs = i == PIPE_SHADER_GEOMETRY; + if (is_2nd) { + if (i == PIPE_SHADER_TESS_CTRL) { + rel_dw_offset = (R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS - + R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4; + } else { /* PIPE_SHADER_GEOMETRY */ + rel_dw_offset = (R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS - + R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4; + } + } else { + rel_dw_offset = SI_SGPR_CONST_AND_SHADER_BUFFERS; } - desc = si_const_and_shader_buffer_descriptors(sctx, i); si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc, - num_buffer_slots, - gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS : - gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS : - SI_SGPR_CONST_AND_SHADER_BUFFERS, + num_buffer_slots, rel_dw_offset, RADEON_USAGE_READWRITE, RADEON_USAGE_READ, RADEON_PRIO_SHADER_RW_BUFFER, RADEON_PRIO_CONST_BUFFER); desc->slot_index_to_bind_directly = si_get_constbuf_slot(0); + if (is_2nd) { +#if HAVE_32BIT_POINTERS + if (i == PIPE_SHADER_TESS_CTRL) { + rel_dw_offset = (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS - + R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4; + } else { /* PIPE_SHADER_GEOMETRY */ + rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS - + R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4; + } +#else + rel_dw_offset = GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES; +#endif + } else { + rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES; + } + desc = si_sampler_and_image_descriptors(sctx, i); - si_init_descriptors(desc, - gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES : - gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES : - SI_SGPR_SAMPLERS_AND_IMAGES, - 16, num_sampler_slots); + si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots); int j; for (j = 0; j < SI_NUM_IMAGES; j++) memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4); for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++) memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4); } si_init_buffer_resources(&sctx->rw_buffers, &sctx->descriptors[SI_DESCS_RW_BUFFERS], diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index cc57ba3..0bf5228 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3342,81 +3342,85 @@ static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); } ctx->return_value = ret; } /* Pass TCS inputs from LS to TCS on GFX9. */ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) { LLVMValueRef ret = ctx->return_value; + ret = si_insert_input_ptr(ctx, ret, 0, 0); + if (HAVE_32BIT_POINTERS) + ret = si_insert_input_ptr(ctx, ret, 1, 1); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers, 8 + SI_SGPR_RW_BUFFERS); ret = si_insert_input_ptr(ctx, ret, ctx->param_bindless_samplers_and_images, 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS); + +#if !HAVE_32BIT_POINTERS + ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1, + 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); +#endif + ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k, 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K); - unsigned desc_param = ctx->param_tcs_factor_addr_base64k + - (HAVE_32BIT_POINTERS ? 1 : 2); - ret = si_insert_input_ptr(ctx, ret, desc_param, - 8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS); - ret = si_insert_input_ptr(ctx, ret, desc_param + 1, - 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES); - unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR; ret = LLVMBuildInsertValue(ctx->ac.builder, ret, ac_to_float(&ctx->ac, ctx->abi.tcs_patch_id), vgpr++, ""); ret = LLVMBuildInsertValue(ctx->ac.builder, ret, ac_to_float(&ctx->ac, ctx->abi.tcs_rel_ids), vgpr++, ""); ctx->return_value = ret; } /* Pass GS inputs from ES to GS on GFX9. */ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) { LLVMValueRef ret = ctx->return_value; + ret = si_insert_input_ptr(ctx, ret, 0, 0); + if (HAVE_32BIT_POINTERS) + ret = si_insert_input_ptr(ctx, ret, 1, 1); ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers, 8 + SI_SGPR_RW_BUFFERS); ret = si_insert_input_ptr(ctx, ret, ctx->param_bindless_samplers_and_images, 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); - unsigned desc_param = ctx->param_vs_state_bits + 1; - ret = si_insert_input_ptr(ctx, ret, desc_param, - 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); - ret = si_insert_input_ptr(ctx, ret, desc_param + 1, - 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES); +#if !HAVE_32BIT_POINTERS + ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1, + 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); +#endif unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR; for (unsigned i = 0; i < 5; i++) { unsigned param = ctx->param_gs_vtx01_offset + i; ret = si_insert_input_ret_float(ctx, ret, param, vgpr++); } ctx->return_value = ret; } static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, @@ -4480,44 +4484,58 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader) if (!max_work_group_size) { /* This is a variable group size compute shader, * compile it for the maximum possible group size. */ max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK; } return max_work_group_size; } -static void declare_per_stage_desc_pointers(struct si_shader_context *ctx, - struct si_function_info *fninfo, - bool assign_params) +static void declare_const_and_shader_buffers(struct si_shader_context *ctx, + struct si_function_info *fninfo, + bool assign_params) { LLVMTypeRef const_shader_buf_type; if (ctx->shader->selector->info.const_buffers_declared == 1 && ctx->shader->selector->info.shader_buffers_declared == 0) const_shader_buf_type = ctx->f32; else const_shader_buf_type = ctx->v4i32; unsigned const_and_shader_buffers = add_arg(fninfo, ARG_SGPR, ac_array_in_const32_addr_space(const_shader_buf_type)); + if (assign_params) + ctx->param_const_and_shader_buffers = const_and_shader_buffers; +} + +static void declare_samplers_and_images(struct si_shader_context *ctx, + struct si_function_info *fninfo, + bool assign_params) +{ unsigned samplers_and_images = add_arg(fninfo, ARG_SGPR, ac_array_in_const32_addr_space(ctx->v8i32)); - if (assign_params) { - ctx->param_const_and_shader_buffers = const_and_shader_buffers; + if (assign_params) ctx->param_samplers_and_images = samplers_and_images; - } +} + +static void declare_per_stage_desc_pointers(struct si_shader_context *ctx, + struct si_function_info *fninfo, + bool assign_params) +{ + declare_const_and_shader_buffers(ctx, fninfo, assign_params); + declare_samplers_and_images(ctx, fninfo, assign_params); } static void declare_global_desc_pointers(struct si_shader_context *ctx, struct si_function_info *fninfo) { ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR, ac_array_in_const32_addr_space(ctx->v4i32)); ctx->param_bindless_samplers_and_images = add_arg(fninfo, ARG_SGPR, ac_array_in_const32_addr_space(ctx->v8i32)); } @@ -4668,44 +4686,49 @@ static void create_function(struct si_shader_context *ctx) * placed after the user SGPRs. */ for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 11; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ break; case SI_SHADER_MERGED_VERTEX_TESSCTRL: /* Merged stages have 8 system SGPRs at the beginning. */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_LO_HS */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_HI_HS */ + /* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */ + if (HAVE_32BIT_POINTERS) { + declare_per_stage_desc_pointers(ctx, &fninfo, + ctx->type == PIPE_SHADER_TESS_CTRL); + } else { + declare_const_and_shader_buffers(ctx, &fninfo, + ctx->type == PIPE_SHADER_TESS_CTRL); + } ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_VERTEX); declare_vs_specific_input_sgprs(ctx, &fninfo); + if (!HAVE_32BIT_POINTERS) { + declare_samplers_and_images(ctx, &fninfo, + ctx->type == PIPE_SHADER_TESS_CTRL); + } ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); - if (!HAVE_32BIT_POINTERS) - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - - declare_per_stage_desc_pointers(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); /* VGPRs (first TCS, then VS) */ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id); add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_rel_ids); if (ctx->type == PIPE_SHADER_VERTEX) { declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); /* LS return values are inputs to the TCS main shader part. */ @@ -4722,22 +4745,28 @@ static void create_function(struct si_shader_context *ctx) */ for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 11; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ } break; case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY: /* Merged stages have 8 system SGPRs at the beginning. */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_LO_GS) */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_HI_GS) */ + /* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */ + if (HAVE_32BIT_POINTERS) { + declare_per_stage_desc_pointers(ctx, &fninfo, + ctx->type == PIPE_SHADER_GEOMETRY); + } else { + declare_const_and_shader_buffers(ctx, &fninfo, + ctx->type == PIPE_SHADER_GEOMETRY); + } ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */ declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, (ctx->type == PIPE_SHADER_VERTEX || @@ -4749,22 +4778,24 @@ static void create_function(struct si_shader_context *ctx) * Declare as many input SGPRs as the VS has. */ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ if (!HAVE_32BIT_POINTERS) add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ } - declare_per_stage_desc_pointers(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); + if (!HAVE_32BIT_POINTERS) { + declare_samplers_and_images(ctx, &fninfo, + ctx->type == PIPE_SHADER_GEOMETRY); + } /* VGPRs (first GS, then VS/TES) */ ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.gs_prim_id); add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.gs_invocation_id); ctx->param_gs_vtx45_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); if (ctx->type == PIPE_SHADER_VERTEX) { declare_vs_input_vgprs(ctx, &fninfo, @@ -7257,36 +7288,39 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen, static void si_build_tcs_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { struct lp_build_tgsi_context *bld_base = &ctx->bld_base; struct si_function_info fninfo; LLVMValueRef func; si_init_function_info(&fninfo); if (ctx->screen->info.chip_class >= GFX9) { - add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->i32); + add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); + if (!HAVE_32BIT_POINTERS) + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); } else { add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index ef4472b..e0d6f70 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -188,52 +188,43 @@ enum { /* GFX6-8: TCS only */ GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, GFX6_SGPR_TCS_OUT_OFFSETS, GFX6_SGPR_TCS_OUT_LAYOUT, GFX6_SGPR_TCS_IN_LAYOUT, GFX6_SGPR_TCS_OFFCHIP_ADDR_BASE64K, GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K, GFX6_TCS_NUM_USER_SGPR, + /* GFX9: Merged shaders. */ +#if HAVE_32BIT_POINTERS + /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */ + /* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */ + GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR, +#else + /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI (SGPR[0:1]). */ + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR, + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI, + GFX9_MERGED_NUM_USER_SGPR, +#endif + /* GFX9: Merged LS-HS (VS-TCS) only. */ - GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR, + GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR, GFX9_SGPR_TCS_OUT_OFFSETS, GFX9_SGPR_TCS_OUT_LAYOUT, GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K, GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K, -#if !HAVE_32BIT_POINTERS - GFX9_SGPR_unused_to_align_the_next_pointer, -#endif - GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS, -#if !HAVE_32BIT_POINTERS - GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI, -#endif - GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES, -#if !HAVE_32BIT_POINTERS - GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES_HI, -#endif GFX9_TCS_NUM_USER_SGPR, - /* GFX9: Merged ES-GS (VS-GS or TES-GS). */ - GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR, -#if !HAVE_32BIT_POINTERS - GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI, -#endif - GFX9_SGPR_GS_SAMPLERS_AND_IMAGES, -#if !HAVE_32BIT_POINTERS - GFX9_SGPR_GS_SAMPLERS_AND_IMAGES_HI, -#endif - GFX9_GS_NUM_USER_SGPR, - /* GS limits */ GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS, + GFX9_GS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS + (HAVE_32BIT_POINTERS ? 1 : 2), /* PS only */ SI_SGPR_ALPHA_REF = SI_NUM_RESOURCE_SGPRS, SI_PS_NUM_USER_SGPR, }; /* LLVM function parameter indices */ enum { SI_NUM_RESOURCE_PARAMS = 4, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev