From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_compute.c | 3 +++ src/gallium/drivers/radeonsi/si_compute.h | 4 ++++ src/gallium/drivers/radeonsi/si_shader.h | 4 ++++ src/gallium/drivers/radeonsi/si_state.h | 3 +++ src/gallium/drivers/radeonsi/si_state_shaders.c | 27 +++++++++++++++++++++++++ 5 files changed, 41 insertions(+)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 3a519a72..22ef111 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -93,20 +93,23 @@ static void si_create_compute_state_async(void *job, int thread_index) tm = program->compiler_ctx_state.tm; } memset(&sel, 0, sizeof(sel)); sel.screen = program->screen; tgsi_scan_shader(program->tokens, &sel.info); sel.tokens = program->tokens; sel.type = PIPE_SHADER_COMPUTE; sel.local_size = program->local_size; + si_get_active_slot_masks(&sel.info, + &program->active_const_and_shader_buffers, + &program->active_samplers_and_images); program->shader.selector = &sel; program->shader.is_monolithic = true; program->uses_grid_size = sel.info.uses_grid_size; program->uses_block_size = sel.info.uses_block_size; if (si_shader_create(program->screen, tm, &program->shader, debug)) { program->shader.compilation_failed = true; } else { bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0; diff --git a/src/gallium/drivers/radeonsi/si_compute.h b/src/gallium/drivers/radeonsi/si_compute.h index ed33104..764d708 100644 --- a/src/gallium/drivers/radeonsi/si_compute.h +++ b/src/gallium/drivers/radeonsi/si_compute.h @@ -27,20 +27,24 @@ #include "si_shader.h" #define MAX_GLOBAL_BUFFERS 22 struct si_compute { struct si_screen *screen; struct tgsi_token *tokens; struct util_queue_fence ready; struct si_compiler_ctx_state compiler_ctx_state; + /* bitmasks of used descriptor slots */ + uint32_t active_const_and_shader_buffers; + uint64_t active_samplers_and_images; + unsigned ir_type; unsigned local_size; unsigned private_size; unsigned input_size; struct si_shader shader; struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; unsigned use_code_object_v2 : 1; unsigned variable_group_size : 1; unsigned uses_grid_size:1; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index ffb7dc3..aab902b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -345,20 +345,24 @@ struct si_shader_selector { */ unsigned colors_written_4bit; /* CS parameters */ unsigned local_size; uint64_t outputs_written; /* "get_unique_index" bits */ uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */ uint64_t inputs_read; /* "get_unique_index" bits */ + + /* bitmasks of used descriptor slots */ + uint32_t active_const_and_shader_buffers; + uint64_t active_samplers_and_images; }; /* Valid shader configurations: * * API shaders VS | TCS | TES | GS |pass| PS * are compiled as: | | | |thru| * | | | | | * Only VS & PS: VS | | | | | PS * GFX6 - with GS: ES | | | GS | VS | PS * - with tess: LS | HS | VS | | | PS diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 9b506a8..f2003a5 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -346,20 +346,23 @@ si_create_sampler_view_custom(struct pipe_context *ctx, const struct pipe_sampler_view *state, unsigned width0, unsigned height0, unsigned force_level); /* si_state_shader.c */ bool si_update_shaders(struct si_context *sctx); void si_init_shader_functions(struct si_context *sctx); bool si_init_shader_cache(struct si_screen *sscreen); void si_destroy_shader_cache(struct si_screen *sscreen); void si_init_shader_selector_async(void *job, int thread_index); +void si_get_active_slot_masks(const struct tgsi_shader_info *info, + uint32_t *const_and_shader_buffers, + uint64_t *samplers_and_images); /* si_state_draw.c */ void si_init_ia_multi_vgt_param_table(struct si_context *sctx); void si_emit_cache_flush(struct si_context *sctx); void si_ce_pre_draw_synchronization(struct si_context *sctx); void si_ce_post_draw_synchronization(struct si_context *sctx); void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo); void si_trace_emit(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 313af85..45d996b 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1896,20 +1896,44 @@ void si_init_shader_selector_async(void *job, int thread_index) sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, tm, sel, debug); if (!sel->gs_copy_shader) { fprintf(stderr, "radeonsi: can't create GS copy shader\n"); return; } si_shader_vs(sscreen, sel->gs_copy_shader, sel); } } +/* Return descriptor slot usage masks from the given shader info. */ +void si_get_active_slot_masks(const struct tgsi_shader_info *info, + uint32_t *const_and_shader_buffers, + uint64_t *samplers_and_images) +{ + unsigned start, num_shaderbufs, num_constbufs, num_images, num_samplers; + + num_shaderbufs = util_last_bit(info->shader_buffers_declared); + num_constbufs = util_last_bit(info->const_buffers_declared); + /* two 8-byte images share one 16-byte slot */ + num_images = align(util_last_bit(info->images_declared), 2); + num_samplers = util_last_bit(info->samplers_declared); + + /* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */ + start = si_get_shaderbuf_slot(num_shaderbufs - 1); + *const_and_shader_buffers = + u_bit_consecutive(start, num_shaderbufs + num_constbufs); + + /* The layout is: image[last] ... image[0], sampler[0] ... sampler[last] */ + start = si_get_image_slot(num_images - 1) / 2; + *samplers_and_images = + u_bit_consecutive64(start, num_images / 2 + num_samplers); +} + static void *si_create_shader_selector(struct pipe_context *ctx, const struct pipe_shader_state *state) { struct si_screen *sscreen = (struct si_screen *)ctx->screen; struct si_context *sctx = (struct si_context*)ctx; struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector); int i; if (!sel) return NULL; @@ -1922,20 +1946,23 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->tokens = tgsi_dup_tokens(state->tokens); if (!sel->tokens) { FREE(sel); return NULL; } sel->so = state->stream_output; tgsi_scan_shader(state->tokens, &sel->info); sel->type = sel->info.processor; p_atomic_inc(&sscreen->b.num_shaders_created); + si_get_active_slot_masks(&sel->info, + &sel->active_const_and_shader_buffers, + &sel->active_samplers_and_images); /* The prolog is a no-op if there are no inputs. */ sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX && sel->info.num_inputs; /* Set which opcode uses which (i,j) pair. */ if (sel->info.uses_persp_opcode_interp_centroid) sel->info.uses_persp_centroid = true; if (sel->info.uses_linear_opcode_interp_centroid) -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev