From: Marek Olšák <marek.ol...@amd.com> A later commit will upload only the descriptors used by shaders, so full dumps of CE RAM will no longer happen. The only way to keep a complete mirror of CE RAM in memory is therefore to do a separate dump after the last draw call. --- src/gallium/drivers/radeonsi/si_descriptors.c | 56 ++++++++++++--------------- src/gallium/drivers/radeonsi/si_hw_context.c | 8 +++- src/gallium/drivers/radeonsi/si_pipe.c | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 3 ++ src/gallium/drivers/radeonsi/si_state.h | 6 +-- 5 files changed, 37 insertions(+), 37 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 91cc9a6..38e4ae1 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -142,74 +142,69 @@ static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned s radeon_emit(sctx->ce_ib, va); radeon_emit(sctx->ce_ib, va >> 32); radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, *out_buf, RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); sctx->ce_need_synchronization = true; return true; } -static void si_ce_reinitialize_descriptors(struct si_context *sctx, - struct si_descriptors *desc) +void si_ce_save_all_descriptors_at_ib_end(struct si_context* sctx) { - if (desc->buffer) { - struct r600_resource *buffer = (struct r600_resource*)desc->buffer; - unsigned list_size = desc->num_elements * desc->element_dw_size * 4; - uint64_t va = buffer->gpu_address + desc->buffer_offset; - struct radeon_winsys_cs *ib = sctx->ce_preamble_ib; - - if (!ib) - ib = sctx->ce_ib; + bool success = si_ce_upload(sctx, 0, sctx->total_ce_ram_allocated, + &sctx->ce_ram_saved_offset, + &sctx->ce_ram_saved_buffer); + (void)success; + assert(success); +} - list_size = align(list_size, 32); +void si_ce_restore_all_descriptors_at_ib_start(struct si_context *sctx) +{ + if (!sctx->ce_ram_saved_buffer) + return; - radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0)); - radeon_emit(ib, va); - radeon_emit(ib, va >> 32); - radeon_emit(ib, list_size / 4); - radeon_emit(ib, desc->ce_offset); + struct radeon_winsys_cs *ib = sctx->ce_preamble_ib; + if (!ib) + ib = sctx->ce_ib; - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, - RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); - } - desc->ce_ram_dirty = false; -} + uint64_t va = sctx->ce_ram_saved_buffer->gpu_address + + sctx->ce_ram_saved_offset; -void si_ce_reinitialize_all_descriptors(struct si_context *sctx) -{ - int i; + radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 
0)); + radeon_emit(ib, va); + radeon_emit(ib, va >> 32); + radeon_emit(ib, sctx->total_ce_ram_allocated / 4); + radeon_emit(ib, 0); - for (i = 0; i < SI_NUM_DESCS; ++i) - si_ce_reinitialize_descriptors(sctx, &sctx->descriptors[i]); + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + sctx->ce_ram_saved_buffer, + RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); } void si_ce_enable_loads(struct radeon_winsys_cs *ib) { radeon_emit(ib, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(ib, CONTEXT_CONTROL_LOAD_ENABLE(1) | CONTEXT_CONTROL_LOAD_CE_RAM(1)); radeon_emit(ib, CONTEXT_CONTROL_SHADOW_ENABLE(1)); } static bool si_upload_descriptors(struct si_context *sctx, struct si_descriptors *desc, struct r600_atom * atom) { unsigned list_size = desc->num_elements * desc->element_dw_size * 4; if (sctx->ce_ib && desc->uses_ce) { uint32_t const* list = (uint32_t const*)desc->list; - if (desc->ce_ram_dirty) - si_ce_reinitialize_descriptors(sctx, desc); - while(desc->dirty_mask) { int begin, count; u_bit_scan_consecutive_range64(&desc->dirty_mask, &begin, &count); begin *= desc->element_dw_size; count *= desc->element_dw_size; radeon_emit(sctx->ce_ib, PKT3(PKT3_WRITE_CONST_RAM, count, 0)); @@ -240,22 +235,20 @@ static bool si_upload_descriptors(struct si_context *sctx, if (atom) si_mark_atom_dirty(sctx, atom); return true; } static void si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc) { - desc->ce_ram_dirty = true; - if (!desc->buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); } /* SAMPLER VIEWS */ static unsigned @@ -2037,20 +2030,21 @@ void si_init_all_descriptors(struct si_context *sctx) SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS, /* The second set of usage/priority is used by * const buffers in RW buffer slots. 
*/ RADEON_USAGE_READWRITE, RADEON_USAGE_READ, RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER, &ce_offset); si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS, 4, SI_NUM_VERTEX_BUFFERS, NULL); sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS); + sctx->total_ce_ram_allocated = ce_offset; if (sctx->b.chip_class >= GFX9) assert(ce_offset <= 4096); else assert(ce_offset <= 32768); /* Set pipe_context functions. */ sctx->b.b.bind_sampler_states = si_bind_sampler_states; sctx->b.b.set_shader_images = si_set_shader_images; sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer; diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index e15f6a9..5e97d56 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -116,20 +116,24 @@ void si_context_gfx_flush(void *context, unsigned flags, * This code is only needed when the driver flushes the GFX IB * internally, and it never asks for a fence handle. */ if (radeon_emitted(ctx->b.dma.cs, 0)) { assert(fence == NULL); /* internal flushes only */ ctx->b.dma.flush(ctx, flags, NULL); } ctx->gfx_flush_in_progress = true; + /* This CE dump should be done in parallel with the last draw. */ + if (ctx->ce_ib) + si_ce_save_all_descriptors_at_ib_end(ctx); + r600_preflush_suspend_features(&ctx->b); ctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH; /* DRM 3.1.0 doesn't flush TC for VI correctly. */ if (ctx->b.chip_class == VI && ctx->b.screen->info.drm_minor <= 1) ctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_INV_VMEM_L1; @@ -200,22 +204,22 @@ void si_begin_new_cs(struct si_context *ctx) /* The CS initialization should be emitted before everything else. 
*/ si_pm4_emit(ctx, ctx->init_config); if (ctx->init_config_gs_rings) si_pm4_emit(ctx, ctx->init_config_gs_rings); if (ctx->ce_preamble_ib) si_ce_enable_loads(ctx->ce_preamble_ib); else if (ctx->ce_ib) si_ce_enable_loads(ctx->ce_ib); - if (ctx->ce_preamble_ib) - si_ce_reinitialize_all_descriptors(ctx); + if (ctx->ce_ib) + si_ce_restore_all_descriptors_at_ib_start(ctx); if (ctx->b.chip_class >= CIK) si_mark_atom_dirty(ctx, &ctx->prefetch_L2); ctx->framebuffer.dirty_cbufs = (1 << 8) - 1; ctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(ctx, &ctx->framebuffer.atom); si_mark_atom_dirty(ctx, &ctx->clip_regs); si_mark_atom_dirty(ctx, &ctx->clip_state.atom); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index dd962e0..eaa3348 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -48,20 +48,21 @@ static void si_destroy_context(struct pipe_context *context) * properly. */ struct pipe_framebuffer_state fb = {}; context->set_framebuffer_state(context, &fb); si_release_all_descriptors(sctx); if (sctx->ce_suballocator) u_suballocator_destroy(sctx->ce_suballocator); + r600_resource_reference(&sctx->ce_ram_saved_buffer, NULL); pipe_resource_reference(&sctx->esgs_ring, NULL); pipe_resource_reference(&sctx->gsvs_ring, NULL); pipe_resource_reference(&sctx->tf_ring, NULL); pipe_resource_reference(&sctx->tess_offchip_ring, NULL); pipe_resource_reference(&sctx->null_const_buf.buffer, NULL); r600_resource_reference(&sctx->border_color_buffer, NULL); free(sctx->border_color_table); r600_resource_reference(&sctx->scratch_buffer, NULL); r600_resource_reference(&sctx->compute_scratch_buffer, NULL); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 449a802..13ec072 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -229,20 +229,23 @@ struct si_context { struct blitter_context *blitter; void 
*custom_dsa_flush; void *custom_blend_resolve; void *custom_blend_decompress; void *custom_blend_fastclear; void *custom_blend_dcc_decompress; struct si_screen *screen; struct radeon_winsys_cs *ce_ib; struct radeon_winsys_cs *ce_preamble_ib; + struct r600_resource *ce_ram_saved_buffer; + unsigned ce_ram_saved_offset; + unsigned total_ce_ram_allocated; bool ce_need_synchronization; struct u_suballocator *ce_suballocator; struct si_shader_ctx_state fixed_func_tcs_shader; LLVMTargetMachineRef tm; /* only non-threaded compilation */ bool gfx_flush_in_progress; bool compute_is_busy; /* Atoms (direct states). */ union si_state_atoms atoms; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index c4ef903..9b506a8 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -225,23 +225,20 @@ struct si_descriptors { unsigned buffer_offset; /* Offset in CE RAM */ unsigned ce_offset; /* elements of the list that are changed and need to be uploaded */ uint64_t dirty_mask; /* Whether CE is used to upload this descriptor array. */ bool uses_ce; - /* Whether the CE ram is dirty and needs to be reinitialized entirely - * before we can do partial updates. */ - bool ce_ram_dirty; /* The shader userdata offset within a shader where the 64-bit pointer to the descriptor * array will be stored. 
*/ unsigned shader_userdata_offset; }; struct si_sampler_views { struct pipe_sampler_view *views[SI_NUM_SAMPLERS]; struct si_sampler_state *sampler_states[SI_NUM_SAMPLERS]; @@ -275,21 +272,22 @@ struct si_buffer_resources { #define si_pm4_delete_state(sctx, member, value) \ do { \ if ((sctx)->queued.named.member == (value)) { \ (sctx)->queued.named.member = NULL; \ } \ si_pm4_free_state(sctx, (struct si_pm4_state *)(value), \ si_pm4_block_idx(member)); \ } while(0) /* si_descriptors.c */ -void si_ce_reinitialize_all_descriptors(struct si_context *sctx); +void si_ce_save_all_descriptors_at_ib_end(struct si_context* sctx); +void si_ce_restore_all_descriptors_at_ib_start(struct si_context *sctx); void si_ce_enable_loads(struct radeon_winsys_cs *ib); void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct r600_texture *tex, const struct legacy_surf_level *base_level_info, unsigned base_level, unsigned first_level, unsigned block_width, bool is_stencil, uint32_t *state); void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot, struct pipe_constant_buffer *cbuf); void si_get_shader_buffers(struct si_context *sctx, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev