This fixes a problem with the CE preamble and restoring only stuff in the preamble when needed.
To illustrate suppose we have two graphics IB's 1 and 2, which are submitted in that order. Furthermore suppose IB 1 does not use CE ram, but IB 2 does, and we have a context switch at the start of IB 1, but not between IB 1 and IB 2. The old code put the CE RAM loads in the preamble of IB 2. As the preamble of IB 1 does not have the loads and the preamble of IB 2 does not get executed, the old values are not load into CE RAM. Fix this by always restoring the entire CE RAM. Signed-off-by: Bas Nieuwenhuizen <[email protected]> Cc: "12.0" <[email protected]> --- src/gallium/drivers/radeonsi/si_descriptors.c | 56 +++++++++++++-------------- src/gallium/drivers/radeonsi/si_hw_context.c | 4 ++ src/gallium/drivers/radeonsi/si_pipe.c | 7 ++++ src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state.h | 7 ++-- 5 files changed, 43 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index baddc5f..5a1a344 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -160,30 +160,39 @@ static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned s return true; } -static void si_reinitialize_ce_ram(struct si_context *sctx, - struct si_descriptors *desc) +void si_ce_save_ram(struct si_context *sctx) { - if (desc->buffer) { - struct r600_resource *buffer = (struct r600_resource*)desc->buffer; - unsigned list_size = desc->num_elements * desc->element_dw_size * 4; - uint64_t va = buffer->gpu_address + desc->buffer_offset; - struct radeon_winsys_cs *ib = sctx->ce_preamble_ib; + if (!sctx->ce_ib) + return; - if (!ib) - ib = sctx->ce_ib; + radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0)); + radeon_emit(sctx->ce_ib, 0); + radeon_emit(sctx->ce_ib, SI_CE_RAM_SIZE / 4); + radeon_emit(sctx->ce_ib, sctx->ce_ram_data->gpu_address); + radeon_emit(sctx->ce_ib, sctx->ce_ram_data->gpu_address >> 32); - list_size = align(list_size, 32); + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->ce_ram_data, + RADEON_USAGE_WRITE, RADEON_PRIO_DESCRIPTORS); +} - radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0)); - radeon_emit(ib, va); - radeon_emit(ib, va >> 32); - radeon_emit(ib, list_size / 4); - radeon_emit(ib, desc->ce_offset); +void si_ce_restore_ram(struct si_context *sctx) +{ + struct radeon_winsys_cs *ib = sctx->ce_preamble_ib; - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, - RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); - } - desc->ce_ram_dirty = false; + if (!ib) + ib = sctx->ce_ib; + + if (!ib) + return; + + radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0)); + radeon_emit(ib, sctx->ce_ram_data->gpu_address); + radeon_emit(ib, sctx->ce_ram_data->gpu_address >> 32); + radeon_emit(ib, SI_CE_RAM_SIZE / 4); + radeon_emit(ib, 0); + + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->ce_ram_data, + RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); } void si_ce_enable_loads(struct radeon_winsys_cs *ib) @@ -206,9 +215,6 @@ static bool si_upload_descriptors(struct si_context *sctx, if (sctx->ce_ib) { uint32_t const* list = (uint32_t const*)desc->list; - if (desc->ce_ram_dirty) - si_reinitialize_ce_ram(sctx, desc); - while(desc->dirty_mask) { int begin, count; u_bit_scan_consecutive_range(&desc->dirty_mask, &begin, @@ -287,8 +293,6 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, RADEON_USAGE_READ); } - views->desc.ce_ram_dirty = true; - if (!views->desc.buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, views->desc.buffer, @@ -489,8 +493,6 @@ si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *imag RADEON_USAGE_READWRITE); } - images->desc.ce_ram_dirty = true; - if (images->desc.buffer) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, images->desc.buffer, @@ -737,8 +739,6 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, buffers->shader_usage, buffers->priority); } - buffers->desc.ce_ram_dirty = true; - if (!buffers->desc.buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index fa6a2cb..1088c21 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -121,6 +121,8 @@ void si_context_gfx_flush(void *context, unsigned flags, ctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_INV_VMEM_L1; + si_ce_save_ram(ctx); + si_emit_cache_flush(ctx, NULL); /* force to keep tiling flags */ @@ -213,6 +215,8 @@ void si_begin_new_cs(struct si_context *ctx) else if (ctx->ce_ib) si_ce_enable_loads(ctx->ce_ib); + si_ce_restore_ram(ctx); + ctx->framebuffer.dirty_cbufs = (1 << 8) - 1; ctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(ctx, &ctx->framebuffer.atom); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 7bb3dc2..a204b9a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -47,6 +47,7 @@ static void si_destroy_context(struct pipe_context *context) if (sctx->ce_suballocator) u_suballocator_destroy(sctx->ce_suballocator); + r600_resource_reference(&sctx->ce_ram_data, NULL); pipe_resource_reference(&sctx->esgs_ring, NULL); pipe_resource_reference(&sctx->gsvs_ring, NULL); pipe_resource_reference(&sctx->tf_ring, NULL); @@ -168,6 +169,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, PIPE_USAGE_DEFAULT, FALSE); if (!sctx->ce_suballocator) goto fail; + + sctx->ce_ram_data = (struct r600_resource*) + pipe_buffer_create(screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_DEFAULT, SI_CE_RAM_SIZE); + if (!sctx->ce_ram_data) + goto fail; } sctx->b.gfx.flush = si_context_gfx_flush; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b3fe656..a62d558 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -199,6 +199,7 @@ struct si_context { struct radeon_winsys_cs *ce_preamble_ib; bool ce_need_synchronization; struct u_suballocator *ce_suballocator; + struct r600_resource *ce_ram_data; struct pipe_fence_handle *last_gfx_fence; struct si_shader_ctx_state fixed_func_tcs_shader; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index e5795eb..2dd166f 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -41,6 +41,7 @@ #define SI_NUM_SHADER_BUFFERS 16 #define SI_TESS_OFFCHIP_BLOCK_SIZE (8192 * 4) +#define SI_CE_RAM_SIZE 32768 struct si_screen; struct si_shader; @@ -205,10 +206,6 @@ struct si_descriptors { /* elements of the list that are changed and need to be uploaded */ unsigned dirty_mask; - /* Whether the CE ram is dirty and needs to be reinitialized entirely - * before we can do partial updates. */ - bool ce_ram_dirty; - /* The shader userdata offset within a shader where the 64-bit pointer to the descriptor * array will be stored. */ unsigned shader_userdata_offset; @@ -250,6 +247,8 @@ struct si_buffer_resources { } while(0) /* si_descriptors.c */ +void si_ce_save_ram(struct si_context *sctx); +void si_ce_restore_ram(struct si_context *sctx); void si_ce_enable_loads(struct radeon_winsys_cs *ib); void si_set_mutable_tex_desc_fields(struct r600_texture *tex, const struct radeon_surf_level *base_level_info, -- 2.8.3 _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
