On Sun, Apr 17, 2016 at 1:43 AM, Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> wrote: > v2: Load previous list for new CS instead of re-emitting > all descriptors. > > Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> > --- > src/gallium/drivers/radeonsi/si_descriptors.c | 70 > +++++++++++++++++++++++---- > 1 file changed, 60 insertions(+), 10 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c > b/src/gallium/drivers/radeonsi/si_descriptors.c > index 8ca0253..e4f06e7 100644 > --- a/src/gallium/drivers/radeonsi/si_descriptors.c > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c > @@ -60,6 +60,7 @@ > #include "si_shader.h" > #include "sid.h" > > +#include "util/u_math.h" > #include "util/u_memory.h" > #include "util/u_suballoc.h" > #include "util/u_upload_mgr.h" > @@ -152,29 +153,78 @@ static bool si_ce_upload(struct si_context *sctx, > unsigned ce_offset, unsigned s > return true; > } > > +static void si_reinitialize_ce_ram(struct si_context *sctx, > + struct si_descriptors *desc) > +{ > + if (desc->buffer) { > + struct r600_resource *buffer = (struct > r600_resource*)desc->buffer; > + unsigned list_size = desc->num_elements * > desc->element_dw_size * 4; > + uint64_t va = buffer->gpu_address + desc->buffer_offset; > + struct radeon_winsys_cs *ib = sctx->ce_preamble_ib; > + > + if (!ib) > + ib = sctx->ce_ib; > + > + list_size = align(list_size, 32); > + > + radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0)); > + radeon_emit(ib, va); > + radeon_emit(ib, va >> 32); > + radeon_emit(ib, list_size / 4); > + radeon_emit(ib, desc->ce_offset); > + > + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, > desc->buffer, > + RADEON_USAGE_READ, > RADEON_PRIO_DESCRIPTORS); > + } > + desc->ce_ram_dirty = false; > +} > > static bool si_upload_descriptors(struct si_context *sctx, > struct si_descriptors *desc) > { > unsigned list_size = desc->num_elements * desc->element_dw_size * 4; > - void *ptr; > > if (!desc->dirty_mask) > return true; > > - 
u_upload_alloc(sctx->b.uploader, 0, list_size, 256, > - &desc->buffer_offset, > - (struct pipe_resource**)&desc->buffer, &ptr); > - if (!desc->buffer) > - return false; /* skip the draw call */ > + if (sctx->ce_ib) { > + uint32_t const* list = (uint32_t const*)desc->list; > > - util_memcpy_cpu_to_le32(ptr, desc->list, list_size); > + if (desc->ce_ram_dirty) > + si_reinitialize_ce_ram(sctx, desc); > > - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, > - RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); > + while(desc->dirty_mask) { > + int begin, count; > + u_bit_scan_consecutive_range64(&desc->dirty_mask, > &begin, > + &count); > > - desc->dirty_mask = 0; > + begin *= desc->element_dw_size; > + count *= desc->element_dw_size; > + > + radeon_emit(sctx->ce_ib, > + PKT3(PKT3_WRITE_CONST_RAM, count, 0)); > + radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4); > + radeon_emit_array(sctx->ce_ib, list + begin, count); > + } > + > + if (!si_ce_upload(sctx, desc->ce_offset, list_size, > + &desc->buffer_offset, > &desc->buffer)) > + return false; > + } else { > + void *ptr; > + > + u_upload_alloc(sctx->b.uploader, 0, list_size, 256, > + &desc->buffer_offset, > + (struct pipe_resource**)&desc->buffer, &ptr); > + if (!desc->buffer) > + return false; /* skip the draw call */ > + > + util_memcpy_cpu_to_le32(ptr, desc->list, list_size); > + } > + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, > + RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
For clarity, it would be better to call radeon_add_to_buffer_list in si_ce_upload, because that is the first IB user of the buffer.

In any case:

Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev