From: Marek Olšák <marek.ol...@amd.com>

VBO descriptor code will change a lot one day.
---
 src/gallium/drivers/radeonsi/si_blit.c        |  2 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c      |  5 ++-
 src/gallium/drivers/radeonsi/si_debug.c       | 14 ++++++--
 src/gallium/drivers/radeonsi/si_descriptors.c | 50 +++++++++++++--------------
 src/gallium/drivers/radeonsi/si_hw_context.c  |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h        |  6 +++-
 6 files changed, 46 insertions(+), 33 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 370ce04..f1c4f6d 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -79,21 +79,21 @@ void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) void si_blitter_end(struct pipe_context *ctx) { struct si_context *sctx = (struct si_context *)ctx; sctx->b.render_cond_force_off = false; /* Restore shader pointers because the VS blit shader changed all * non-global VS user SGPRs. */ sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX); - sctx->vertex_buffer_pointer_dirty = true; + sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL; si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); } static unsigned u_max_sample(struct pipe_resource *r) { return r->nr_samples ? r->nr_samples - 1 : 0; } static unsigned si_blit_dbcb_copy(struct si_context *sctx, diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index b665926..15bd305 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -509,23 +509,22 @@ static void cik_prefetch_shader_async(struct si_context *sctx, assert(state->nbo == 1); cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0); } static void cik_prefetch_VBO_descriptors(struct si_context *sctx) { if (!sctx->vertex_elements) return; - cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b, - sctx->vertex_buffers.gpu_address - - sctx->vertex_buffers.buffer->gpu_address, + cik_prefetch_TC_L2_async(sctx, &sctx->vb_descriptors_buffer->b.b, + sctx->vb_descriptors_offset, sctx->vertex_elements->desc_list_byte_size); } void cik_emit_prefetch_L2(struct si_context *sctx) { /* Prefetch shaders and VBO descriptors to TC L2. */ if (sctx->b.chip_class >= GFX9) { /* Choose the right spot for the VBO prefetch. 
*/ if (sctx->tes_shader.cso) { if (sctx->prefetch_L2_mask & SI_PREFETCH_HS) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 385ce39..1f25f4e 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -733,24 +733,34 @@ static void si_dump_descriptors(struct si_context *sctx, enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS; enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask & u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >> (32 - SI_NUM_SHADER_BUFFERS); enabled_samplers = sctx->samplers[processor].enabled_mask; enabled_images = sctx->images[processor].enabled_mask; } - if (processor == PIPE_SHADER_VERTEX) { + if (processor == PIPE_SHADER_VERTEX && + sctx->vb_descriptors_buffer && + sctx->vb_descriptors_gpu_list && + sctx->vertex_elements) { assert(info); /* only CS may not have an info struct */ + struct si_descriptors desc = {}; - si_dump_descriptor_list(sctx->screen, &sctx->vertex_buffers, name, + desc.buffer = sctx->vb_descriptors_buffer; + desc.list = sctx->vb_descriptors_gpu_list; + desc.gpu_list = sctx->vb_descriptors_gpu_list; + desc.element_dw_size = 4; + desc.num_active_slots = sctx->vertex_elements->desc_list_byte_size / 16; + + si_dump_descriptor_list(sctx->screen, &desc, name, " - Vertex buffer", 4, info->num_inputs, si_identity, log); } si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], name, " - Constant buffer", 4, util_last_bit(enabled_constbuf), si_get_constbuf_slot, log); si_dump_descriptor_list(sctx->screen, diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 76f2a3e..f6bc3cf 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1040,84 +1040,82 @@ static void 
si_get_buffer_from_descriptors(struct si_buffer_resources *buffers, assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size); *offset = va - res->gpu_address; } } /* VERTEX BUFFERS */ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) { - struct si_descriptors *desc = &sctx->vertex_buffers; int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0; int i; for (i = 0; i < count; i++) { int vb = sctx->vertex_elements->vertex_buffer_index[i]; if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) continue; if (!sctx->vertex_buffer[vb].buffer.resource) continue; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource, RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } - if (!desc->buffer) + if (!sctx->vb_descriptors_buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - desc->buffer, RADEON_USAGE_READ, - RADEON_PRIO_DESCRIPTORS); + sctx->vb_descriptors_buffer, RADEON_USAGE_READ, + RADEON_PRIO_DESCRIPTORS); } bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) { struct si_vertex_elements *velems = sctx->vertex_elements; - struct si_descriptors *desc = &sctx->vertex_buffers; unsigned i, count; unsigned desc_list_byte_size; unsigned first_vb_use_mask; uint32_t *ptr; if (!sctx->vertex_buffers_dirty || !velems) return true; count = velems->count; if (!count) return true; desc_list_byte_size = velems->desc_list_byte_size; first_vb_use_mask = velems->first_vb_use_mask; /* Vertex buffer descriptors are the only ones which are uploaded * directly through a staging buffer and don't go through * the fine-grained upload path. 
*/ - unsigned buffer_offset = 0; u_upload_alloc(sctx->b.b.const_uploader, 0, desc_list_byte_size, si_optimal_tcc_alignment(sctx, desc_list_byte_size), - &buffer_offset, - (struct pipe_resource**)&desc->buffer, (void**)&ptr); - if (!desc->buffer) { - desc->gpu_address = 0; + &sctx->vb_descriptors_offset, + (struct pipe_resource**)&sctx->vb_descriptors_buffer, + (void**)&ptr); + if (!sctx->vb_descriptors_buffer) { + sctx->vb_descriptors_offset = 0; + sctx->vb_descriptors_gpu_list = NULL; return false; } - desc->gpu_address = desc->buffer->gpu_address + buffer_offset; - desc->list = ptr; + sctx->vb_descriptors_gpu_list = ptr; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - desc->buffer, RADEON_USAGE_READ, - RADEON_PRIO_DESCRIPTORS); + sctx->vb_descriptors_buffer, RADEON_USAGE_READ, + RADEON_PRIO_DESCRIPTORS); assert(count <= SI_MAX_ATTRIBS); for (i = 0; i < count; i++) { struct pipe_vertex_buffer *vb; struct r600_resource *rbuffer; unsigned vbo_index = velems->vertex_buffer_index[i]; uint32_t *desc = &ptr[i*4]; vb = &sctx->vertex_buffer[vbo_index]; @@ -1980,29 +1978,29 @@ void si_update_all_texture_descriptors(struct si_context *sctx) /* SHADER USER DATA */ static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shader) { sctx->shader_pointers_dirty |= u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, SI_NUM_SHADER_DESCS); if (shader == PIPE_SHADER_VERTEX) - sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL; + sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL; si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); } static void si_shader_pointers_begin_new_cs(struct si_context *sctx) { sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS); - sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL; + sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL; si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); 
sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL; sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL; } /* Set a base register address for user data constants in the given shader. * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*. */ static void si_set_user_data_base(struct si_context *sctx, unsigned shader, uint32_t new_base) @@ -2182,22 +2180,28 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx, si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), sh_base[PIPE_SHADER_TESS_CTRL]); si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), sh_base[PIPE_SHADER_GEOMETRY]); } sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE); if (sctx->vertex_buffer_pointer_dirty) { - si_emit_shader_pointer(sctx, &sctx->vertex_buffers, - sh_base[PIPE_SHADER_VERTEX]); + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + + SI_SGPR_VERTEX_BUFFERS * 4; + + si_emit_shader_pointer_head(cs, sh_offset, 1); + si_emit_shader_pointer_body(sctx->screen, cs, + sctx->vb_descriptors_buffer->gpu_address + + sctx->vb_descriptors_offset); sctx->vertex_buffer_pointer_dirty = false; } if (sctx->graphics_bindless_pointer_dirty) { si_emit_global_shader_pointers(sctx, &sctx->bindless_descriptors); sctx->graphics_bindless_pointer_dirty = false; } } @@ -2719,25 +2723,20 @@ void si_init_all_descriptors(struct si_context *sctx) si_init_buffer_resources(&sctx->rw_buffers, &sctx->descriptors[SI_DESCS_RW_BUFFERS], SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS, /* The second set of usage/priority is used by * const buffers in RW buffer slots. 
*/ RADEON_USAGE_READWRITE, RADEON_USAGE_READ, RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER); sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS; - si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS, - 4, SI_NUM_VERTEX_BUFFERS); - FREE(sctx->vertex_buffers.list); /* not used */ - sctx->vertex_buffers.list = NULL; - /* Initialize an array of 1024 bindless descriptors, when the limit is * reached, just make it larger and re-upload the whole array. */ si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors, SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, 1024); sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS); /* Set pipe_context functions. */ @@ -2825,22 +2824,23 @@ void si_release_all_descriptors(struct si_context *sctx) si_release_image_views(&sctx->images[i]); } si_release_buffer_resources(&sctx->rw_buffers, &sctx->descriptors[SI_DESCS_RW_BUFFERS]); for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++) pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]); for (i = 0; i < SI_NUM_DESCS; ++i) si_release_descriptors(&sctx->descriptors[i]); - sctx->vertex_buffers.list = NULL; /* points into a mapped buffer */ - si_release_descriptors(&sctx->vertex_buffers); + r600_resource_reference(&sctx->vb_descriptors_buffer, NULL); + sctx->vb_descriptors_gpu_list = NULL; /* points into a mapped buffer */ + si_release_bindless_descriptors(sctx); } void si_all_descriptors_begin_new_cs(struct si_context *sctx) { int i; for (i = 0; i < SI_NUM_SHADERS; i++) { si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]); si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 3823be0..61c8d70 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -209,21 +209,21 @@ void si_begin_new_cs(struct si_context *ctx) if (ctx->queued.named.hs) ctx->prefetch_L2_mask |= 
SI_PREFETCH_HS; if (ctx->queued.named.es) ctx->prefetch_L2_mask |= SI_PREFETCH_ES; if (ctx->queued.named.gs) ctx->prefetch_L2_mask |= SI_PREFETCH_GS; if (ctx->queued.named.vs) ctx->prefetch_L2_mask |= SI_PREFETCH_VS; if (ctx->queued.named.ps) ctx->prefetch_L2_mask |= SI_PREFETCH_PS; - if (ctx->vertex_buffers.buffer && ctx->vertex_elements) + if (ctx->vb_descriptors_buffer && ctx->vertex_elements) ctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS; /* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */ bool has_clear_state = ctx->screen->has_clear_state; if (has_clear_state) { ctx->framebuffer.dirty_cbufs = u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs); /* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */ ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL; } else { diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 896b640..2053dcb 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -490,22 +490,26 @@ struct si_context { struct si_shader_ctx_state tcs_shader; struct si_shader_ctx_state tes_shader; struct si_cs_shader_state cs_shader_state; /* shader information */ struct si_vertex_elements *vertex_elements; unsigned sprite_coord_enable; bool flatshade; bool do_update_shaders; + /* vertex buffer descriptors */ + uint32_t *vb_descriptors_gpu_list; + struct r600_resource *vb_descriptors_buffer; + unsigned vb_descriptors_offset; + /* shader descriptors */ - struct si_descriptors vertex_buffers; struct si_descriptors descriptors[SI_NUM_DESCS]; unsigned descriptors_dirty; unsigned shader_pointers_dirty; unsigned shader_needs_decompress_mask; struct si_buffer_resources rw_buffers; struct si_buffer_resources const_and_shader_buffers[SI_NUM_SHADERS]; struct si_samplers samplers[SI_NUM_SHADERS]; struct si_images images[SI_NUM_SHADERS]; /* other shader resources */ -- 2.7.4 
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev