There is an issue independent of this patch: si_update_all_resident_texture_descriptors only updates the descriptors of resident handles. The descriptors of non-resident handles aren't updated there, and si_make_texture_handle_resident doesn't update them either when a handle is made resident. I think this should be fixed in si_make_texture_handle_resident. The same applies to image handles (si_make_image_handle_resident). That fix should be a separate patch.
This patch is: Reviewed-by: Marek Olšák <marek.ol...@amd.com> Marek On Wed, Jul 26, 2017 at 4:21 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > Using VRAM address as bindless handles is not a good idea because > we have to use LLVMIntToPTr and the LLVM CSE pass can't optimize > because it has no information about the pointer. > > Instead, use slots indexes like the existing descriptors. Note > that we use fixed 16-dword slots for both samplers and images. > This doesn't really matter because no real apps use image handles. > > This improves performance with DOW3 by +7%. > > v2: - inline si_release_bindless_descriptors() > - fix overwriting sampler and image slots > - use fixed 16-dword slots for images > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/gallium/drivers/radeonsi/si_descriptors.c | 345 > +++++++++++----------- > src/gallium/drivers/radeonsi/si_pipe.c | 12 - > src/gallium/drivers/radeonsi/si_pipe.h | 23 +- > src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 35 ++- > 4 files changed, 195 insertions(+), 220 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c > b/src/gallium/drivers/radeonsi/si_descriptors.c > index 06a171ff9e..543a19ba1e 100644 > --- a/src/gallium/drivers/radeonsi/si_descriptors.c > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c > @@ -1852,16 +1852,20 @@ static void si_rebind_buffer(struct pipe_context > *ctx, struct pipe_resource *buf > > /* Bindless texture handles */ > if (rbuffer->texture_handle_allocated) { > + struct si_descriptors *descs = &sctx->bindless_descriptors; > + > util_dynarray_foreach(&sctx->resident_tex_handles, > struct si_texture_handle *, tex_handle) > { > struct pipe_sampler_view *view = (*tex_handle)->view; > - struct si_bindless_descriptor *desc = > (*tex_handle)->desc; > + unsigned desc_slot = (*tex_handle)->desc_slot; > > if (view->texture == buf) { > si_set_buf_desc_address(rbuffer, > view->u.buf.offset, > - &desc->desc_list[4]); > - desc->dirty 
= true; > + descs->list + > + desc_slot * 16 + 4); > + > + (*tex_handle)->desc_dirty = true; > sctx->bindless_descriptors_dirty = true; > > radeon_add_to_buffer_list_check_mem( > @@ -1874,10 +1878,12 @@ static void si_rebind_buffer(struct pipe_context > *ctx, struct pipe_resource *buf > > /* Bindless image handles */ > if (rbuffer->image_handle_allocated) { > + struct si_descriptors *descs = &sctx->bindless_descriptors; > + > util_dynarray_foreach(&sctx->resident_img_handles, > struct si_image_handle *, img_handle) { > struct pipe_image_view *view = &(*img_handle)->view; > - struct si_bindless_descriptor *desc = > (*img_handle)->desc; > + unsigned desc_slot = (*img_handle)->desc_slot; > > if (view->resource == buf) { > if (view->access & PIPE_IMAGE_ACCESS_WRITE) > @@ -1885,8 +1891,10 @@ static void si_rebind_buffer(struct pipe_context *ctx, > struct pipe_resource *buf > > si_set_buf_desc_address(rbuffer, > view->u.buf.offset, > - &desc->desc_list[4]); > - desc->dirty = true; > + descs->list + > + desc_slot * 16 + 4); > + > + (*img_handle)->desc_dirty = true; > sctx->bindless_descriptors_dirty = true; > > radeon_add_to_buffer_list_check_mem( > @@ -1918,11 +1926,19 @@ static void si_invalidate_buffer(struct pipe_context > *ctx, struct pipe_resource > } > > static void si_upload_bindless_descriptor(struct si_context *sctx, > - struct si_bindless_descriptor *desc) > + unsigned desc_slot, > + unsigned num_dwords) > { > + struct si_descriptors *desc = &sctx->bindless_descriptors; > struct radeon_winsys_cs *cs = sctx->b.gfx.cs; > - uint64_t va = desc->buffer->gpu_address + desc->offset; > - unsigned num_dwords = sizeof(desc->desc_list) / 4; > + unsigned desc_slot_offset = desc_slot * 16; > + uint32_t *data; > + uint64_t va; > + > + data = desc->list + desc_slot_offset; > + > + va = desc->buffer->gpu_address + desc->buffer_offset + > + desc_slot_offset * 4; > > radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0)); > radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) | > @@ 
-1930,7 +1946,7 @@ static void si_upload_bindless_descriptor(struct > si_context *sctx, > S_370_ENGINE_SEL(V_370_ME)); > radeon_emit(cs, va); > radeon_emit(cs, va >> 32); > - radeon_emit_array(cs, desc->desc_list, num_dwords); > + radeon_emit_array(cs, data, num_dwords); > } > > static void si_upload_bindless_descriptors(struct si_context *sctx) > @@ -1947,24 +1963,24 @@ static void si_upload_bindless_descriptors(struct > si_context *sctx) > > util_dynarray_foreach(&sctx->resident_tex_handles, > struct si_texture_handle *, tex_handle) { > - struct si_bindless_descriptor *desc = (*tex_handle)->desc; > + unsigned desc_slot = (*tex_handle)->desc_slot; > > - if (!desc->dirty) > + if (!(*tex_handle)->desc_dirty) > continue; > > - si_upload_bindless_descriptor(sctx, desc); > - desc->dirty = false; > + si_upload_bindless_descriptor(sctx, desc_slot, 16); > + (*tex_handle)->desc_dirty = false; > } > > util_dynarray_foreach(&sctx->resident_img_handles, > struct si_image_handle *, img_handle) { > - struct si_bindless_descriptor *desc = (*img_handle)->desc; > + unsigned desc_slot = (*img_handle)->desc_slot; > > - if (!desc->dirty) > + if (!(*img_handle)->desc_dirty) > continue; > > - si_upload_bindless_descriptor(sctx, desc); > - desc->dirty = false; > + si_upload_bindless_descriptor(sctx, desc_slot, 8); > + (*img_handle)->desc_dirty = false; > } > > /* Invalidate L1 because it doesn't know that L2 changed. */ > @@ -1977,9 +1993,11 @@ static void si_upload_bindless_descriptors(struct > si_context *sctx) > /* Update mutable image descriptor fields of all resident textures. 
*/ > static void si_update_all_resident_texture_descriptors(struct si_context > *sctx) > { > + struct si_descriptors *desc = &sctx->bindless_descriptors; > + > util_dynarray_foreach(&sctx->resident_tex_handles, > struct si_texture_handle *, tex_handle) { > - struct si_bindless_descriptor *desc = (*tex_handle)->desc; > + unsigned desc_slot_offset = (*tex_handle)->desc_slot * 16; > struct si_sampler_view *sview = > (struct si_sampler_view *)(*tex_handle)->view; > uint32_t desc_list[16]; > @@ -1987,31 +2005,43 @@ static void > si_update_all_resident_texture_descriptors(struct si_context *sctx) > if (sview->base.texture->target == PIPE_BUFFER) > continue; > > - memcpy(desc_list, desc->desc_list, sizeof(desc_list)); > + /* Store the previous descriptor to only mark it dirty if it > + * has been changed. > + */ > + memcpy(desc_list, desc->list + desc_slot_offset, > + sizeof(desc_list)); > + > si_set_sampler_view_desc(sctx, sview, &(*tex_handle)->sstate, > - &desc->desc_list[0]); > + desc->list + desc_slot_offset); > > - if (memcmp(desc_list, desc->desc_list, sizeof(desc_list))) { > - desc->dirty = true; > + if (memcmp(desc_list, desc->list + desc_slot_offset, > + sizeof(desc_list))) { > + (*tex_handle)->desc_dirty = true; > sctx->bindless_descriptors_dirty = true; > } > } > > util_dynarray_foreach(&sctx->resident_img_handles, > struct si_image_handle *, img_handle) { > - struct si_bindless_descriptor *desc = (*img_handle)->desc; > + unsigned desc_slot_offset = (*img_handle)->desc_slot * 16; > struct pipe_image_view *view = &(*img_handle)->view; > - uint32_t desc_list[16]; > + uint32_t desc_list[8]; > > if (view->resource->target == PIPE_BUFFER) > continue; > > - memcpy(desc_list, desc->desc_list, sizeof(desc_list)); > + /* Store the previous descriptor to only mark it dirty if it > + * has been changed. 
> + */ > + memcpy(desc_list, desc->list + desc_slot_offset, > + sizeof(desc_list)); > + > si_set_shader_image_desc(sctx, view, true, > - &desc->desc_list[0]); > + desc->list + desc_slot_offset); > > - if (memcmp(desc_list, desc->desc_list, sizeof(desc_list))) { > - desc->dirty = true; > + if (memcmp(desc_list, desc->list + desc_slot_offset, > + sizeof(desc_list))) { > + (*img_handle)->desc_dirty = true; > sctx->bindless_descriptors_dirty = true; > } > } > @@ -2082,6 +2112,8 @@ static void si_shader_userdata_begin_new_cs(struct > si_context *sctx) > { > sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS); > sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != > NULL; > + sctx->graphics_bindless_pointer_dirty = > sctx->bindless_descriptors.buffer != NULL; > + sctx->compute_bindless_pointer_dirty = > sctx->bindless_descriptors.buffer != NULL; > si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); > } > > @@ -2223,6 +2255,12 @@ void si_emit_graphics_shader_userdata(struct > si_context *sctx, > sh_base[PIPE_SHADER_VERTEX]); > sctx->vertex_buffer_pointer_dirty = false; > } > + > + if (sctx->graphics_bindless_pointer_dirty) { > + si_emit_graphics_shader_pointers(sctx, > + &sctx->bindless_descriptors); > + sctx->graphics_bindless_pointer_dirty = false; > + } > } > > void si_emit_compute_shader_userdata(struct si_context *sctx) > @@ -2239,135 +2277,107 @@ void si_emit_compute_shader_userdata(struct > si_context *sctx) > si_emit_shader_pointer(sctx, descs + i, base); > } > sctx->shader_pointers_dirty &= ~compute_mask; > + > + if (sctx->compute_bindless_pointer_dirty) { > + si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, > base); > + sctx->compute_bindless_pointer_dirty = false; > + } > } > > /* BINDLESS */ > > -struct si_bindless_descriptor_slab > +static void si_init_bindless_descriptors(struct si_context *sctx, > + struct si_descriptors *desc, > + unsigned shader_userdata_index, > + unsigned num_elements) > { > - struct pb_slab base; 
> - struct r600_resource *buffer; > - struct si_bindless_descriptor *entries; > -}; > + si_init_descriptors(sctx, desc, shader_userdata_index, 16, > num_elements, > + 0, 0, NULL); > + sctx->bindless_descriptors.num_active_slots = num_elements; > > -bool si_bindless_descriptor_can_reclaim_slab(void *priv, > - struct pb_slab_entry *entry) > -{ > - /* Do not allow to reclaim any bindless descriptors for now because > the > - * GPU might be using them. This should be improved later on. > + /* The first bindless descriptor is stored at slot 1, because 0 is not > + * considered to be a valid handle. > */ > - return false; > + sctx->num_bindless_descriptors = 1; > } > > -struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap, > - unsigned entry_size, > - unsigned group_index) > +static inline void si_release_bindless_descriptors(struct si_context *sctx) > { > - struct si_context *sctx = priv; > - struct si_screen *sscreen = sctx->screen; > - struct si_bindless_descriptor_slab *slab; > - > - slab = CALLOC_STRUCT(si_bindless_descriptor_slab); > - if (!slab) > - return NULL; > - > - /* Create a buffer in VRAM for 1024 bindless descriptors. 
*/ > - slab->buffer = (struct r600_resource *) > - pipe_buffer_create(&sscreen->b.b, 0, > - PIPE_USAGE_DEFAULT, 64 * 1024); > - if (!slab->buffer) > - goto fail; > - > - slab->base.num_entries = slab->buffer->bo_size / entry_size; > - slab->base.num_free = slab->base.num_entries; > - slab->entries = CALLOC(slab->base.num_entries, > sizeof(*slab->entries)); > - if (!slab->entries) > - goto fail_buffer; > - > - LIST_INITHEAD(&slab->base.free); > - > - for (unsigned i = 0; i < slab->base.num_entries; ++i) { > - struct si_bindless_descriptor *desc = &slab->entries[i]; > - > - desc->entry.slab = &slab->base; > - desc->entry.group_index = group_index; > - desc->buffer = slab->buffer; > - desc->offset = i * entry_size; > - > - LIST_ADDTAIL(&desc->entry.head, &slab->base.free); > - } > - > - /* Add the descriptor to the per-context list. */ > - util_dynarray_append(&sctx->bindless_descriptors, > - struct r600_resource *, slab->buffer); > - > - return &slab->base; > - > -fail_buffer: > - r600_resource_reference(&slab->buffer, NULL); > -fail: > - FREE(slab); > - return NULL; > + si_release_descriptors(&sctx->bindless_descriptors); > } > > -void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab) > -{ > - struct si_context *sctx = priv; > - struct si_bindless_descriptor_slab *slab = > - (struct si_bindless_descriptor_slab *)pslab; > - > - /* Remove the descriptor from the per-context list. 
*/ > - util_dynarray_delete_unordered(&sctx->bindless_descriptors, > - struct r600_resource *, slab->buffer); > - > - r600_resource_reference(&slab->buffer, NULL); > - FREE(slab->entries); > - FREE(slab); > -} > - > -static struct si_bindless_descriptor * > +static unsigned > si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list, > unsigned size) > { > struct si_screen *sscreen = sctx->screen; > - struct si_bindless_descriptor *desc; > - struct pb_slab_entry *entry; > - void *ptr; > + struct si_descriptors *desc = &sctx->bindless_descriptors; > + unsigned desc_slot, desc_slot_offset; > + bool resized = false; > > - /* Sub-allocate the bindless descriptor from a slab to avoid dealing > - * with a ton of buffers and for reducing the winsys overhead. > - */ > - entry = pb_slab_alloc(&sctx->bindless_descriptor_slabs, 64, 0); > - if (!entry) > - return NULL; > + /* Reserve a new slot for this bindless descriptor. */ > + desc_slot = sctx->num_bindless_descriptors++; > > - desc = NULL; > - desc = container_of(entry, desc, entry); > + if (desc_slot >= desc->num_elements) { > + /* The array of bindless descriptors is full, resize it. */ > + unsigned slot_size = desc->element_dw_size * 4; > + unsigned new_num_elements = desc->num_elements * 2; > > - /* Upload the descriptor directly in VRAM. Because the slabs are > - * currently never reclaimed, we don't need to synchronize the > - * operation. > - */ > - ptr = sscreen->b.ws->buffer_map(desc->buffer->buf, NULL, > - PIPE_TRANSFER_WRITE | > - PIPE_TRANSFER_UNSYNCHRONIZED); > - util_memcpy_cpu_to_le32(ptr + desc->offset, desc_list, size); > + desc->list = REALLOC(desc->list, desc->num_elements * > slot_size, > + new_num_elements * slot_size); > + desc->num_elements = new_num_elements; > + desc->num_active_slots = new_num_elements; > + resized = true; > + } > > - /* Keep track of the initial descriptor especially for buffers > - * invalidation because we might need to know the previous address. 
> + /* For simplicity, sampler and image bindless descriptors use fixed > + * 16-dword slots for now. Image descriptors only need 8-dword but > this > + * doesn't really matter because no real apps use image handles. > */ > - memcpy(desc->desc_list, desc_list, sizeof(desc->desc_list)); > + desc_slot_offset = desc_slot * 16; > + > + /* Copy the descriptor into the array. */ > + memcpy(desc->list + desc_slot_offset, desc_list, size); > + > + if (!desc->buffer || resized) { > + /* Upload the whole array of bindless descriptors if it's new, > + * or re-upload it if the array has been resized. > + */ > + if (!si_upload_descriptors(sctx, desc, > + &sctx->shader_userdata.atom)) > + return 0; > + > + /* Make sure to re-emit the shader pointers for all stages. */ > + sctx->graphics_bindless_pointer_dirty = true; > + sctx->compute_bindless_pointer_dirty = true; > + } else { > + void *ptr; > + > + /* Upload the descriptor directly in VRAM. Because the slot > + * index is new, we don't need to synchronize the operation. 
> + */ > + ptr = sscreen->b.ws->buffer_map(desc->buffer->buf, NULL, > + PIPE_TRANSFER_WRITE | > + PIPE_TRANSFER_UNSYNCHRONIZED); > + ptr += desc->buffer_offset; > > - return desc; > + util_memcpy_cpu_to_le32(ptr + desc_slot_offset * 4, > + desc->list + desc_slot_offset, size); > + } > + > + return desc_slot; > } > > static void si_invalidate_bindless_buf_desc(struct si_context *sctx, > - struct si_bindless_descriptor > *desc, > + unsigned desc_slot, > struct pipe_resource *resource, > - uint64_t offset) > + uint64_t offset, > + bool *desc_dirty) > { > + struct si_descriptors *desc = &sctx->bindless_descriptors; > struct r600_resource *buf = r600_resource(resource); > - uint32_t *desc_list = desc->desc_list + 4; > + unsigned desc_slot_offset = desc_slot * 16; > + uint32_t *desc_list = desc->list + desc_slot_offset + 4; > uint64_t old_desc_va; > > assert(resource->target == PIPE_BUFFER); > @@ -2382,7 +2392,7 @@ static void si_invalidate_bindless_buf_desc(struct > si_context *sctx, > */ > si_set_buf_desc_address(buf, offset, &desc_list[0]); > > - desc->dirty = true; > + *desc_dirty = true; > sctx->bindless_descriptors_dirty = true; > } > } > @@ -2415,20 +2425,17 @@ static uint64_t si_create_texture_handle(struct > pipe_context *ctx, > memcpy(&tex_handle->sstate, sstate, sizeof(*sstate)); > ctx->delete_sampler_state(ctx, sstate); > > - tex_handle->desc = si_create_bindless_descriptor(sctx, desc_list, > - sizeof(desc_list)); > - if (!tex_handle->desc) { > + tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, > + > sizeof(desc_list)); > + if (!tex_handle->desc_slot) { > FREE(tex_handle); > return 0; > } > > - handle = tex_handle->desc->buffer->gpu_address + > - tex_handle->desc->offset; > + handle = tex_handle->desc_slot; > > if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle, > tex_handle)) { > - pb_slab_free(&sctx->bindless_descriptor_slabs, > - &tex_handle->desc->entry); > FREE(tex_handle); > return 0; > } > @@ -2454,8 +2461,6 @@ static 
void si_delete_texture_handle(struct > pipe_context *ctx, uint64_t handle) > > pipe_sampler_view_reference(&tex_handle->view, NULL); > _mesa_hash_table_remove(sctx->tex_handles, entry); > - pb_slab_free(&sctx->bindless_descriptor_slabs, > - &tex_handle->desc->entry); > FREE(tex_handle); > } > > @@ -2497,9 +2502,11 @@ static void si_make_texture_handle_resident(struct > pipe_context *ctx, > p_atomic_read(&rtex->framebuffers_bound)) > sctx->need_check_render_feedback = true; > } else { > - si_invalidate_bindless_buf_desc(sctx, > tex_handle->desc, > + si_invalidate_bindless_buf_desc(sctx, > + tex_handle->desc_slot, > sview->base.texture, > - > sview->base.u.buf.offset); > + > sview->base.u.buf.offset, > + > &tex_handle->desc_dirty); > } > > /* Add the texture handle to the per-context list. */ > @@ -2509,11 +2516,6 @@ static void si_make_texture_handle_resident(struct > pipe_context *ctx, > /* Add the buffers to the current CS in case si_begin_new_cs() > * is not going to be called. > */ > - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, > - tex_handle->desc->buffer, > - RADEON_USAGE_READWRITE, > - RADEON_PRIO_DESCRIPTORS); > - > si_sampler_view_add_buffer(sctx, sview->base.texture, > RADEON_USAGE_READ, > sview->is_stencil_sampler, false); > @@ -2540,7 +2542,7 @@ static uint64_t si_create_image_handle(struct > pipe_context *ctx, > { > struct si_context *sctx = (struct si_context *)ctx; > struct si_image_handle *img_handle; > - uint32_t desc_list[16]; > + uint32_t desc_list[8]; > uint64_t handle; > > if (!view || !view->resource) > @@ -2555,20 +2557,17 @@ static uint64_t si_create_image_handle(struct > pipe_context *ctx, > > si_set_shader_image_desc(sctx, view, false, &desc_list[0]); > > - img_handle->desc = si_create_bindless_descriptor(sctx, desc_list, > - sizeof(desc_list)); > - if (!img_handle->desc) { > + img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, > + > sizeof(desc_list)); > + if (!img_handle->desc_slot) { > FREE(img_handle); > 
return 0; > } > > - handle = img_handle->desc->buffer->gpu_address + > - img_handle->desc->offset; > + handle = img_handle->desc_slot; > > if (!_mesa_hash_table_insert(sctx->img_handles, (void *)handle, > img_handle)) { > - pb_slab_free(&sctx->bindless_descriptor_slabs, > - &img_handle->desc->entry); > FREE(img_handle); > return 0; > } > @@ -2594,8 +2593,6 @@ static void si_delete_image_handle(struct pipe_context > *ctx, uint64_t handle) > > util_copy_image_view(&img_handle->view, NULL); > _mesa_hash_table_remove(sctx->img_handles, entry); > - pb_slab_free(&sctx->bindless_descriptor_slabs, > - &img_handle->desc->entry); > FREE(img_handle); > } > > @@ -2633,9 +2630,11 @@ static void si_make_image_handle_resident(struct > pipe_context *ctx, > p_atomic_read(&rtex->framebuffers_bound)) > sctx->need_check_render_feedback = true; > } else { > - si_invalidate_bindless_buf_desc(sctx, > img_handle->desc, > + si_invalidate_bindless_buf_desc(sctx, > + img_handle->desc_slot, > view->resource, > - view->u.buf.offset); > + view->u.buf.offset, > + > &img_handle->desc_dirty); > } > > /* Add the image handle to the per-context list. */ > @@ -2645,11 +2644,6 @@ static void si_make_image_handle_resident(struct > pipe_context *ctx, > /* Add the buffers to the current CS in case si_begin_new_cs() > * is not going to be called. > */ > - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, > - img_handle->desc->buffer, > - RADEON_USAGE_READWRITE, > - RADEON_PRIO_DESCRIPTORS); > - > si_sampler_view_add_buffer(sctx, view->resource, > (access & PIPE_IMAGE_ACCESS_WRITE) > ? > RADEON_USAGE_READWRITE : > @@ -2679,20 +2673,6 @@ void si_all_resident_buffers_begin_new_cs(struct > si_context *sctx) > num_resident_img_handles = sctx->resident_img_handles.size / > sizeof(struct si_image_handle *); > > - /* Skip adding the bindless descriptors when no handles are resident. > - */ > - if (!num_resident_tex_handles && !num_resident_img_handles) > - return; > - > - /* Add all bindless descriptors. 
*/ > - util_dynarray_foreach(&sctx->bindless_descriptors, > - struct r600_resource *, desc) { > - > - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, *desc, > - RADEON_USAGE_READWRITE, > - RADEON_PRIO_DESCRIPTORS); > - } > - > /* Add all resident texture handles. */ > util_dynarray_foreach(&sctx->resident_tex_handles, > struct si_texture_handle *, tex_handle) { > @@ -2841,6 +2821,13 @@ void si_init_all_descriptors(struct si_context *sctx) > si_init_descriptors(sctx, &sctx->vertex_buffers, > SI_SGPR_VERTEX_BUFFERS, > 4, SI_NUM_VERTEX_BUFFERS, 0, 0, NULL); > > + /* Initialize an array of 1024 bindless descriptors, when the limit is > + * reached, just make it larger and re-upload the whole array. > + */ > + si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors, > + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, > + 1024); > + > sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS); > sctx->total_ce_ram_allocated = ce_offset; > > @@ -2954,6 +2941,7 @@ void si_release_all_descriptors(struct si_context *sctx) > for (i = 0; i < SI_NUM_DESCS; ++i) > si_release_descriptors(&sctx->descriptors[i]); > si_release_descriptors(&sctx->vertex_buffers); > + si_release_bindless_descriptors(sctx); > } > > void si_all_descriptors_begin_new_cs(struct si_context *sctx) > @@ -2970,6 +2958,7 @@ void si_all_descriptors_begin_new_cs(struct si_context > *sctx) > > for (i = 0; i < SI_NUM_DESCS; ++i) > si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]); > + si_descriptors_begin_new_cs(sctx, &sctx->bindless_descriptors); > > si_shader_userdata_begin_new_cs(sctx); > } > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index 0bc30027ea..dc27af0a92 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -96,9 +96,6 @@ static void si_destroy_context(struct pipe_context *context) > r600_resource_reference(&sctx->last_trace_buf, NULL); > radeon_clear_saved_cs(&sctx->last_gfx); > > - 
pb_slabs_deinit(&sctx->bindless_descriptor_slabs); > - util_dynarray_fini(&sctx->bindless_descriptors); > - > _mesa_hash_table_destroy(sctx->tex_handles, NULL); > _mesa_hash_table_destroy(sctx->img_handles, NULL); > > @@ -343,15 +340,6 @@ static struct pipe_context *si_create_context(struct > pipe_screen *screen, > > sctx->tm = si_create_llvm_target_machine(sscreen); > > - /* Create a slab allocator for all bindless descriptors. */ > - if (!pb_slabs_init(&sctx->bindless_descriptor_slabs, 6, 6, 1, sctx, > - si_bindless_descriptor_can_reclaim_slab, > - si_bindless_descriptor_slab_alloc, > - si_bindless_descriptor_slab_free)) > - goto fail; > - > - util_dynarray_init(&sctx->bindless_descriptors, NULL); > - > /* Bindless handles. */ > sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer, > _mesa_key_pointer_equal); > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h > b/src/gallium/drivers/radeonsi/si_pipe.h > index c028aba308..cfdf4a5571 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.h > +++ b/src/gallium/drivers/radeonsi/si_pipe.h > @@ -231,25 +231,18 @@ union si_vgt_param_key { > uint32_t index; > }; > > -struct si_bindless_descriptor > -{ > - struct pb_slab_entry entry; > - struct r600_resource *buffer; > - unsigned offset; > - uint32_t desc_list[16]; > - bool dirty; > -}; > - > struct si_texture_handle > { > - struct si_bindless_descriptor *desc; > + unsigned desc_slot; > + bool desc_dirty; > struct pipe_sampler_view *view; > struct si_sampler_state sstate; > }; > > struct si_image_handle > { > - struct si_bindless_descriptor *desc; > + unsigned desc_slot; > + bool desc_dirty; > struct pipe_image_view view; > }; > > @@ -419,12 +412,12 @@ struct si_context { > union si_vgt_param_key ia_multi_vgt_param_key; > unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES]; > > - /* Slab allocator for bindless descriptors. */ > - struct pb_slabs bindless_descriptor_slabs; > - > /* Bindless descriptors. 
*/ > - struct util_dynarray bindless_descriptors; > + struct si_descriptors bindless_descriptors; > + unsigned num_bindless_descriptors; > bool bindless_descriptors_dirty; > + bool graphics_bindless_pointer_dirty; > + bool compute_bindless_pointer_dirty; > > /* Allocated bindless handles */ > struct hash_table *tex_handles; > diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > index 3b50ca5341..9a44ab786d 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > @@ -213,15 +213,22 @@ image_fetch_rsrc( > } > > if (image->Register.File != TGSI_FILE_IMAGE) { > + /* Bindless descriptors are accessible from a different pair > of > + * user SGPR indices. > + */ > struct gallivm_state *gallivm = &ctx->gallivm; > LLVMBuilderRef builder = gallivm->builder; > > - LLVMValueRef ptr = > - lp_build_emit_fetch_src(bld_base, image, > - TGSI_TYPE_UNSIGNED64, 0); > - rsrc_ptr = LLVMBuildIntToPtr(builder, ptr, > - si_const_array(ctx->v8i32, 0), > ""); > - index = LLVMConstInt(ctx->i32, 0, 0); > + rsrc_ptr = LLVMGetParam(ctx->main_fn, > + > ctx->param_bindless_samplers_and_images); > + index = lp_build_emit_fetch_src(bld_base, image, > + TGSI_TYPE_UNSIGNED, 0); > + > + /* For simplicity, bindless image descriptors use fixed > + * 16-dword slots for now. 
> + */ > + index = LLVMBuildMul(builder, index, > + LLVMConstInt(ctx->i32, 2, 0), ""); > } > > *rsrc = load_image_desc(ctx, rsrc_ptr, index, target); > @@ -1221,15 +1228,13 @@ static void tex_fetch_ptrs( > } > > if (reg->Register.File != TGSI_FILE_SAMPLER) { > - struct gallivm_state *gallivm = &ctx->gallivm; > - LLVMBuilderRef builder = gallivm->builder; > - > - LLVMValueRef ptr = > - lp_build_emit_fetch_src(bld_base, reg, > - TGSI_TYPE_UNSIGNED64, 0); > - list = LLVMBuildIntToPtr(builder, ptr, > - si_const_array(ctx->v8i32, 0), ""); > - index = LLVMConstInt(ctx->i32, 0, 0); > + /* Bindless descriptors are accessible from a different pair > of > + * user SGPR indices. > + */ > + list = LLVMGetParam(ctx->main_fn, > + ctx->param_bindless_samplers_and_images); > + index = lp_build_emit_fetch_src(bld_base, reg, > + TGSI_TYPE_UNSIGNED, 0); > } > > if (target == TGSI_TEXTURE_BUFFER) > -- > 2.13.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev