From: Marek Olšák <marek.ol...@amd.com>

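Stop using struct si_descriptors for vertex buffer descriptors; track the
descriptor buffer, its offset and the mapped list directly in si_context
instead, because the VBO descriptor code will change a lot one day.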
---
 src/gallium/drivers/radeonsi/si_blit.c        |  2 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c      |  5 ++-
 src/gallium/drivers/radeonsi/si_debug.c       | 14 ++++++--
 src/gallium/drivers/radeonsi/si_descriptors.c | 50 +++++++++++++--------------
 src/gallium/drivers/radeonsi/si_hw_context.c  |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h        |  6 +++-
 6 files changed, 46 insertions(+), 33 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 370ce04..f1c4f6d 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -79,21 +79,21 @@ void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
 
 void si_blitter_end(struct pipe_context *ctx)
 {
        struct si_context *sctx = (struct si_context *)ctx;
 
        sctx->b.render_cond_force_off = false;
 
        /* Restore shader pointers because the VS blit shader changed all
         * non-global VS user SGPRs. */
        sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
-       sctx->vertex_buffer_pointer_dirty = true;
+       sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
        si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
 }
 
 static unsigned u_max_sample(struct pipe_resource *r)
 {
        return r->nr_samples ? r->nr_samples - 1 : 0;
 }
 
 static unsigned
 si_blit_dbcb_copy(struct si_context *sctx,
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index b665926..15bd305 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -509,23 +509,22 @@ static void cik_prefetch_shader_async(struct si_context *sctx,
        assert(state->nbo == 1);
 
        cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
 }
 
 static void cik_prefetch_VBO_descriptors(struct si_context *sctx)
 {
        if (!sctx->vertex_elements)
                return;
 
-       cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
-                                sctx->vertex_buffers.gpu_address -
-                                sctx->vertex_buffers.buffer->gpu_address,
+       cik_prefetch_TC_L2_async(sctx, &sctx->vb_descriptors_buffer->b.b,
+                                sctx->vb_descriptors_offset,
                                 sctx->vertex_elements->desc_list_byte_size);
 }
 
 void cik_emit_prefetch_L2(struct si_context *sctx)
 {
        /* Prefetch shaders and VBO descriptors to TC L2. */
        if (sctx->b.chip_class >= GFX9) {
                /* Choose the right spot for the VBO prefetch. */
                if (sctx->tes_shader.cso) {
                        if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 385ce39..1f25f4e 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -733,24 +733,34 @@ static void si_dump_descriptors(struct si_context *sctx,
                enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
                                   SI_NUM_SHADER_BUFFERS;
                enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
                                    u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
                enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
                                    (32 - SI_NUM_SHADER_BUFFERS);
                enabled_samplers = sctx->samplers[processor].enabled_mask;
                enabled_images = sctx->images[processor].enabled_mask;
        }
 
-       if (processor == PIPE_SHADER_VERTEX) {
+       if (processor == PIPE_SHADER_VERTEX &&
+           sctx->vb_descriptors_buffer &&
+           sctx->vb_descriptors_gpu_list &&
+           sctx->vertex_elements) {
                assert(info); /* only CS may not have an info struct */
+               struct si_descriptors desc = {};
 
-               si_dump_descriptor_list(sctx->screen, &sctx->vertex_buffers, name,
+               desc.buffer = sctx->vb_descriptors_buffer;
+               desc.list = sctx->vb_descriptors_gpu_list;
+               desc.gpu_list = sctx->vb_descriptors_gpu_list;
+               desc.element_dw_size = 4;
+               desc.num_active_slots = sctx->vertex_elements->desc_list_byte_size / 16;
+
+               si_dump_descriptor_list(sctx->screen, &desc, name,
                                        " - Vertex buffer", 4, info->num_inputs,
                                        si_identity, log);
        }
 
        si_dump_descriptor_list(sctx->screen,
                                
&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
                                name, " - Constant buffer", 4,
                                util_last_bit(enabled_constbuf),
                                si_get_constbuf_slot, log);
        si_dump_descriptor_list(sctx->screen,
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 76f2a3e..f6bc3cf 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1040,84 +1040,82 @@ static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
 
                assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
                *offset = va - res->gpu_address;
        }
 }
 
 /* VERTEX BUFFERS */
 
 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
 {
-       struct si_descriptors *desc = &sctx->vertex_buffers;
        int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
        int i;
 
        for (i = 0; i < count; i++) {
                int vb = sctx->vertex_elements->vertex_buffer_index[i];
 
                if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
                        continue;
                if (!sctx->vertex_buffer[vb].buffer.resource)
                        continue;
 
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                      (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource,
                                      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
        }
 
-       if (!desc->buffer)
+       if (!sctx->vb_descriptors_buffer)
                return;
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-                             desc->buffer, RADEON_USAGE_READ,
-                             RADEON_PRIO_DESCRIPTORS);
+                                 sctx->vb_descriptors_buffer, RADEON_USAGE_READ,
+                                 RADEON_PRIO_DESCRIPTORS);
 }
 
 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 {
        struct si_vertex_elements *velems = sctx->vertex_elements;
-       struct si_descriptors *desc = &sctx->vertex_buffers;
        unsigned i, count;
        unsigned desc_list_byte_size;
        unsigned first_vb_use_mask;
        uint32_t *ptr;
 
        if (!sctx->vertex_buffers_dirty || !velems)
                return true;
 
        count = velems->count;
 
        if (!count)
                return true;
 
        desc_list_byte_size = velems->desc_list_byte_size;
        first_vb_use_mask = velems->first_vb_use_mask;
 
        /* Vertex buffer descriptors are the only ones which are uploaded
         * directly through a staging buffer and don't go through
         * the fine-grained upload path.
         */
-       unsigned buffer_offset = 0;
        u_upload_alloc(sctx->b.b.const_uploader, 0,
                       desc_list_byte_size,
                       si_optimal_tcc_alignment(sctx, desc_list_byte_size),
-                      &buffer_offset,
-                      (struct pipe_resource**)&desc->buffer, (void**)&ptr);
-       if (!desc->buffer) {
-               desc->gpu_address = 0;
+                      &sctx->vb_descriptors_offset,
+                      (struct pipe_resource**)&sctx->vb_descriptors_buffer,
+                      (void**)&ptr);
+       if (!sctx->vb_descriptors_buffer) {
+               sctx->vb_descriptors_offset = 0;
+               sctx->vb_descriptors_gpu_list = NULL;
                return false;
        }
 
-       desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
-       desc->list = ptr;
+       sctx->vb_descriptors_gpu_list = ptr;
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-                             desc->buffer, RADEON_USAGE_READ,
-                             RADEON_PRIO_DESCRIPTORS);
+                                 sctx->vb_descriptors_buffer, RADEON_USAGE_READ,
+                                 RADEON_PRIO_DESCRIPTORS);
 
        assert(count <= SI_MAX_ATTRIBS);
 
        for (i = 0; i < count; i++) {
                struct pipe_vertex_buffer *vb;
                struct r600_resource *rbuffer;
                unsigned vbo_index = velems->vertex_buffer_index[i];
                uint32_t *desc = &ptr[i*4];
 
                vb = &sctx->vertex_buffer[vbo_index];
@@ -1980,29 +1978,29 @@ void si_update_all_texture_descriptors(struct si_context *sctx)
 /* SHADER USER DATA */
 
 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
                                          unsigned shader)
 {
        sctx->shader_pointers_dirty |=
                u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
                                  SI_NUM_SHADER_DESCS);
 
        if (shader == PIPE_SHADER_VERTEX)
-               sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
+               sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
 
        si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
 }
 
 static void si_shader_pointers_begin_new_cs(struct si_context *sctx)
 {
        sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
-       sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
+       sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
        si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
        sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
        sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
 }
 
 /* Set a base register address for user data constants in the given shader.
  * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
  */
 static void si_set_user_data_base(struct si_context *sctx,
                                  unsigned shader, uint32_t new_base)
@@ -2182,22 +2180,28 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx,
                si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
                                                 sh_base[PIPE_SHADER_TESS_CTRL]);
                si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
                                                 sh_base[PIPE_SHADER_GEOMETRY]);
        }
 
        sctx->shader_pointers_dirty &=
                ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
 
        if (sctx->vertex_buffer_pointer_dirty) {
-               si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
-                                      sh_base[PIPE_SHADER_VERTEX]);
+               struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+               unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] +
+                                    SI_SGPR_VERTEX_BUFFERS * 4;
+
+               si_emit_shader_pointer_head(cs, sh_offset, 1);
+               si_emit_shader_pointer_body(sctx->screen, cs,
+                                           sctx->vb_descriptors_buffer->gpu_address +
+                                           sctx->vb_descriptors_offset);
                sctx->vertex_buffer_pointer_dirty = false;
        }
 
        if (sctx->graphics_bindless_pointer_dirty) {
                si_emit_global_shader_pointers(sctx,
                                               &sctx->bindless_descriptors);
                sctx->graphics_bindless_pointer_dirty = false;
        }
 }
 
@@ -2719,25 +2723,20 @@ void si_init_all_descriptors(struct si_context *sctx)
 
        si_init_buffer_resources(&sctx->rw_buffers,
                                 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
                                 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
                                 /* The second set of usage/priority is used by
                                  * const buffers in RW buffer slots. */
                                 RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
                                 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
        sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
 
-       si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
-                           4, SI_NUM_VERTEX_BUFFERS);
-       FREE(sctx->vertex_buffers.list); /* not used */
-       sctx->vertex_buffers.list = NULL;
-
        /* Initialize an array of 1024 bindless descriptors, when the limit is
         * reached, just make it larger and re-upload the whole array.
         */
        si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
                                     SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
                                     1024);
 
        sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 
        /* Set pipe_context functions. */
@@ -2825,22 +2824,23 @@ void si_release_all_descriptors(struct si_context *sctx)
                si_release_image_views(&sctx->images[i]);
        }
        si_release_buffer_resources(&sctx->rw_buffers,
                                    &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
        for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++)
                pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]);
 
        for (i = 0; i < SI_NUM_DESCS; ++i)
                si_release_descriptors(&sctx->descriptors[i]);
 
-       sctx->vertex_buffers.list = NULL; /* points into a mapped buffer */
-       si_release_descriptors(&sctx->vertex_buffers);
+       r600_resource_reference(&sctx->vb_descriptors_buffer, NULL);
+       sctx->vb_descriptors_gpu_list = NULL; /* points into a mapped buffer */
+
        si_release_bindless_descriptors(sctx);
 }
 
 void si_all_descriptors_begin_new_cs(struct si_context *sctx)
 {
        int i;
 
        for (i = 0; i < SI_NUM_SHADERS; i++) {
                si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
                si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 3823be0..61c8d70 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -209,21 +209,21 @@ void si_begin_new_cs(struct si_context *ctx)
        if (ctx->queued.named.hs)
                ctx->prefetch_L2_mask |= SI_PREFETCH_HS;
        if (ctx->queued.named.es)
                ctx->prefetch_L2_mask |= SI_PREFETCH_ES;
        if (ctx->queued.named.gs)
                ctx->prefetch_L2_mask |= SI_PREFETCH_GS;
        if (ctx->queued.named.vs)
                ctx->prefetch_L2_mask |= SI_PREFETCH_VS;
        if (ctx->queued.named.ps)
                ctx->prefetch_L2_mask |= SI_PREFETCH_PS;
-       if (ctx->vertex_buffers.buffer && ctx->vertex_elements)
+       if (ctx->vb_descriptors_buffer && ctx->vertex_elements)
                ctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
 
        /* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */
        bool has_clear_state = ctx->screen->has_clear_state;
        if (has_clear_state) {
                ctx->framebuffer.dirty_cbufs =
                         u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs);
                /* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */
                ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL;
        } else {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 896b640..2053dcb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -490,22 +490,26 @@ struct si_context {
        struct si_shader_ctx_state      tcs_shader;
        struct si_shader_ctx_state      tes_shader;
        struct si_cs_shader_state       cs_shader_state;
 
        /* shader information */
        struct si_vertex_elements       *vertex_elements;
        unsigned                        sprite_coord_enable;
        bool                            flatshade;
        bool                            do_update_shaders;
 
+       /* vertex buffer descriptors */
+       uint32_t *vb_descriptors_gpu_list;
+       struct r600_resource *vb_descriptors_buffer;
+       unsigned vb_descriptors_offset;
+
        /* shader descriptors */
-       struct si_descriptors           vertex_buffers;
        struct si_descriptors           descriptors[SI_NUM_DESCS];
        unsigned                        descriptors_dirty;
        unsigned                        shader_pointers_dirty;
        unsigned                        shader_needs_decompress_mask;
        struct si_buffer_resources      rw_buffers;
        struct si_buffer_resources      const_and_shader_buffers[SI_NUM_SHADERS];
        struct si_samplers              samplers[SI_NUM_SHADERS];
        struct si_images                images[SI_NUM_SHADERS];
 
        /* other shader resources */
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to