Module: Mesa
Branch: master
Commit: 9a1363427ea3300d2ff9ef5ec0cc2ffbee22cffe
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9a1363427ea3300d2ff9ef5ec0cc2ffbee22cffe

Author: Marek Olšák <marek.ol...@amd.com>
Date:   Mon Apr  2 21:08:05 2018 -0400

radeonsi: always prefetch later shaders after the draw packet

so that the draw is started as soon as possible.

v2: only prefetch the API VS and VBO descriptors

Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Tested-by: Dieter Nützel <die...@nuetzel-hh.de>

---

 src/gallium/drivers/radeonsi/si_cp_dma.c     | 89 +++++++++++++++++++++-------
 src/gallium/drivers/radeonsi/si_pipe.h       |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c | 10 +++-
 3 files changed, 75 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index e2d261d7e0..358b33c4eb 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -520,67 +520,110 @@ static void cik_prefetch_VBO_descriptors(struct si_context *sctx)
                                 sctx->vertex_elements->desc_list_byte_size);
 }
 
-void cik_emit_prefetch_L2(struct si_context *sctx)
+/**
+ * Prefetch shaders and VBO descriptors.
+ *
+ * \param vertex_stage_only  Whether only the API VS and VBO descriptors
+ *                           should be prefetched.
+ */
+void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only)
 {
+       unsigned mask = sctx->prefetch_L2_mask;
+       assert(mask);
+
        /* Prefetch shaders and VBO descriptors to TC L2. */
        if (sctx->chip_class >= GFX9) {
                /* Choose the right spot for the VBO prefetch. */
                if (sctx->tes_shader.cso) {
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
+                       if (mask & SI_PREFETCH_HS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.hs);
-                       if (sctx->prefetch_L2_mask & 
SI_PREFETCH_VBO_DESCRIPTORS)
+                       if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
                                cik_prefetch_VBO_descriptors(sctx);
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+                       if (vertex_stage_only) {
+                               sctx->prefetch_L2_mask &= ~(SI_PREFETCH_HS |
+                                                           
SI_PREFETCH_VBO_DESCRIPTORS);
+                               return;
+                       }
+
+                       if (mask & SI_PREFETCH_GS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.gs);
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+                       if (mask & SI_PREFETCH_VS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.vs);
                } else if (sctx->gs_shader.cso) {
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+                       if (mask & SI_PREFETCH_GS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.gs);
-                       if (sctx->prefetch_L2_mask & 
SI_PREFETCH_VBO_DESCRIPTORS)
+                       if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
                                cik_prefetch_VBO_descriptors(sctx);
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+                       if (vertex_stage_only) {
+                               sctx->prefetch_L2_mask &= ~(SI_PREFETCH_GS |
+                                                           
SI_PREFETCH_VBO_DESCRIPTORS);
+                               return;
+                       }
+
+                       if (mask & SI_PREFETCH_VS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.vs);
                } else {
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+                       if (mask & SI_PREFETCH_VS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.vs);
-                       if (sctx->prefetch_L2_mask & 
SI_PREFETCH_VBO_DESCRIPTORS)
+                       if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
                                cik_prefetch_VBO_descriptors(sctx);
+                       if (vertex_stage_only) {
+                               sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS |
+                                                           
SI_PREFETCH_VBO_DESCRIPTORS);
+                               return;
+                       }
                }
        } else {
                /* SI-CI-VI */
                /* Choose the right spot for the VBO prefetch. */
                if (sctx->tes_shader.cso) {
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_LS)
+                       if (mask & SI_PREFETCH_LS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.ls);
-                       if (sctx->prefetch_L2_mask & 
SI_PREFETCH_VBO_DESCRIPTORS)
+                       if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
                                cik_prefetch_VBO_descriptors(sctx);
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
+                       if (vertex_stage_only) {
+                               sctx->prefetch_L2_mask &= ~(SI_PREFETCH_LS |
+                                                           
SI_PREFETCH_VBO_DESCRIPTORS);
+                               return;
+                       }
+
+                       if (mask & SI_PREFETCH_HS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.hs);
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_ES)
+                       if (mask & SI_PREFETCH_ES)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.es);
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+                       if (mask & SI_PREFETCH_GS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.gs);
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+                       if (mask & SI_PREFETCH_VS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.vs);
                } else if (sctx->gs_shader.cso) {
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_ES)
+                       if (mask & SI_PREFETCH_ES)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.es);
-                       if (sctx->prefetch_L2_mask & 
SI_PREFETCH_VBO_DESCRIPTORS)
+                       if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
                                cik_prefetch_VBO_descriptors(sctx);
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+                       if (vertex_stage_only) {
+                               sctx->prefetch_L2_mask &= ~(SI_PREFETCH_ES |
+                                                           
SI_PREFETCH_VBO_DESCRIPTORS);
+                               return;
+                       }
+
+                       if (mask & SI_PREFETCH_GS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.gs);
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+                       if (mask & SI_PREFETCH_VS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.vs);
                } else {
-                       if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+                       if (mask & SI_PREFETCH_VS)
                                cik_prefetch_shader_async(sctx, 
sctx->queued.named.vs);
-                       if (sctx->prefetch_L2_mask & 
SI_PREFETCH_VBO_DESCRIPTORS)
+                       if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
                                cik_prefetch_VBO_descriptors(sctx);
+                       if (vertex_stage_only) {
+                               sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS |
+                                                           
SI_PREFETCH_VBO_DESCRIPTORS);
+                               return;
+                       }
                }
        }
 
-       if (sctx->prefetch_L2_mask & SI_PREFETCH_PS)
+       if (mask & SI_PREFETCH_PS)
                cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
 
        sctx->prefetch_L2_mask = 0;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e3e5d5ac91..c7ad5366a6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -911,7 +911,7 @@ void si_copy_buffer(struct si_context *sctx,
                    unsigned user_flags);
 void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource 
*buf,
                              uint64_t offset, unsigned size);
-void cik_emit_prefetch_L2(struct si_context *sctx);
+void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only);
 void si_init_cp_dma_functions(struct si_context *sctx);
 
 /* si_debug.c */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index f8d52cbc98..96dfd93645 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1456,7 +1456,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                 * in parallel, but starting the draw first is more important.
                 */
                if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
-                       cik_emit_prefetch_L2(sctx);
+                       cik_emit_prefetch_L2(sctx, false);
        } else {
                /* If we don't wait for idle, start prefetches first, then set
                 * states, and draw at the end.
@@ -1464,14 +1464,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                if (sctx->flags)
                        si_emit_cache_flush(sctx);
 
+               /* Only prefetch the API VS and VBO descriptors. */
                if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
-                       cik_emit_prefetch_L2(sctx);
+                       cik_emit_prefetch_L2(sctx, true);
 
                if (!si_upload_graphics_shader_descriptors(sctx))
                        return;
 
                si_emit_all_states(sctx, info, 0);
                si_emit_draw_packets(sctx, info, indexbuf, index_size, 
index_offset);
+
+               /* Prefetch the remaining shaders after the draw has been
+                * started. */
+               if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
+                       cik_emit_prefetch_L2(sctx, false);
        }
 
        if (unlikely(sctx->current_saved_cs)) {

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to