That's a nice improvement. For the series:

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 11.10.2016 16:48, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_compute.c    |  2 +-
 src/gallium/drivers/radeonsi/si_state.c      | 12 +++---------
 src/gallium/drivers/radeonsi/si_state_draw.c |  6 +++---
 3 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 632839f..e785106 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -694,21 +694,21 @@ static void si_launch_grid(

        /* Add buffer sizes for memory checking in need_cs_space. */
        r600_context_add_resource_size(ctx, &program->shader.bo->b.b);
        /* TODO: add the scratch buffer */

        if (info->indirect) {
                r600_context_add_resource_size(ctx, info->indirect);

                /* The hw doesn't read the indirect buffer via TC L2. */
                if (r600_resource(info->indirect)->TC_L2_dirty) {
-                       sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+                       sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
                        r600_resource(info->indirect)->TC_L2_dirty = false;
                }
        }

        si_need_cs_space(sctx);

        if (!sctx->cs_shader_state.initialized)
                si_initialize_compute(sctx);

        if (sctx->b.flags)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 34f3ed7..ad65fc2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3390,35 +3390,29 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
                 * automatically at end of shader, but the contents of other
                 * L1 caches might still be stale. */
                sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
        }

        if (flags & PIPE_BARRIER_INDEX_BUFFER) {
                /* Indices are read through TC L2 since VI.
                 * L1 isn't used.
                 */
                if (sctx->screen->b.chip_class <= CIK)
-                       sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+                       sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
        }

        if (flags & PIPE_BARRIER_FRAMEBUFFER)
                sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;

        if (flags & (PIPE_BARRIER_FRAMEBUFFER |
-                    PIPE_BARRIER_INDIRECT_BUFFER)) {
-               /* Not sure if INV_GLOBAL_L2 is the best thing here.
-                *
-                * We need to make sure that TC L1 & L2 are written back to
-                * memory, because CB fetches don't consider TC, but there's
-                * no need to invalidate any TC cache lines. */
-               sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
-       }
+                    PIPE_BARRIER_INDIRECT_BUFFER))
+               sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 }

 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 {
        struct pipe_blend_state blend;

        memset(&blend, 0, sizeof(blend));
        blend.independent_blend_enable = true;
        blend.rt[0].colormask = 0xf;
        return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 33b6b23..c14e852 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1040,32 +1040,32 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                        if (!ib.buffer)
                                return;
                        /* info->start will be added by the drawing code */
                        ib.offset -= start_offset;
                }
        }

        /* VI reads index buffers through TC L2. */
        if (info->indexed && sctx->b.chip_class <= CIK &&
            r600_resource(ib.buffer)->TC_L2_dirty) {
-               sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+               sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
                r600_resource(ib.buffer)->TC_L2_dirty = false;
        }

        if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) {
-               sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+               sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
                r600_resource(info->indirect)->TC_L2_dirty = false;
        }

        if (info->indirect_params &&
            r600_resource(info->indirect_params)->TC_L2_dirty) {
-               sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+               sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
                r600_resource(info->indirect_params)->TC_L2_dirty = false;
        }

        /* Add buffer sizes for memory checking in need_cs_space. */
        if (sctx->emit_scratch_reloc && sctx->scratch_buffer)
                r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b);
        if (info->indirect)
                r600_context_add_resource_size(ctx, info->indirect);

        si_need_cs_space(sctx);

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to