From: Marek Olšák <marek.ol...@amd.com>

The improvement is +3.5%, not much.
---
 src/gallium/drivers/radeonsi/si_pipe.h  |  9 +++++----
 src/gallium/drivers/radeonsi/si_state.c | 22 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 2053dcb..6f5939b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -281,24 +281,25 @@ struct si_framebuffer {
        unsigned                        spi_shader_col_format;
        unsigned                        spi_shader_col_format_alpha;
        unsigned                        spi_shader_col_format_blend;
        unsigned                        spi_shader_col_format_blend_alpha;
        ubyte                           nr_samples:5; /* at most 16xAA */
        ubyte                           log_samples:3; /* at most 4 = 16xAA */
        ubyte                           compressed_cb_mask;
        ubyte                           color_is_int8;
        ubyte                           color_is_int10;
        ubyte                           dirty_cbufs;
-       bool                            dirty_zsbuf;
-       bool                            any_dst_linear;
-       bool                            CB_has_shader_readable_metadata;
-       bool                            DB_has_shader_readable_metadata;
+       bool                            dirty_zsbuf:1;
+       bool                            any_dst_linear:1;
+       bool                            blitting_to_gart:1;
+       bool                            CB_has_shader_readable_metadata:1;
+       bool                            DB_has_shader_readable_metadata:1;
 };
 
 struct si_signed_scissor {
        int minx;
        int miny;
        int maxx;
        int maxy;
 };
 
 struct si_scissors {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index aae7332..b0bd11d 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2824,20 +2824,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        sctx->framebuffer.spi_shader_col_format_alpha = 0;
        sctx->framebuffer.spi_shader_col_format_blend = 0;
        sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
        sctx->framebuffer.color_is_int8 = 0;
        sctx->framebuffer.color_is_int10 = 0;
 
        sctx->framebuffer.compressed_cb_mask = 0;
        sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
        sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
        sctx->framebuffer.any_dst_linear = false;
+       /* This will be set to false later if any color buffer is not in GART. */
+       sctx->framebuffer.blitting_to_gart = sctx->blitter->running &&
+                                            !sctx->blitter->leaving &&
+                                            state->nr_cbufs &&
+                                            state->cbufs[0];
        sctx->framebuffer.CB_has_shader_readable_metadata = false;
        sctx->framebuffer.DB_has_shader_readable_metadata = false;
 
        for (i = 0; i < state->nr_cbufs; i++) {
                if (!state->cbufs[i])
                        continue;
 
                surf = (struct r600_surface*)state->cbufs[i];
                rtex = (struct r600_texture*)surf->base.texture;
 
@@ -2860,20 +2865,23 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                if (surf->color_is_int10)
                        sctx->framebuffer.color_is_int10 |= 1 << i;
 
                if (rtex->fmask.size) {
                        sctx->framebuffer.compressed_cb_mask |= 1 << i;
                }
 
                if (rtex->surface.is_linear)
                        sctx->framebuffer.any_dst_linear = true;
 
+               if (rtex->resource.domains & RADEON_DOMAIN_VRAM)
+                       sctx->framebuffer.blitting_to_gart = false;
+
                if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
                        sctx->framebuffer.CB_has_shader_readable_metadata = true;
 
                si_context_add_resource_size(ctx, surf->base.texture);
 
                p_atomic_inc(&rtex->framebuffers_bound);
 
                if (rtex->dcc_gather_statistics) {
                        /* Dirty tracking must be enabled for DCC usage analysis. */
                        sctx->framebuffer.compressed_cb_mask |= 1 << i;
@@ -3183,20 +3191,34 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 
                radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
                radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
        }
 
        /* Framebuffer dimensions. */
         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
        radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
                               S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
 
+       if (sctx->b.chip_class >= GFX9 &&
+           sctx->screen->info.has_dedicated_vram) {
+               /* For copies to GART, it is faster (although very unintuitive)
+                * to disable all but one RB. If all RBs were banging away on
+                * the PCIE bus, it would produce more traffic than the write-
+                * combiner can efficiently handle.
+                */
+               radeon_set_context_reg(cs, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
+                                      sctx->framebuffer.blitting_to_gart ?
+                                              S_02835C_ENABLE(1) |
+                                              S_02835C_NUM_SE(1) |
+                                              S_02835C_NUM_RB_PER_SE(1) : 0);
+       }
+
        if (sctx->screen->dfsm_allowed) {
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
        }
 
        sctx->framebuffer.dirty_cbufs = 0;
        sctx->framebuffer.dirty_zsbuf = false;
 }
 
 static void si_emit_msaa_sample_locs(struct si_context *sctx,
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to