From: Marek Olšák <marek.ol...@amd.com> The performance improvement is +3.5%, which is not much. --- src/gallium/drivers/radeonsi/si_pipe.h | 9 +++++---- src/gallium/drivers/radeonsi/si_state.c | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 2053dcb..6f5939b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -281,24 +281,25 @@ struct si_framebuffer { unsigned spi_shader_col_format; unsigned spi_shader_col_format_alpha; unsigned spi_shader_col_format_blend; unsigned spi_shader_col_format_blend_alpha; ubyte nr_samples:5; /* at most 16xAA */ ubyte log_samples:3; /* at most 4 = 16xAA */ ubyte compressed_cb_mask; ubyte color_is_int8; ubyte color_is_int10; ubyte dirty_cbufs; - bool dirty_zsbuf; - bool any_dst_linear; - bool CB_has_shader_readable_metadata; - bool DB_has_shader_readable_metadata; + bool dirty_zsbuf:1; + bool any_dst_linear:1; + bool blitting_to_gart:1; + bool CB_has_shader_readable_metadata:1; + bool DB_has_shader_readable_metadata:1; }; struct si_signed_scissor { int minx; int miny; int maxx; int maxy; }; struct si_scissors { diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index aae7332..b0bd11d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2824,20 +2824,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->framebuffer.spi_shader_col_format_alpha = 0; sctx->framebuffer.spi_shader_col_format_blend = 0; sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; sctx->framebuffer.color_is_int8 = 0; sctx->framebuffer.color_is_int10 = 0; sctx->framebuffer.compressed_cb_mask = 0; sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); sctx->framebuffer.any_dst_linear = false; + /* This will be set to false later if any color buffer is not in GART. 
*/ + sctx->framebuffer.blitting_to_gart = sctx->blitter->running && + !sctx->blitter->leaving && + state->nr_cbufs && + state->cbufs[0]; sctx->framebuffer.CB_has_shader_readable_metadata = false; sctx->framebuffer.DB_has_shader_readable_metadata = false; for (i = 0; i < state->nr_cbufs; i++) { if (!state->cbufs[i]) continue; surf = (struct r600_surface*)state->cbufs[i]; rtex = (struct r600_texture*)surf->base.texture; @@ -2860,20 +2865,23 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (surf->color_is_int10) sctx->framebuffer.color_is_int10 |= 1 << i; if (rtex->fmask.size) { sctx->framebuffer.compressed_cb_mask |= 1 << i; } if (rtex->surface.is_linear) sctx->framebuffer.any_dst_linear = true; + if (rtex->resource.domains & RADEON_DOMAIN_VRAM) + sctx->framebuffer.blitting_to_gart = false; + if (vi_dcc_enabled(rtex, surf->base.u.tex.level)) sctx->framebuffer.CB_has_shader_readable_metadata = true; si_context_add_resource_size(ctx, surf->base.texture); p_atomic_inc(&rtex->framebuffers_bound); if (rtex->dcc_gather_statistics) { /* Dirty tracking must be enabled for DCC usage analysis. */ sctx->framebuffer.compressed_cb_mask |= 1 << i; @@ -3183,20 +3191,34 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */ radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ } /* Framebuffer dimensions. */ /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); + if (sctx->b.chip_class >= GFX9 && + sctx->screen->info.has_dedicated_vram) { + /* For copies to GART, it is faster (although very unintuitive) + * to disable all but one RB. If all RBs were banging away on + * the PCIE bus, it would produce more traffic than the write- + * combiner can efficiently handle. 
+ */ + radeon_set_context_reg(cs, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, + sctx->framebuffer.blitting_to_gart ? + S_02835C_ENABLE(1) | + S_02835C_NUM_SE(1) | + S_02835C_NUM_RB_PER_SE(1) : 0); + } + if (sctx->screen->dfsm_allowed) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); } sctx->framebuffer.dirty_cbufs = 0; sctx->framebuffer.dirty_zsbuf = false; } static void si_emit_msaa_sample_locs(struct si_context *sctx, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev