From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeonsi/si_cp_dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index b40f5cc..0cf7b3b 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -190,34 +190,35 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, if (!size) return; /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ util_range_add(&rdst->valid_buffer_range, offset, offset + size); /* Fallback for unaligned clears. */ - if (offset % 4 != 0 || size % 4 != 0) { + if (size % 4 != 0) { uint8_t *map = r600_buffer_map_sync_with_rings(&sctx->b, rdst, PIPE_TRANSFER_WRITE); map += offset; for (uint64_t i = 0; i < size; i++) { unsigned byte_within_dword = (offset + i) % 4; *map++ = (value >> (byte_within_dword * 8)) & 0xff; } return; } /* dma_clear_buffer can use clear_buffer on failure. Make sure that * doesn't happen. We don't want an infinite recursion: */ if (sctx->b.dma.cs && + (offset % 4 == 0) && /* CP DMA is very slow. Always use SDMA for big clears. This * alone improves DeusEx:MD performance by 70%. */ (size > 128 * 1024 || /* Buffers not used by the GFX IB yet will be cleared by SDMA. * This happens to move most buffer clears to SDMA, including * DCC and CMASK clears, because pipe->clear clears them before * si_emit_framebuffer_state (in a draw call) adds them. * For example, DeusEx:MD has 21 buffer clears per frame and all * of them are moved to SDMA thanks to this. */ !ws->cs_is_buffer_referenced(sctx->b.gfx.cs, rdst->buf, -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev