From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_buffer.c | 30 ++++++++++++++++++++---- src/gallium/drivers/radeonsi/si_pipe.c | 2 ++ src/gallium/drivers/radeonsi/si_pipe.h | 8 +++++++ 3 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index a1e421b8b0d..1d4387252a0 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -443,32 +443,47 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx, PIPE_TRANSFER_PERSISTENT))) || (rbuffer->flags & RADEON_FLAG_SPARSE))) { assert(usage & PIPE_TRANSFER_WRITE); /* Check if mapping this buffer would cause waiting for the GPU. */ if (rbuffer->flags & RADEON_FLAG_SPARSE || force_discard_range || si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) || !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { + unsigned alloc_start = box->x % SI_MAP_BUFFER_ALIGNMENT; + unsigned alloc_size = alloc_start + box->width; + + /* Use PKT3_WRITE_DATA for small uploads. */ + if (box->width <= SI_TRANSFER_WRITE_DATA_THRESHOLD && + box->x % 4 == 0 && box->width % 4 == 0) { + void *cpu_map = u_cpu_suballoc(&sctx->cpu_suballoc, alloc_size, + SI_MAP_BUFFER_ALIGNMENT); + cpu_map = (char*)cpu_map + alloc_start; + + return si_buffer_get_transfer(ctx, resource, usage, box, + ptransfer, cpu_map, cpu_map, + SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC); + } + /* Do a wait-free write-only transfer using a temporary buffer. */ unsigned offset; struct r600_resource *staging = NULL; u_upload_alloc(ctx->stream_uploader, 0, - box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT), + alloc_size, sctx->screen->info.tcc_cache_line_size, &offset, (struct pipe_resource**)&staging, (void**)&data); if (staging) { - data += box->x % SI_MAP_BUFFER_ALIGNMENT; + data += alloc_start; return si_buffer_get_transfer(ctx, resource, usage, box, ptransfer, data, staging, offset); } else if (rbuffer->flags & RADEON_FLAG_SPARSE) { return NULL; } } else { /* At this point, the buffer is always idle (we checked it above). */ usage |= PIPE_TRANSFER_UNSYNCHRONIZED; } } @@ -530,26 +545,30 @@ static void si_buffer_write_data(struct si_context *sctx, struct r600_resource * si_cp_write_data(sctx, buf, offset, size, V_370_TC_L2, V_370_ME, data); radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(cs, 0); } static void si_buffer_do_flush_region(struct pipe_context *ctx, struct pipe_transfer *transfer, const struct pipe_box *box) { + struct si_context *sctx = (struct si_context*)ctx; struct si_transfer *stransfer = (struct si_transfer*)transfer; struct r600_resource *rbuffer = r600_resource(transfer->resource); - if (stransfer->u.staging) { + if (stransfer->offset == SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC) { + si_buffer_write_data(sctx, rbuffer, box->x, box->width, + stransfer->u.cpu); + } else if (stransfer->u.staging) { /* Copy the staging buffer into the original one. */ - si_copy_buffer((struct si_context*)ctx, transfer->resource, + si_copy_buffer(sctx, transfer->resource, &stransfer->u.staging->b.b, box->x, stransfer->offset + box->x % SI_MAP_BUFFER_ALIGNMENT, box->width); } util_range_add(&rbuffer->valid_buffer_range, box->x, box->x + box->width); } static void si_buffer_flush_region(struct pipe_context *ctx, @@ -570,21 +589,22 @@ static void si_buffer_flush_region(struct pipe_context *ctx, static void si_buffer_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct si_context *sctx = (struct si_context*)ctx; struct si_transfer *stransfer = (struct si_transfer*)transfer; if (transfer->usage & PIPE_TRANSFER_WRITE && !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) si_buffer_do_flush_region(ctx, transfer, &transfer->box); - r600_resource_reference(&stransfer->u.staging, NULL); + if (stransfer->offset != SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC) + r600_resource_reference(&stransfer->u.staging, NULL); assert(stransfer->b.staging == NULL); /* for threaded context only */ pipe_resource_reference(&transfer->resource, NULL); /* Don't use pool_transfers_unsync. We are always in the driver * thread. */ slab_free(&sctx->pool_transfers, transfer); } static void si_buffer_subdata(struct pipe_context *ctx, struct pipe_resource *buffer, diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 3bb8e04e4ad..a17929c2d5f 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -253,20 +253,21 @@ static void si_destroy_context(struct pipe_context *context) si_saved_cs_reference(&sctx->current_saved_cs, NULL); _mesa_hash_table_destroy(sctx->tex_handles, NULL); _mesa_hash_table_destroy(sctx->img_handles, NULL); util_dynarray_fini(&sctx->resident_tex_handles); util_dynarray_fini(&sctx->resident_img_handles); util_dynarray_fini(&sctx->resident_tex_needs_color_decompress); util_dynarray_fini(&sctx->resident_img_needs_color_decompress); util_dynarray_fini(&sctx->resident_tex_needs_depth_decompress); + u_cpu_suballoc_deinit(&sctx->cpu_suballoc); FREE(sctx); } static enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx) { struct si_context *sctx = (struct si_context *)ctx; if (sctx->screen->info.has_gpu_reset_status_query) return sctx->ws->ctx_query_reset_status(sctx->ctx); @@ -390,20 +391,21 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->b.screen = screen; /* this must be set first */ sctx->b.priv = NULL; sctx->b.destroy = si_destroy_context; sctx->b.emit_string_marker = si_emit_string_marker; sctx->b.set_debug_callback = si_set_debug_callback; sctx->b.set_log_context = si_set_log_context; sctx->b.set_context_param = si_set_context_param; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; + u_cpu_suballoc_init(&sctx->cpu_suballoc, 64 * 1024, SI_MAP_BUFFER_ALIGNMENT); slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers); slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers); sctx->ws = sscreen->ws; sctx->family = sscreen->info.family; sctx->chip_class = sscreen->info.chip_class; if (sscreen->info.has_gpu_reset_counter_query) { sctx->gpu_reset_counter = sctx->ws->query_value(sctx->ws, RADEON_GPU_RESET_COUNTER); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index f79828f3438..e2cca55a8e2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -21,20 +21,21 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SI_PIPE_H #define SI_PIPE_H #include "si_shader.h" #include "si_state.h" +#include "util/u_cpu_suballoc.h" #include "util/u_dynarray.h" #include "util/u_idalloc.h" #include "util/u_threaded_context.h" #ifdef PIPE_ARCH_BIG_ENDIAN #define SI_BIG_ENDIAN 1 #else #define SI_BIG_ENDIAN 0 #endif @@ -244,25 +245,31 @@ struct r600_resource { bool TC_L2_dirty; /* Whether this resource is referenced by bindless handles. */ bool texture_handle_allocated; bool image_handle_allocated; /* Whether the resource has been exported via resource_get_handle. */ unsigned external_usage; /* PIPE_HANDLE_USAGE_* */ }; +#define SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC UINT_MAX + struct si_transfer { struct threaded_transfer b; union { struct r600_resource *staging; + uint32_t *cpu; } u; + /* If offset == SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC, use "cpu", + * else use "staging". + */ unsigned offset; }; struct si_texture { struct r600_resource buffer; struct radeon_surf surface; uint64_t size; struct si_texture *flushed_depth_texture; @@ -778,20 +785,21 @@ struct si_context { struct radeon_winsys *ws; struct radeon_winsys_ctx *ctx; struct radeon_cmdbuf *gfx_cs; struct radeon_cmdbuf *dma_cs; struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; struct r600_resource *eop_bug_scratch; struct u_upload_mgr *cached_gtt_allocator; struct threaded_context *tc; struct u_suballocator *allocator_zeroed_memory; + struct u_cpu_suballoc cpu_suballoc; struct slab_child_pool pool_transfers; struct slab_child_pool pool_transfers_unsync; /* for threaded_context */ struct pipe_device_reset_callback device_reset_callback; struct u_log_context *log; void *query_result_shader; struct blitter_context *blitter; void *custom_dsa_flush; void *custom_blend_resolve; void *custom_blend_fmask_decompress; void *custom_blend_eliminate_fastclear; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev