Module: Mesa
Branch: master
Commit: d906c007d665725eb501f304aba31b8a25b1a18f
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d906c007d665725eb501f304aba31b8a25b1a18f
Author: Rhys Perry <[email protected]>
Date: Tue Feb 2 15:28:37 2021 +0000

radv: use a more relaxed alignment for upload buffer allocations

256 bytes was higher than necessary.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8833>
---
 src/amd/vulkan/radv_cmd_buffer.c | 48 ++++++++++++++++++++-------------------
 src/amd/vulkan/radv_meta_buffer.c | 2 +-
 src/amd/vulkan/radv_private.h | 8 ++-----
 src/amd/vulkan/si_cmd_buffer.c | 2 +-
 4 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c3af2b0df55..c378b6393ff 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -459,7 +459,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
  unsigned fence_offset, eop_bug_offset;
  void *fence_ptr;
- radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8, &fence_offset,
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset,
  &fence_ptr);
  memset(fence_ptr, 0, 8);
@@ -469,7 +469,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
  if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
  /* Allocate a buffer for the EOP bug on GFX9.
*/
- radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8,
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db,
  &eop_bug_offset, &fence_ptr);
  memset(fence_ptr, 0, 16 * num_db);
  cmd_buffer->gfx9_eop_bug_va =
@@ -548,14 +548,21 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer,
  bool
  radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
- unsigned size,
- unsigned alignment,
- unsigned *out_offset,
- void **ptr)
+ unsigned size, unsigned *out_offset, void **ptr)
  {
- assert(util_is_power_of_two_nonzero(alignment));
+ assert(size % 4 == 0);
+
+ struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
+
+ /* Align to the scalar cache line size if it results in this allocation
+ * being placed in less of them.
+ */
+ unsigned offset = cmd_buffer->upload.offset;
+ unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32;
+ unsigned gap = align(offset, line_size) - offset;
+ if ((size & (line_size - 1)) > gap)
+ offset = align(offset, line_size);
- uint64_t offset = align(cmd_buffer->upload.offset, alignment);
  if (offset + size > cmd_buffer->upload.size) {
  if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
  return false;
@@ -571,13 +578,11 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
  bool
  radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, unsigned alignment,
- const void *data, unsigned *out_offset)
+ unsigned size, const void *data, unsigned *out_offset)
  {
  uint8_t *ptr;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, alignment,
- out_offset, (void **)&ptr))
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr))
  return false;
  if (ptr)
@@ -2634,7 +2639,7 @@ radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer,
  (struct radv_descriptor_set *)&descriptors_state->push_set.set;
  unsigned bo_offset;
- if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, 32,
+ if (!radv_cmd_buffer_upload_data(cmd_buffer,
set->header.size,
  set->header.mapped_ptr,
  &bo_offset))
  return;
@@ -2653,8 +2658,7 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
  uint32_t offset;
  void *ptr;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size,
- 256, &offset, &ptr))
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr))
  return;
  for (unsigned i = 0; i < MAX_SETS; i++) {
@@ -2798,8 +2802,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
  if (need_push_constants) {
  if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
- 16 * layout->dynamic_offset_count,
- 256, &offset, &ptr))
+ 16 * layout->dynamic_offset_count, &offset, &ptr))
  return;
  memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
@@ -2847,7 +2850,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
  uint64_t va;
  /* allocate some descriptor state for vertex buffers */
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, 256,
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16,
  &vb_offset, &vb_ptr))
  return;
@@ -2970,7 +2973,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
  /* Allocate some descriptor state for streamout buffers.
*/
  if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
- MAX_SO_BUFFERS * 16, 256,
+ MAX_SO_BUFFERS * 16,
  &so_offset, &so_ptr))
  return;
@@ -4237,9 +4240,8 @@ void radv_meta_push_descriptor_set(
  push_set->header.size = layout->set[set].layout->size;
  push_set->header.layout = layout->set[set].layout;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size, 32,
- &bo_offset,
- (void**) &push_set->header.mapped_ptr))
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size,
+ &bo_offset, (void**) &push_set->header.mapped_ptr))
  return;
  push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
@@ -6789,7 +6791,7 @@ void radv_CmdBeginConditionalRenderingEXT(
  * Based on the conditionalrender demo, it's faster to do the
  * COPY_DATA in ME (+ sync PFP) instead of PFP.
  */
- radv_cmd_buffer_upload_data(cmd_buffer, 8, 16, &pred_value, &pred_offset);
+ radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset);
  pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
index d6b0c64a533..20f619bbe9c 100644
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -546,7 +546,7 @@ void radv_CmdUpdateBuffer(
  radv_cmd_buffer_trace_emit(cmd_buffer);
  } else {
  uint32_t buf_offset;
- radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
+ radv_cmd_buffer_upload_data(cmd_buffer, dataSize, pData, &buf_offset);
  radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
  buf_offset, dstOffset + dst_buffer->offset, dataSize);
  }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 539e99b6f2b..361a7d318e8 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1504,17 +1504,13 @@ void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
  void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
  bool
  radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
- unsigned size,
- unsigned alignment,
- unsigned *out_offset,
- void **ptr);
+ unsigned size, unsigned *out_offset, void **ptr);
  void
  radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
  const struct radv_subpass *subpass);
  bool
  radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, unsigned alignmnet,
- const void *data, unsigned *out_offset);
+ unsigned size, const void *data, unsigned *out_offset);
  void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
  void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index a103d1545e3..e529167b25c 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -1744,7 +1744,7 @@ static void si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigne
  assert(size < SI_CPDMA_ALIGNMENT);
- radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, SI_CPDMA_ALIGNMENT, &offset, &ptr);
+ radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, &offset, &ptr);
  va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
  va += offset;
_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit
