Module: Mesa Branch: main Commit: c707cb51e53f7ece4cc049990a23ccd2673fe7c4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c707cb51e53f7ece4cc049990a23ccd2673fe7c4
Author: Pierre-Eric Pelloux-Prayer <[email protected]> Date: Fri Aug 18 11:57:27 2023 +0200 radv/sdma: use multiple commands if required Instead of failing the copy we can use multiple chunks. This codepath shouldn't really be used since the source image should usually be tiled but it still better to not fail when possible. Reviewed-by: Marek Olšák <[email protected]> Reviewed-by: Samuel Pitoiset <[email protected]> Reviewed-by: Daniel Stone <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24771> --- src/amd/vulkan/radv_sdma_copy_image.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/amd/vulkan/radv_sdma_copy_image.c b/src/amd/vulkan/radv_sdma_copy_image.c index d81dc12057f..a567eb2c318 100644 --- a/src/amd/vulkan/radv_sdma_copy_image.c +++ b/src/amd/vulkan/radv_sdma_copy_image.c @@ -22,6 +22,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/macros.h" #include "util/u_memory.h" #include "radv_cs.h" #include "radv_private.h" @@ -41,21 +42,28 @@ radv_sdma_v4_v5_copy_image_to_buffer(struct radv_device *device, struct radeon_c /* Linear -> linear sub-window copy. */ if (image->planes[0].surface.is_linear) { - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 7); uint64_t bytes = (uint64_t)src_pitch * copy_height * bpp; + uint32_t chunk_size = 1u << 22; + uint32_t chunk_count = DIV_ROUND_UP(bytes, chunk_size); - if (!(bytes < (1u << 22))) - return false; + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 7 * chunk_count); src_address += image->planes[0].surface.u.gfx9.offset[0]; - radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0))); - radeon_emit(cs, bytes - 1); - radeon_emit(cs, 0); - radeon_emit(cs, src_address); - radeon_emit(cs, src_address >> 32); - radeon_emit(cs, dst_address); - radeon_emit(cs, dst_address >> 32); + for (int i = 0; i < chunk_count; i++) { + uint32_t size = MIN2(chunk_size, bytes); + radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0))); + radeon_emit(cs, size - 1); + radeon_emit(cs, 0); + radeon_emit(cs, src_address); + radeon_emit(cs, src_address >> 32); + radeon_emit(cs, dst_address); + radeon_emit(cs, dst_address >> 32); + + src_address += size; + dst_address += size; + bytes -= size; + } assert(cs->cdw <= cdw_max);
