Module: Mesa Branch: main Commit: a4b4c9b72304ae407323b513bc522c2bb448bc1f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a4b4c9b72304ae407323b513bc522c2bb448bc1f
Author: Timur Kristóf <timur.kris...@gmail.com> Date: Sat Dec 9 22:15:24 2023 +0100 radv: Implement image copies on transfer queues. When either of the images is linear then the implementation can use the same packets as used by the buffer/image copies. However, tiled to tiled image copies use a separate packet. Several variations of tiled to tiled copies are not supported by the built-in packet and need a scanline copy as a workaround; this will be implemented by an upcoming commit. Signed-off-by: Timur Kristóf <timur.kris...@gmail.com> Reviewed-by: Tatsuyuki Ishi <ishitatsuy...@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26913> --- src/amd/vulkan/meta/radv_meta_copy.c | 32 +++++++++++++ src/amd/vulkan/radv_sdma.c | 87 ++++++++++++++++++++++++++++++++++++ src/amd/vulkan/radv_sdma.h | 2 + 3 files changed, 121 insertions(+) diff --git a/src/amd/vulkan/meta/radv_meta_copy.c b/src/amd/vulkan/meta/radv_meta_copy.c index a25405c5564..3fe1151a646 100644 --- a/src/amd/vulkan/meta/radv_meta_copy.c +++ b/src/amd/vulkan/meta/radv_meta_copy.c @@ -383,10 +383,42 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBuf } } +static void +transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout, + struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region) +{ + const struct radv_device *device = cmd_buffer->device; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + unsigned int dst_aspect_mask_remaining = region->dstSubresource.aspectMask; + + u_foreach_bit (b, region->srcSubresource.aspectMask) { + const VkImageAspectFlags src_aspect_mask = BITFIELD_BIT(b); + const VkImageAspectFlags dst_aspect_mask = BITFIELD_BIT(u_bit_scan(&dst_aspect_mask_remaining)); + const unsigned src_binding_idx = src_image->disjoint ? 
radv_plane_from_aspect(src_aspect_mask) : 0; + const unsigned dst_binding_idx = dst_image->disjoint ? radv_plane_from_aspect(dst_aspect_mask) : 0; + + radv_cs_add_buffer(device->ws, cs, src_image->bindings[src_binding_idx].bo); + radv_cs_add_buffer(device->ws, cs, dst_image->bindings[dst_binding_idx].bo); + + const struct radv_sdma_surf src = + radv_sdma_get_surf(device, src_image, region->srcSubresource, region->srcOffset, src_aspect_mask); + const struct radv_sdma_surf dst = + radv_sdma_get_surf(device, dst_image, region->dstSubresource, region->dstOffset, dst_aspect_mask); + const VkExtent3D extent = radv_sdma_get_copy_extent(src_image, region->srcSubresource, region->extent); + + radv_sdma_copy_image(device, cs, &src, &dst, extent); + } +} + static void copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout, struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region) { + if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { + transfer_copy_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region); + return; + } + struct radv_meta_saved_state saved_state; bool cs; diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c index f03cbb28978..b0f86f618f0 100644 --- a/src/amd/vulkan/radv_sdma.c +++ b/src/amd/vulkan/radv_sdma.c @@ -474,6 +474,74 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra assert(cs->cdw == cdw_end); } +static void +radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs, + const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst, + const VkExtent3D px_extent) +{ + /* We currently only support the SDMA v4+ versions of this packet. */ + assert(device->physical_device->rad_info.sdma_ip_version >= SDMA_4_0); + + /* On GFX10+ this supports DCC, but cannot copy a compressed surface to another compressed surface. 
*/ + assert(!src->meta_va || !dst->meta_va); + + if (device->physical_device->rad_info.sdma_ip_version >= SDMA_4_0 && + device->physical_device->rad_info.sdma_ip_version < SDMA_5_0) { + /* SDMA v4 doesn't support mip_id selection in the T2T copy packet. */ + assert(src->header_dword >> 24 == 0); + assert(dst->header_dword >> 24 == 0); + /* SDMA v4 doesn't support any image metadata. */ + assert(!src->meta_va); + assert(!dst->meta_va); + } + + /* Despite the name, this can indicate DCC or HTILE metadata. */ + const uint32_t dcc = src->meta_va || dst->meta_va; + /* 0 = compress (src is uncompressed), 1 = decompress (src is compressed). */ + const uint32_t dcc_dir = src->meta_va && !dst->meta_va; + + const VkOffset3D src_off = radv_sdma_pixel_offset_to_blocks(src->offset, src->blk_w, src->blk_h); + const VkOffset3D dst_off = radv_sdma_pixel_offset_to_blocks(dst->offset, dst->blk_w, dst->blk_h); + const VkExtent3D src_ext = radv_sdma_pixel_extent_to_blocks(src->extent, src->blk_w, src->blk_h); + const VkExtent3D dst_ext = radv_sdma_pixel_extent_to_blocks(dst->extent, dst->blk_w, dst->blk_h); + const VkExtent3D ext = radv_sdma_pixel_extent_to_blocks(px_extent, src->blk_w, src->blk_h); + + assert(util_is_power_of_two_nonzero(src->bpp)); + assert(util_is_power_of_two_nonzero(dst->bpp)); + + ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 15 + (dcc ? 
3 : 0)); + + radeon_emit(cs, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0) | dcc << 19 | dcc_dir << 31 | + src->header_dword); + radeon_emit(cs, src->va); + radeon_emit(cs, src->va >> 32); + radeon_emit(cs, src_off.x | src_off.y << 16); + radeon_emit(cs, src_off.z | (src_ext.width - 1) << 16); + radeon_emit(cs, (src_ext.height - 1) | (src_ext.depth - 1) << 16); + radeon_emit(cs, src->info_dword); + radeon_emit(cs, dst->va); + radeon_emit(cs, dst->va >> 32); + radeon_emit(cs, dst_off.x | dst_off.y << 16); + radeon_emit(cs, dst_off.z | (dst_ext.width - 1) << 16); + radeon_emit(cs, (dst_ext.height - 1) | (dst_ext.depth - 1) << 16); + radeon_emit(cs, dst->info_dword); + radeon_emit(cs, (ext.width - 1) | (ext.height - 1) << 16); + radeon_emit(cs, (ext.depth - 1)); + + if (dst->meta_va) { + const uint32_t write_compress_enable = 1; + radeon_emit(cs, dst->meta_va); + radeon_emit(cs, dst->meta_va >> 32); + radeon_emit(cs, dst->meta_config | write_compress_enable << 28); + } else if (src->meta_va) { + radeon_emit(cs, src->meta_va); + radeon_emit(cs, src->meta_va >> 32); + radeon_emit(cs, src->meta_config); + } + + assert(cs->cdw == cdw_end); +} + void radv_sdma_copy_buffer_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img, const VkExtent3D extent, @@ -577,3 +645,22 @@ radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct r } } } + +void +radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src, + const struct radv_sdma_surf *dst, const VkExtent3D extent) +{ + if (src->is_linear) { + if (dst->is_linear) { + radv_sdma_emit_copy_linear_sub_window(device, cs, src, dst, extent); + } else { + radv_sdma_emit_copy_tiled_sub_window(device, cs, dst, src, extent, false); + } + } else { + if (dst->is_linear) { + radv_sdma_emit_copy_tiled_sub_window(device, cs, src, dst, extent, true); + } else { + 
radv_sdma_emit_copy_t2t_sub_window(device, cs, src, dst, extent); + } + } +} diff --git a/src/amd/vulkan/radv_sdma.h b/src/amd/vulkan/radv_sdma.h index 5f5a701e6f1..e089618b407 100644 --- a/src/amd/vulkan/radv_sdma.h +++ b/src/amd/vulkan/radv_sdma.h @@ -81,6 +81,8 @@ void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, str const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img_in, const VkExtent3D copy_extent, struct radeon_winsys_bo *temp_bo, bool to_image); +void radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src, + const struct radv_sdma_surf *dst, const VkExtent3D extent); void radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va, uint64_t size); void radv_sdma_fill_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,