Module: Mesa
Branch: main
Commit: a4b4c9b72304ae407323b513bc522c2bb448bc1f
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a4b4c9b72304ae407323b513bc522c2bb448bc1f

Author: Timur Kristóf <timur.kris...@gmail.com>
Date:   Sat Dec  9 22:15:24 2023 +0100

radv: Implement image copies on transfer queues.

When either of the images is linear, the implementation can
use the same packets as the buffer/image copies.
However, tiled to tiled image copies use a separate packet.

Several variations of tiled to tiled copies are not supported
by the built-in packet and need a scanline copy as a workaround;
this will be implemented by an upcoming commit.

Signed-off-by: Timur Kristóf <timur.kris...@gmail.com>
Reviewed-by: Tatsuyuki Ishi <ishitatsuy...@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26913>

---

 src/amd/vulkan/meta/radv_meta_copy.c | 32 +++++++++++++
 src/amd/vulkan/radv_sdma.c           | 87 ++++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_sdma.h           |  2 +
 3 files changed, 121 insertions(+)

diff --git a/src/amd/vulkan/meta/radv_meta_copy.c 
b/src/amd/vulkan/meta/radv_meta_copy.c
index a25405c5564..3fe1151a646 100644
--- a/src/amd/vulkan/meta/radv_meta_copy.c
+++ b/src/amd/vulkan/meta/radv_meta_copy.c
@@ -383,10 +383,42 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, 
const VkCopyImageToBuf
    }
 }
 
+static void
+transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image 
*src_image, VkImageLayout src_image_layout,
+                    struct radv_image *dst_image, VkImageLayout 
dst_image_layout, const VkImageCopy2 *region)
+{
+   const struct radv_device *device = cmd_buffer->device;
+   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   unsigned int dst_aspect_mask_remaining = region->dstSubresource.aspectMask;
+
+   u_foreach_bit (b, region->srcSubresource.aspectMask) {
+      const VkImageAspectFlags src_aspect_mask = BITFIELD_BIT(b);
+      const VkImageAspectFlags dst_aspect_mask = 
BITFIELD_BIT(u_bit_scan(&dst_aspect_mask_remaining));
+      const unsigned src_binding_idx = src_image->disjoint ? 
radv_plane_from_aspect(src_aspect_mask) : 0;
+      const unsigned dst_binding_idx = dst_image->disjoint ? 
radv_plane_from_aspect(dst_aspect_mask) : 0;
+
+      radv_cs_add_buffer(device->ws, cs, 
src_image->bindings[src_binding_idx].bo);
+      radv_cs_add_buffer(device->ws, cs, 
dst_image->bindings[dst_binding_idx].bo);
+
+      const struct radv_sdma_surf src =
+         radv_sdma_get_surf(device, src_image, region->srcSubresource, 
region->srcOffset, src_aspect_mask);
+      const struct radv_sdma_surf dst =
+         radv_sdma_get_surf(device, dst_image, region->dstSubresource, 
region->dstOffset, dst_aspect_mask);
+      const VkExtent3D extent = radv_sdma_get_copy_extent(src_image, 
region->srcSubresource, region->extent);
+
+      radv_sdma_copy_image(device, cs, &src, &dst, extent);
+   }
+}
+
 static void
 copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, 
VkImageLayout src_image_layout,
            struct radv_image *dst_image, VkImageLayout dst_image_layout, const 
VkImageCopy2 *region)
 {
+   if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
+      transfer_copy_image(cmd_buffer, src_image, src_image_layout, dst_image, 
dst_image_layout, region);
+      return;
+   }
+
    struct radv_meta_saved_state saved_state;
    bool cs;
 
diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c
index f03cbb28978..b0f86f618f0 100644
--- a/src/amd/vulkan/radv_sdma.c
+++ b/src/amd/vulkan/radv_sdma.c
@@ -474,6 +474,74 @@ radv_sdma_emit_copy_tiled_sub_window(const struct 
radv_device *device, struct ra
    assert(cs->cdw == cdw_end);
 }
 
+static void
+radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct 
radeon_cmdbuf *cs,
+                                   const struct radv_sdma_surf *const src, 
const struct radv_sdma_surf *const dst,
+                                   const VkExtent3D px_extent)
+{
+   /* We currently only support the SDMA v4+ versions of this packet. */
+   assert(device->physical_device->rad_info.sdma_ip_version >= SDMA_4_0);
+
+   /* On GFX10+ this supports DCC, but cannot copy a compressed surface to 
another compressed surface. */
+   assert(!src->meta_va || !dst->meta_va);
+
+   if (device->physical_device->rad_info.sdma_ip_version >= SDMA_4_0 &&
+       device->physical_device->rad_info.sdma_ip_version < SDMA_5_0) {
+      /* SDMA v4 doesn't support mip_id selection in the T2T copy packet. */
+      assert(src->header_dword >> 24 == 0);
+      assert(dst->header_dword >> 24 == 0);
+      /* SDMA v4 doesn't support any image metadata. */
+      assert(!src->meta_va);
+      assert(!dst->meta_va);
+   }
+
+   /* Despite the name, this can indicate DCC or HTILE metadata. */
+   const uint32_t dcc = src->meta_va || dst->meta_va;
+   /* 0 = compress (src is uncompressed), 1 = decompress (src is compressed). 
*/
+   const uint32_t dcc_dir = src->meta_va && !dst->meta_va;
+
+   const VkOffset3D src_off = radv_sdma_pixel_offset_to_blocks(src->offset, 
src->blk_w, src->blk_h);
+   const VkOffset3D dst_off = radv_sdma_pixel_offset_to_blocks(dst->offset, 
dst->blk_w, dst->blk_h);
+   const VkExtent3D src_ext = radv_sdma_pixel_extent_to_blocks(src->extent, 
src->blk_w, src->blk_h);
+   const VkExtent3D dst_ext = radv_sdma_pixel_extent_to_blocks(dst->extent, 
dst->blk_w, dst->blk_h);
+   const VkExtent3D ext = radv_sdma_pixel_extent_to_blocks(px_extent, 
src->blk_w, src->blk_h);
+
+   assert(util_is_power_of_two_nonzero(src->bpp));
+   assert(util_is_power_of_two_nonzero(dst->bpp));
+
+   ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 15 + (dcc ? 
3 : 0));
+
+   radeon_emit(cs, SDMA_PACKET(SDMA_OPCODE_COPY, 
SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0) | dcc << 19 | dcc_dir << 31 |
+                      src->header_dword);
+   radeon_emit(cs, src->va);
+   radeon_emit(cs, src->va >> 32);
+   radeon_emit(cs, src_off.x | src_off.y << 16);
+   radeon_emit(cs, src_off.z | (src_ext.width - 1) << 16);
+   radeon_emit(cs, (src_ext.height - 1) | (src_ext.depth - 1) << 16);
+   radeon_emit(cs, src->info_dword);
+   radeon_emit(cs, dst->va);
+   radeon_emit(cs, dst->va >> 32);
+   radeon_emit(cs, dst_off.x | dst_off.y << 16);
+   radeon_emit(cs, dst_off.z | (dst_ext.width - 1) << 16);
+   radeon_emit(cs, (dst_ext.height - 1) | (dst_ext.depth - 1) << 16);
+   radeon_emit(cs, dst->info_dword);
+   radeon_emit(cs, (ext.width - 1) | (ext.height - 1) << 16);
+   radeon_emit(cs, (ext.depth - 1));
+
+   if (dst->meta_va) {
+      const uint32_t write_compress_enable = 1;
+      radeon_emit(cs, dst->meta_va);
+      radeon_emit(cs, dst->meta_va >> 32);
+      radeon_emit(cs, dst->meta_config | write_compress_enable << 28);
+   } else if (src->meta_va) {
+      radeon_emit(cs, src->meta_va);
+      radeon_emit(cs, src->meta_va >> 32);
+      radeon_emit(cs, src->meta_config);
+   }
+
+   assert(cs->cdw == cdw_end);
+}
+
 void
 radv_sdma_copy_buffer_image(const struct radv_device *device, struct 
radeon_cmdbuf *cs,
                             const struct radv_sdma_surf *buf, const struct 
radv_sdma_surf *img, const VkExtent3D extent,
@@ -577,3 +645,22 @@ radv_sdma_copy_buffer_image_unaligned(const struct 
radv_device *device, struct r
       }
    }
 }
+
+void
+radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf 
*cs, const struct radv_sdma_surf *src,
+                     const struct radv_sdma_surf *dst, const VkExtent3D extent)
+{
+   if (src->is_linear) {
+      if (dst->is_linear) {
+         radv_sdma_emit_copy_linear_sub_window(device, cs, src, dst, extent);
+      } else {
+         radv_sdma_emit_copy_tiled_sub_window(device, cs, dst, src, extent, 
false);
+      }
+   } else {
+      if (dst->is_linear) {
+         radv_sdma_emit_copy_tiled_sub_window(device, cs, src, dst, extent, 
true);
+      } else {
+         radv_sdma_emit_copy_t2t_sub_window(device, cs, src, dst, extent);
+      }
+   }
+}
diff --git a/src/amd/vulkan/radv_sdma.h b/src/amd/vulkan/radv_sdma.h
index 5f5a701e6f1..e089618b407 100644
--- a/src/amd/vulkan/radv_sdma.h
+++ b/src/amd/vulkan/radv_sdma.h
@@ -81,6 +81,8 @@ void radv_sdma_copy_buffer_image_unaligned(const struct 
radv_device *device, str
                                            const struct radv_sdma_surf *buf, 
const struct radv_sdma_surf *img_in,
                                            const VkExtent3D copy_extent, 
struct radeon_winsys_bo *temp_bo,
                                            bool to_image);
+void radv_sdma_copy_image(const struct radv_device *device, struct 
radeon_cmdbuf *cs, const struct radv_sdma_surf *src,
+                          const struct radv_sdma_surf *dst, const VkExtent3D 
extent);
 void radv_sdma_copy_buffer(const struct radv_device *device, struct 
radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
                            uint64_t size);
 void radv_sdma_fill_buffer(const struct radv_device *device, struct 
radeon_cmdbuf *cs, const uint64_t va,

Reply via email to