Module: Mesa
Branch: main
Commit: 8156c923ee9a0e9a8084ae09e6e91cb1ac41aeb0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8156c923ee9a0e9a8084ae09e6e91cb1ac41aeb0

Author: Timur Kristóf <timur.kris...@gmail.com>
Date:   Fri Oct  6 01:14:04 2023 +0200

radv: Implement buffer/image copies on transfer queues.

Previously, RADV only had a simple SDMA implementation of
image to buffer copies, which was used only for the PRIME copy.

This commit replaces that with a full-featured implementation
that supports both buffer to image and image to buffer copies,
removes the assumptions that the PRIME copy path relied on, and
adds new helper functions that will be shared with other copy
functions in upcoming commits.
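
For orientation, below is a condensed, illustrative sketch of the new
dispatch, not the patch itself; the function name here is made up, but
the queue-family check and the radv_sdma_copy_buffer_image() call match
the diff further down, which additionally adds the BOs to the CS and
handles tiled layouts and DCC/HTILE metadata:

    /* Sketch: how a buffer<->image copy is routed on a transfer queue. */
    static void
    copy_buffer_image_sketch(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
                             struct radv_image *image, const VkBufferImageCopy2 *region, bool to_image)
    {
       if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
          /* Transfer queue: emit SDMA linear/tiled sub-window copy packets. */
          radv_sdma_copy_buffer_image(cmd_buffer->device, cmd_buffer->cs, image, buffer, region, to_image);
          return;
       }
       /* Other queue families keep using the existing graphics/compute meta paths. */
    }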

Unaligned buffer/image copies require a workaround, which
will be implemented by a future commit.
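
The alignment constraint being referred to is what the new
radv_sdma_check_pitches() helper below asserts: the row pitch must be a
multiple of MAX2(1, 4 / bpp), i.e. rows must start on 4-byte boundaries,
and pitches must fit the packet bitfields. A small worked example of
that rule (illustrative numbers only, not taken from the patch):

    /* Pitch rule asserted by radv_sdma_check_pitches() in this patch.
     * For a 1 byte-per-pixel format such as VK_FORMAT_R8_UNORM: */
    const unsigned bpp = 1;
    const unsigned pitch_alignment = MAX2(1, 4 / bpp); /* = 4 texels */
    const unsigned pitch_ok = 256;  /* 256 % 4 == 0 -> accepted */
    const unsigned pitch_bad = 30;  /* 30 % 4 != 0 -> would trip the assert;
                                       presumably the kind of unaligned copy
                                       the future workaround has to handle. */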

Signed-off-by: Timur Kristóf <timur.kris...@gmail.com>
Reviewed-by: Tatsuyuki Ishi <ishitatsuy...@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25831>

---

 src/amd/vulkan/meta/radv_meta_copy.c |  29 ++-
 src/amd/vulkan/radv_private.h        |   4 +-
 src/amd/vulkan/radv_sdma.c           | 473 +++++++++++++++++++++++++++--------
 3 files changed, 396 insertions(+), 110 deletions(-)

diff --git a/src/amd/vulkan/meta/radv_meta_copy.c b/src/amd/vulkan/meta/radv_meta_copy.c
index ede2cb92f76..3e288b68d84 100644
--- a/src/amd/vulkan/meta/radv_meta_copy.c
+++ b/src/amd/vulkan/meta/radv_meta_copy.c
@@ -85,10 +85,28 @@ radv_image_is_renderable(const struct radv_device *device, const struct radv_ima
    return true;
 }
 
+static void
+transfer_copy_buffer_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, struct radv_image *image,
+                           const VkBufferImageCopy2 *region, bool to_image)
+{
+   const struct radv_device *device = cmd_buffer->device;
+   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+   radv_cs_add_buffer(device->ws, cs, image->bindings[0].bo);
+   radv_cs_add_buffer(device->ws, cs, buffer->bo);
+
+   radv_sdma_copy_buffer_image(device, cs, image, buffer, region, to_image);
+}
+
 static void
 copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, struct radv_image *image,
                      VkImageLayout layout, const VkBufferImageCopy2 *region)
 {
+   if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
+      transfer_copy_buffer_image(cmd_buffer, buffer, image, region, true);
+      return;
+   }
+
    struct radv_meta_saved_state saved_state;
    bool cs;
 
@@ -236,16 +254,7 @@ copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf
 {
    struct radv_device *device = cmd_buffer->device;
    if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
-      struct radeon_cmdbuf *cs = cmd_buffer->cs;
-      /* RADV_QUEUE_TRANSFER should only be used for the prime blit */
-      assert(!region->imageOffset.x && !region->imageOffset.y && !region->imageOffset.z);
-      assert(image->vk.image_type == VK_IMAGE_TYPE_2D);
-      assert(image->vk.extent.width == region->imageExtent.width);
-      assert(image->vk.extent.height == region->imageExtent.height);
-      ASSERTED bool res = radv_sdma_copy_image(device, cs, image, buffer, region);
-      assert(res);
-      radv_cs_add_buffer(device->ws, cs, image->bindings[0].bo);
-      radv_cs_add_buffer(device->ws, cs, buffer->bo);
+      transfer_copy_buffer_image(cmd_buffer, buffer, image, region, false);
       return;
    }
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 7fd3583337e..cbaba423f35 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -3129,8 +3129,8 @@ void radv_rra_trace_init(struct radv_device *device);
 VkResult radv_rra_dump_trace(VkQueue vk_queue, char *filename);
 void radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data);
 
-bool radv_sdma_copy_image(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_image *image,
-                          struct radv_buffer *buffer, const VkBufferImageCopy2 *region);
+void radv_sdma_copy_buffer_image(const struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_image *image,
+                                 struct radv_buffer *buffer, const VkBufferImageCopy2 *region, bool to_image);
 void radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
                            uint64_t size);
 
diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c
index 86394035e84..70cd6334ddc 100644
--- a/src/amd/vulkan/radv_sdma.c
+++ b/src/amd/vulkan/radv_sdma.c
@@ -1,6 +1,7 @@
 /*
  * Copyright 2010 Jerome Glisse <gli...@freedesktop.org>
  * Copyright 2015-2021 Advanced Micro Devices, Inc.
+ * Copyright 2023 Valve Corporation
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -22,123 +23,276 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
+
 #include "util/macros.h"
 #include "util/u_memory.h"
 #include "radv_cs.h"
 #include "radv_private.h"
 
-static bool
-radv_sdma_v4_v5_copy_image_to_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_image *image,
-                                     struct radv_buffer *buffer, const VkBufferImageCopy2 *region)
+struct radv_sdma_linear_info {
+   uint64_t va;
+   unsigned pitch;
+   unsigned slice_pitch;
+   unsigned bpp;
+   unsigned blk_w;
+   unsigned blk_h;
+};
+
+struct radv_sdma_tiled_info {
+   VkExtent3D extent;
+   uint64_t va;
+   uint64_t meta_va;
+   uint32_t meta_config;
+   uint32_t info_dword;
+   uint32_t header_dword;
+   unsigned bpp;
+   unsigned blk_w;
+   unsigned blk_h;
+};
+
+ALWAYS_INLINE static void
+radv_sdma_check_pitches(const unsigned pitch, const unsigned slice_pitch, const unsigned bpp, const bool uses_depth)
 {
-   assert(image->plane_count == 1);
-   unsigned bpp = image->planes[0].surface.bpe;
-   uint64_t dst_address = buffer->bo->va;
-   uint64_t src_address = image->bindings[0].bo->va + image->planes[0].surface.u.gfx9.surf_offset;
-   unsigned src_pitch = image->planes[0].surface.u.gfx9.surf_pitch;
-   unsigned copy_width = DIV_ROUND_UP(image->vk.extent.width, image->planes[0].surface.blk_w);
-   unsigned copy_height = DIV_ROUND_UP(image->vk.extent.height, image->planes[0].surface.blk_h);
+   ASSERTED const unsigned pitch_alignment = MAX2(1, 4 / bpp);
+   assert(pitch);
+   assert(pitch <= (1 << 14));
+   assert(radv_is_aligned(pitch, pitch_alignment));
 
-   /* Linear -> linear sub-window copy. */
-   if (image->planes[0].surface.is_linear) {
-      bool is_v5_2 = device->physical_device->rad_info.gfx_level >= GFX10_3;
-      uint64_t bytes = (uint64_t)src_pitch * copy_height * bpp;
-      uint32_t chunk_size = 1u << (is_v5_2 ? 30 : 22);
-      uint32_t chunk_count = DIV_ROUND_UP(bytes, chunk_size);
+   if (uses_depth) {
+      ASSERTED const unsigned slice_pitch_alignment = 4;
+      assert(slice_pitch);
+      assert(slice_pitch <= (1 << 28));
+      assert(radv_is_aligned(slice_pitch, slice_pitch_alignment));
+   }
+}
+
+ALWAYS_INLINE static enum gfx9_resource_type
+radv_sdma_surface_resource_type(const struct radv_device *const device, const struct radeon_surf *const surf)
+{
+   if (device->physical_device->rad_info.gfx_level >= GFX10) {
+      /* Use the 2D resource type for rotated or Z swizzles. */
+      if ((surf->u.gfx9.resource_type == RADEON_RESOURCE_1D || surf->u.gfx9.resource_type == RADEON_RESOURCE_3D) &&
+          (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER || surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH))
+         return RADEON_RESOURCE_2D;
+   }
+
+   return surf->u.gfx9.resource_type;
+}
+
+ALWAYS_INLINE static uint32_t
+radv_sdma_surface_type_from_aspect_mask(const VkImageAspectFlags aspectMask)
+{
+   if (aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+      return 1;
+   else if (aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+      return 2;
+
+   return 0;
+}
+
+ALWAYS_INLINE static VkOffset3D
+radv_sdma_get_img_offset(const struct radv_image *const image, const VkImageSubresourceLayers subresource,
+                         VkOffset3D offset)
+{
+   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
+      offset.z = subresource.baseArrayLayer;
+
+   return offset;
+}
+
+ALWAYS_INLINE static VkExtent3D
+radv_sdma_get_copy_extent(const struct radv_image *const image, const VkImageSubresourceLayers subresource,
+                          VkExtent3D extent)
+{
+   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
+      extent.depth = vk_image_subresource_layer_count(&image->vk, &subresource);
+
+   return extent;
+}
+
+ALWAYS_INLINE static VkExtent3D
+radv_sdma_get_image_extent(const struct radv_image *const image)
+{
+   VkExtent3D extent = image->vk.extent;
+   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
+      extent.depth = image->vk.array_layers;
+
+   return extent;
+}
+
+ALWAYS_INLINE static VkExtent3D
+radv_sdma_pixel_extent_to_blocks(const VkExtent3D extent, const unsigned blk_w, const unsigned blk_h)
+{
+   const VkExtent3D r = {
+      .width = DIV_ROUND_UP(extent.width, blk_w),
+      .height = DIV_ROUND_UP(extent.height, blk_h),
+      .depth = extent.depth,
+   };
+
+   return r;
+}
+
+ALWAYS_INLINE static VkOffset3D
+radv_sdma_pixel_offset_to_blocks(const VkOffset3D offset, const unsigned blk_w, const unsigned blk_h)
+{
+   const VkOffset3D r = {
+      .x = DIV_ROUND_UP(offset.x, blk_w),
+      .y = DIV_ROUND_UP(offset.y, blk_h),
+      .z = offset.z,
+   };
+
+   return r;
+}
+
+ALWAYS_INLINE static unsigned
+radv_sdma_pixels_to_blocks(const unsigned linear_pitch, const unsigned blk_w)
+{
+   return DIV_ROUND_UP(linear_pitch, blk_w);
+}
+
+ALWAYS_INLINE static unsigned
+radv_sdma_pixel_area_to_blocks(const unsigned linear_slice_pitch, const unsigned blk_w, const unsigned blk_h)
+{
+   return DIV_ROUND_UP(DIV_ROUND_UP(linear_slice_pitch, blk_w), blk_h);
+}
+
+static struct radv_sdma_linear_info
+radv_sdma_get_linear_buf_info(const struct radv_buffer *const buffer, const struct radv_image *const image,
+                              const VkBufferImageCopy2 *const region)
+{
+   const unsigned pitch = (region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width);
+   const unsigned slice_pitch =
+      (region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height) * pitch;
+
+   const struct radeon_surf *surf = &image->planes[0].surface;
+   const struct radv_sdma_linear_info info = {
+      .va = radv_buffer_get_va(buffer->bo) + buffer->offset + region->bufferOffset,
+      .pitch = pitch,
+      .slice_pitch = slice_pitch,
+      .bpp = surf->bpe,
+      .blk_w = surf->blk_w,
+      .blk_h = surf->blk_h,
+   };
+
+   return info;
+}
 
-      ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 7 * chunk_count);
+static struct radv_sdma_linear_info
+radv_sdma_get_linear_img_info(const struct radv_image *const image, const VkImageSubresourceLayers subresource)
+{
+   const struct radeon_surf *surf = &image->planes[0].surface;
 
-      src_address += image->planes[0].surface.u.gfx9.offset[0];
+   if (!surf->is_linear) {
+      const struct radv_sdma_linear_info empty_info = {0};
+      return empty_info;
+   }
 
-      for (int i = 0; i < chunk_count; i++) {
-         uint32_t size = MIN2(chunk_size, bytes);
-         radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, 0));
-         radeon_emit(cs, size - 1);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, src_address);
-         radeon_emit(cs, src_address >> 32);
-         radeon_emit(cs, dst_address);
-         radeon_emit(cs, dst_address >> 32);
+   const struct radv_sdma_linear_info info = {
+      .va = image->bindings[0].bo->va + image->bindings[0].offset + surf->u.gfx9.surf_offset +
+            surf->u.gfx9.offset[subresource.mipLevel],
+      .pitch = surf->u.gfx9.pitch[subresource.mipLevel],
+      .slice_pitch = surf->blk_w * surf->blk_h * surf->u.gfx9.surf_slice_size / surf->bpe,
+      .bpp = surf->bpe,
+      .blk_w = surf->blk_w,
+      .blk_h = surf->blk_h,
+   };
 
-         src_address += size;
-         dst_address += size;
-         bytes -= size;
-      }
+   return info;
+}
 
-      assert(cs->cdw <= cdw_max);
+static uint32_t
+radv_sdma_get_metadata_config(const struct radv_device *const device, const struct radv_image *const image,
+                              const VkImageSubresourceLayers subresource)
+{
+   /* Only SDMA 5 supports metadata. */
+   const bool is_v5 = device->physical_device->rad_info.gfx_level >= GFX10;
 
-      return true;
+   if (!is_v5 || !(radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) {
+      return 0;
    }
-   /* Tiled sub-window copy -> Linear */
-   else {
-      unsigned tiled_width = copy_width;
-      unsigned tiled_height = copy_height;
-      unsigned linear_pitch = region->bufferRowLength;
-      uint64_t linear_slice_pitch = (uint64_t)region->bufferRowLength * copy_height;
-      uint64_t tiled_address = src_address;
-      uint64_t linear_address = dst_address;
-      bool is_v5 = device->physical_device->rad_info.gfx_level >= GFX10;
-      /* Only SDMA 5 supports DCC with SDMA */
-      bool dcc = radv_dcc_enabled(image, 0) && is_v5;
-
-      /* Check if everything fits into the bitfields */
-      if (!(tiled_width < (1 << 14) && tiled_height < (1 << 14) && linear_pitch < (1 << 14) &&
-            linear_slice_pitch < (1 << 28) && copy_width < (1 << 14) && copy_height < (1 << 14)))
-         return false;
-
-      ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 14 + (dcc ? 3 : 0));
-
-      radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) | dcc << 19 |
-                         (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 | 1u << 31);
-      radeon_emit(cs, (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8));
-      radeon_emit(cs, (uint32_t)(tiled_address >> 32));
-      radeon_emit(cs, 0);
-      radeon_emit(cs, ((tiled_width - 1) << 16));
-      radeon_emit(cs, (tiled_height - 1));
-      radeon_emit(cs, util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 |
-                         image->planes[0].surface.u.gfx9.resource_type << 9 |
-                         (is_v5 ? 0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch) << 16);
-      radeon_emit(cs, (uint32_t)linear_address);
-      radeon_emit(cs, (uint32_t)(linear_address >> 32));
-      radeon_emit(cs, 0);
-      radeon_emit(cs, ((linear_pitch - 1) << 16));
-      radeon_emit(cs, linear_slice_pitch - 1);
-      radeon_emit(cs, (copy_width - 1) | ((copy_height - 1) << 16));
-      radeon_emit(cs, 0);
-
-      if (dcc) {
-         uint64_t md_address = tiled_address + image->planes[0].surface.meta_offset;
-         const struct util_format_description *desc;
-         VkFormat format = image->vk.format;
-         unsigned hw_fmt, hw_type;
-
-         desc = vk_format_description(image->vk.format);
-         hw_fmt = ac_get_cb_format(device->physical_device->rad_info.gfx_level, vk_format_to_pipe_format(format));
-         hw_type = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format));
-
-         /* Add metadata */
-         radeon_emit(cs, (uint32_t)md_address);
-         radeon_emit(cs, (uint32_t)(md_address >> 32));
-         radeon_emit(cs, hw_fmt | vi_alpha_is_on_msb(device, format) << 8 | hw_type << 9 |
-                            image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
-                            V_028C78_MAX_BLOCK_SIZE_256B << 26 |
-                            image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31);
-      }
-
-      assert(cs->cdw <= cdw_max);
-
-      return true;
+
+   const struct radeon_surf *const surf = &image->planes[0].surface;
+   const VkFormat format = vk_format_get_aspect_format(image->vk.format, subresource.aspectMask);
+   const struct util_format_description *desc = vk_format_description(format);
+
+   const uint32_t data_format =
+      ac_get_cb_format(device->physical_device->rad_info.gfx_level, vk_format_to_pipe_format(format));
+   const uint32_t alpha_is_on_msb = vi_alpha_is_on_msb(device, format);
+   const uint32_t number_type = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format));
+   const uint32_t surface_type = radv_sdma_surface_type_from_aspect_mask(subresource.aspectMask);
+   const uint32_t max_comp_block_size = surf->u.gfx9.color.dcc.max_compressed_block_size;
+   const uint32_t max_uncomp_block_size = radv_get_dcc_max_uncompressed_block_size(device, image);
+   const uint32_t pipe_aligned = surf->u.gfx9.color.dcc.pipe_aligned;
+
+   return data_format | alpha_is_on_msb << 8 | number_type << 9 | surface_type << 12 | max_comp_block_size << 24 |
+          max_uncomp_block_size << 26 | pipe_aligned << 31;
+}
+
+static uint32_t
+radv_sdma_get_tiled_info_dword(const struct radv_device *const device, const struct radv_image *const image,
+                               const VkImageSubresourceLayers subresource)
+{
+   const struct radeon_surf *const surf = &image->planes[0].surface;
+   const uint32_t element_size = util_logbase2(surf->bpe);
+   const uint32_t swizzle_mode = surf->has_stencil ? surf->u.gfx9.zs.stencil_swizzle_mode : surf->u.gfx9.swizzle_mode;
+   const enum gfx9_resource_type dimension = radv_sdma_surface_resource_type(device, surf);
+   const uint32_t info = element_size | swizzle_mode << 3 | dimension << 9;
+
+   if (device->physical_device->rad_info.gfx_level >= GFX10) {
+      const uint32_t mip_max = MAX2(image->vk.mip_levels, 1);
+      const uint32_t mip_id = subresource.mipLevel;
+
+      return info | (mip_max - 1) << 16 | mip_id << 20;
+   } else if (device->physical_device->rad_info.gfx_level == GFX9) {
+      return info | surf->u.gfx9.epitch << 16;
+   } else {
+      unreachable("unsupported gfx_level");
    }
+}
+
+static uint32_t
+radv_sdma_get_tiled_header_dword(const struct radv_device *const device, const struct radv_image *const image,
+                                 const VkImageSubresourceLayers subresource)
+{
+   const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
 
-   return false;
+   if (gfx_level >= GFX10) {
+      return 0;
+   } else if (gfx_level == GFX9) {
+      const uint32_t mip_max = MAX2(image->vk.mip_levels, 1);
+      const uint32_t mip_id = subresource.mipLevel;
+      return (mip_max - 1) << 20 | mip_id << 24;
+   } else {
+      unreachable("unsupported gfx_level");
+   }
 }
 
-bool
-radv_sdma_copy_image(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_image *image,
-                     struct radv_buffer *buffer, const VkBufferImageCopy2 *region)
+static struct radv_sdma_tiled_info
+radv_sdma_get_tiled_img_info(const struct radv_device *const device, const struct radv_image *const image,
+                             const VkImageSubresourceLayers subresource)
 {
-   assert(device->physical_device->rad_info.gfx_level >= GFX9);
-   return radv_sdma_v4_v5_copy_image_to_buffer(device, cs, image, buffer, region);
+   const struct radeon_surf *const surf = &image->planes[0].surface;
+
+   /* 1D resources should be linear. */
+   assert(surf->u.gfx9.resource_type != RADEON_RESOURCE_1D);
+
+   const uint32_t meta_config = radv_sdma_get_metadata_config(device, image, subresource);
+   const uint64_t meta_va = image->bindings[0].bo->va + image->bindings[0].offset + surf->meta_offset;
+
+   struct radv_sdma_tiled_info info = {
+      .bpp = surf->bpe,
+      .va = (image->bindings[0].bo->va + image->bindings[0].offset + surf->u.gfx9.surf_offset) | surf->tile_swizzle << 8,
+      .meta_va = meta_config ? meta_va : 0,
+      .meta_config = meta_config,
+      .extent = radv_sdma_get_image_extent(image),
+      .info_dword = radv_sdma_get_tiled_info_dword(device, image, subresource),
+      .header_dword = radv_sdma_get_tiled_header_dword(device, image, subresource),
+      .blk_w = surf->blk_w,
+      .blk_h = surf->blk_h,
+   };
+
+   return info;
 }
 
 void
@@ -183,3 +337,126 @@ radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs
       size -= csize;
    }
 }
+
+static void
+radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs,
+                                      const struct radv_sdma_linear_info *const src,
+                                      const struct radv_sdma_linear_info *const dst, const VkOffset3D src_pix_offset,
+                                      const VkOffset3D dst_pix_offset, const VkExtent3D pix_extent)
+{
+   /* This packet is the same since SDMA v2.4, haven't bothered to check older versions.
+    * The main difference is the bitfield sizes:
+    *
+    * v2.4 - src/dst_pitch: 14 bits, rect_z: 11 bits
+    * v4.0 - src/dst_pitch: 19 bits, rect_z: 11 bits
+    * v5.0 - src/dst_pitch: 19 bits, rect_z: 13 bits
+    *
+    * We currently use the smallest limits (from SDMA v2.4).
+    */
+
+   const VkOffset3D src_off = radv_sdma_pixel_offset_to_blocks(src_pix_offset, src->blk_w, src->blk_h);
+   const VkOffset3D dst_off = radv_sdma_pixel_offset_to_blocks(dst_pix_offset, dst->blk_w, dst->blk_h);
+   const VkExtent3D ext = radv_sdma_pixel_extent_to_blocks(pix_extent, src->blk_w, src->blk_h);
+   const unsigned src_pitch = radv_sdma_pixels_to_blocks(src->pitch, src->blk_w);
+   const unsigned dst_pitch = radv_sdma_pixels_to_blocks(dst->pitch, dst->blk_w);
+   const unsigned src_slice_pitch = radv_sdma_pixel_area_to_blocks(src->slice_pitch, src->blk_w, src->blk_h);
+   const unsigned dst_slice_pitch = radv_sdma_pixel_area_to_blocks(dst->slice_pitch, dst->blk_w, dst->blk_h);
+
+   assert(src->bpp == dst->bpp);
+   assert(util_is_power_of_two_nonzero(src->bpp));
+   radv_sdma_check_pitches(src->pitch, src->slice_pitch, src->bpp, false);
+   radv_sdma_check_pitches(dst->pitch, dst->slice_pitch, dst->bpp, false);
+
+   ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 13);
+
+   radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
+                      util_logbase2(src->bpp) << 29);
+   radeon_emit(cs, src->va);
+   radeon_emit(cs, src->va >> 32);
+   radeon_emit(cs, src_off.x | src_off.y << 16);
+   radeon_emit(cs, src_off.z | (src_pitch - 1) << 13);
+   radeon_emit(cs, src_slice_pitch - 1);
+   radeon_emit(cs, dst->va);
+   radeon_emit(cs, dst->va >> 32);
+   radeon_emit(cs, dst_off.x | dst_off.y << 16);
+   radeon_emit(cs, dst_off.z | (dst_pitch - 1) << 13);
+   radeon_emit(cs, dst_slice_pitch - 1);
+   radeon_emit(cs, (ext.width - 1) | (ext.height - 1) << 16);
+   radeon_emit(cs, (ext.depth - 1));
+
+   assert(cs->cdw == cdw_end);
+}
+
+static void
+radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs,
+                                     const struct radv_sdma_tiled_info *const tiled,
+                                     const struct radv_sdma_linear_info *const linear,
+                                     const VkOffset3D tiled_pix_offset, const VkOffset3D linear_pix_offset,
+                                     const VkExtent3D pix_extent, const bool detile)
+{
+   if (device->physical_device->rad_info.gfx_level == GFX9) {
+      /* SDMA v4 doesn't support any image metadata. */
+      assert(!tiled->meta_va);
+   }
+
+   const VkOffset3D linear_off = radv_sdma_pixel_offset_to_blocks(linear_pix_offset, linear->blk_w, linear->blk_h);
+   const VkOffset3D tiled_off = radv_sdma_pixel_offset_to_blocks(tiled_pix_offset, tiled->blk_w, tiled->blk_h);
+   const VkExtent3D tiled_ext = radv_sdma_pixel_extent_to_blocks(tiled->extent, tiled->blk_w, tiled->blk_h);
+   const VkExtent3D ext = radv_sdma_pixel_extent_to_blocks(pix_extent, tiled->blk_w, tiled->blk_h);
+   const unsigned linear_pitch = radv_sdma_pixels_to_blocks(linear->pitch, tiled->blk_w);
+   const unsigned linear_slice_pitch = radv_sdma_pixel_area_to_blocks(linear->slice_pitch, tiled->blk_w, tiled->blk_h);
+   const bool dcc = !!tiled->meta_va;
+   const bool uses_depth = linear_off.z != 0 || tiled_off.z != 0 || ext.depth != 1;
+
+   assert(util_is_power_of_two_nonzero(tiled->bpp));
+   radv_sdma_check_pitches(linear_pitch, linear_slice_pitch, tiled->bpp, uses_depth);
+
+   ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 14 + (dcc ? 3 : 0));
+
+   radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) | dcc << 19 |
+                      detile << 31 | tiled->header_dword);
+   radeon_emit(cs, tiled->va);
+   radeon_emit(cs, tiled->va >> 32);
+   radeon_emit(cs, tiled_off.x | tiled_off.y << 16);
+   radeon_emit(cs, tiled_off.z | (tiled_ext.width - 1) << 16);
+   radeon_emit(cs, (tiled_ext.height - 1) | (tiled_ext.depth - 1) << 16);
+   radeon_emit(cs, tiled->info_dword);
+   radeon_emit(cs, linear->va);
+   radeon_emit(cs, linear->va >> 32);
+   radeon_emit(cs, linear_off.x | linear_off.y << 16);
+   radeon_emit(cs, linear_off.z | (linear_pitch - 1) << 16);
+   radeon_emit(cs, linear_slice_pitch - 1);
+   radeon_emit(cs, (ext.width - 1) | (ext.height - 1) << 16);
+   radeon_emit(cs, (ext.depth - 1));
+
+   if (tiled->meta_va) {
+      const unsigned write_compress_enable = !detile;
+      radeon_emit(cs, tiled->meta_va);
+      radeon_emit(cs, tiled->meta_va >> 32);
+      radeon_emit(cs, tiled->meta_config | write_compress_enable << 28);
+   }
+
+   assert(cs->cdw == cdw_end);
+}
+
+void
+radv_sdma_copy_buffer_image(const struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_image *image,
+                            struct radv_buffer *buffer, const VkBufferImageCopy2 *region, bool to_image)
+{
+   const struct radv_sdma_linear_info buf_info = radv_sdma_get_linear_buf_info(buffer, image, region);
+   const VkExtent3D extent = radv_sdma_get_copy_extent(image, region->imageSubresource, region->imageExtent);
+   const VkOffset3D img_offset = radv_sdma_get_img_offset(image, region->imageSubresource, region->imageOffset);
+   const VkOffset3D zero_offset = {0};
+
+   if (image->planes[0].surface.is_linear) {
+      const struct radv_sdma_linear_info linear = radv_sdma_get_linear_img_info(image, region->imageSubresource);
+
+      if (to_image)
+         radv_sdma_emit_copy_linear_sub_window(device, cs, &buf_info, &linear, zero_offset, img_offset, extent);
+      else
+         radv_sdma_emit_copy_linear_sub_window(device, cs, &linear, &buf_info, img_offset, zero_offset, extent);
+   } else {
+      const struct radv_sdma_tiled_info tiled = radv_sdma_get_tiled_img_info(device, image, region->imageSubresource);
+      radv_sdma_emit_copy_tiled_sub_window(device, cs, &tiled, &buf_info, img_offset, zero_offset, extent, !to_image);
+   }
+}
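
For illustration, this is the kind of application-side copy that can now be
recorded on a transfer-only queue family and will take the SDMA path above.
The snippet is hypothetical and not part of the patch; cmd_buf,
staging_buffer and image are assumed to exist, with the image already in
TRANSFER_DST_OPTIMAL layout:

   /* Copy a 64x64 sub-region, at an offset, from a staging buffer into an
    * image, recorded on a command buffer from a transfer queue family. */
   const VkBufferImageCopy2 region = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
      .bufferOffset = 0,
      .bufferRowLength = 0,       /* tightly packed rows */
      .bufferImageHeight = 0,
      .imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0 /* mip */, 0 /* layer */, 1},
      .imageOffset = {16, 16, 0}, /* non-zero offsets are no longer rejected */
      .imageExtent = {64, 64, 1},
   };
   const VkCopyBufferToImageInfo2 copy_info = {
      .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_TO_IMAGE_INFO_2,
      .srcBuffer = staging_buffer,
      .dstImage = image,
      .dstImageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
      .regionCount = 1,
      .pRegions = &region,
   };
   vkCmdCopyBufferToImage2(cmd_buf, &copy_info);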
