Mesa (master): v3dv: batch copies in the copy_buffer_to_image_blit path

GitLab Mirror Fri, 05 Feb 2021 04:41:26 -0800

Module: Mesa
Branch: master
Commit: 4d4a0797ceefc841dda4b8db8cd6b0ed03ba7815
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4d4a0797ceefc841dda4b8db8cd6b0ed03ba7815


Author: Iago Toral Quiroga <ito...@igalia.com>
Date:   Fri Feb  5 08:56:14 2021 +0100

v3dv: batch copies in the copy_buffer_to_image_blit path

This path is very memory hungry. Batching lets us reduce its memory
usage by allocating memory just once and reusing it for all regions
in the batch instead of allocating once per region.

v2: document return value for this function (apinheiro).

Reviewed-by: Alejandro Piñeiro <apinhe...@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8875>

---

 src/broadcom/vulkan/v3dv_meta_copy.c | 374 +++++++++++++++++++----------------
 1 file changed, 208 insertions(+), 166 deletions(-)

diff --git a/src/broadcom/vulkan/v3dv_meta_copy.c b/src/broadcom/vulkan/v3dv_meta_copy.c
index 3836e6a440e..cedc5cd26dc 100644
--- a/src/broadcom/vulkan/v3dv_meta_copy.c
+++ b/src/broadcom/vulkan/v3dv_meta_copy.c
@@ -3584,6 +3584,10 @@ fail:
    return handled;
 }
 
+/**
+ * Returns true if the implementation supports the requested operation (even if
+ * it failed to process it, for example, due to an out-of-memory error).
+ */
 static bool
 copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                           VkImageAspectFlags aspect,
@@ -3593,172 +3597,220 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                           struct v3dv_buffer *buffer,
                           uint32_t buffer_bpp,
                           VkColorComponentFlags cmask,
-                          const VkBufferImageCopy *region)
+                          uint32_t region_count,
+                          const VkBufferImageCopy *regions)
 {
+   /* Since we can't sample linear images we need to upload the linear
+    * buffer to a tiled image that we can use as a blit source, which
+    * is slow.
+    */
    perf_debug("Falling back to blit path for buffer to image copy.\n");
 
-   /* Obtain the layer count */
-   uint32_t num_layers;
-   if (image->type != VK_IMAGE_TYPE_3D)
-      num_layers = region->imageSubresource.layerCount;
-   else
-      num_layers = region->imageExtent.depth;
-   assert(num_layers > 0);
+   struct v3dv_device *device = cmd_buffer->device;
+   VkDevice _device = v3dv_device_to_handle(device);
+   bool handled = true;
 
-   /* Obtain the 2D buffer region spec */
-   uint32_t buf_width, buf_height;
-   if (region->bufferRowLength == 0)
-       buf_width = region->imageExtent.width;
-   else
-       buf_width = region->bufferRowLength;
+   /* Allocate memory for the tiled image. Since we copy layer by layer
+    * we allocate memory to hold a full layer, which is the worst case.
+    * For that we create a dummy image with that spec, get memory requirements
+    * for it and use that information to create the memory allocation.
+    * We will then reuse this memory store for all the regions we want to
+    * copy.
+    */
+   VkImage dummy_image;
+   VkImageCreateInfo dummy_info = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+      .imageType = VK_IMAGE_TYPE_2D,
+      .format = src_format,
+      .extent = { image->extent.width, image->extent.height, 1 },
+      .mipLevels = 1,
+      .arrayLayers = 1,
+      .samples = VK_SAMPLE_COUNT_1_BIT,
+      .tiling = VK_IMAGE_TILING_OPTIMAL,
+      .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+               VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+      .queueFamilyIndexCount = 0,
+      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+   };
+   VkResult result =
+      v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
+   if (result != VK_SUCCESS)
+      return handled;
 
-   if (region->bufferImageHeight == 0)
-       buf_height = region->imageExtent.height;
-   else
-       buf_height = region->bufferImageHeight;
+   VkMemoryRequirements reqs;
+   v3dv_GetImageMemoryRequirements(_device, dummy_image, &reqs);
+   v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
 
-   /* If the image is compressed, the bpp refers to blocks, not pixels */
-   uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
-   uint32_t block_height = vk_format_get_blockheight(image->vk_format);
-   buf_width = buf_width / block_width;
-   buf_height = buf_height / block_height;
+   VkDeviceMemory mem;
+   VkMemoryAllocateInfo alloc_info = {
+      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+      .allocationSize = reqs.size,
+      .memoryTypeIndex = 0,
+   };
+   result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
+   if (result != VK_SUCCESS)
+      return handled;
 
-   /* We should have configured the blit to use a supported format  */
-   bool handled = true;
+   v3dv_cmd_buffer_add_private_obj(
+      cmd_buffer, (uintptr_t)mem,
+      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
 
-   struct v3dv_device *device = cmd_buffer->device;
-   VkDevice _device = v3dv_device_to_handle(device);
-   for (uint32_t i = 0; i < num_layers; i++) {
-      /* Otherwise, since we can't sample linear images we need to upload the
-       * linear buffer to a tiled image that we can use as a blit source, which
-       * is slow.
-       */
-      VkImageCreateInfo image_info = {
-         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
-         .imageType = VK_IMAGE_TYPE_2D,
-         .format = src_format,
-         .extent = { buf_width, buf_height, 1 },
-         .mipLevels = 1,
-         .arrayLayers = 1,
-         .samples = VK_SAMPLE_COUNT_1_BIT,
-         .tiling = VK_IMAGE_TILING_OPTIMAL,
-         .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
-                  VK_IMAGE_USAGE_TRANSFER_DST_BIT,
-         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-         .queueFamilyIndexCount = 0,
-         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-      };
+   /* Obtain the layer count.
+    *
+    * If we are batching (region_count > 1) all our regions have the same
+    * image subresource so we can take this from the first region.
+    */
+   uint32_t num_layers;
+   if (image->type != VK_IMAGE_TYPE_3D)
+      num_layers = regions[0].imageSubresource.layerCount;
+   else
+      num_layers = regions[0].imageExtent.depth;
+   assert(num_layers > 0);
 
-      VkImage buffer_image;
-      VkResult result =
-         v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
-      if (result != VK_SUCCESS)
-         return handled;
+   /* Sanity check: we can only batch multiple regions together if they have
+    * the same framebuffer (so the same layer).
+    */
+   assert(num_layers == 1 || region_count == 1);
 
-      v3dv_cmd_buffer_add_private_obj(
-         cmd_buffer, (uintptr_t)buffer_image,
-         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
+   const uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
+   const uint32_t block_height = vk_format_get_blockheight(image->vk_format);
 
-      /* Allocate and bind memory for the image */
-      VkDeviceMemory mem;
-      VkMemoryRequirements reqs;
-      v3dv_GetImageMemoryRequirements(_device, buffer_image, &reqs);
-      VkMemoryAllocateInfo alloc_info = {
-         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-         .allocationSize = reqs.size,
-         .memoryTypeIndex = 0,
-      };
-      result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
-      if (result != VK_SUCCESS)
-         return handled;
+   /* Copy regions by uploading each region to a temporary tiled image using
+    * the memory we have just allocated as storage.
+    */
+   for (uint32_t r = 0; r < region_count; r++) {
+      const VkBufferImageCopy *region = &regions[r];
 
-      v3dv_cmd_buffer_add_private_obj(
-         cmd_buffer, (uintptr_t)mem,
-         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
+      /* Obtain the 2D buffer region spec */
+      uint32_t buf_width, buf_height;
+      if (region->bufferRowLength == 0)
+          buf_width = region->imageExtent.width;
+      else
+          buf_width = region->bufferRowLength;
 
-      result = v3dv_BindImageMemory(_device, buffer_image, mem, 0);
-      if (result != VK_SUCCESS)
-         return handled;
+      if (region->bufferImageHeight == 0)
+          buf_height = region->imageExtent.height;
+      else
+          buf_height = region->bufferImageHeight;
+
+      /* If the image is compressed, the bpp refers to blocks, not pixels */
+      buf_width = buf_width / block_width;
+      buf_height = buf_height / block_height;
+
+      for (uint32_t i = 0; i < num_layers; i++) {
+         /* Create the tiled image */
+         VkImageCreateInfo image_info = {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+            .imageType = VK_IMAGE_TYPE_2D,
+            .format = src_format,
+            .extent = { buf_width, buf_height, 1 },
+            .mipLevels = 1,
+            .arrayLayers = 1,
+            .samples = VK_SAMPLE_COUNT_1_BIT,
+            .tiling = VK_IMAGE_TILING_OPTIMAL,
+            .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+                     VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+            .queueFamilyIndexCount = 0,
+            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+         };
 
-      /* Upload buffer contents for the selected layer */
-      const VkDeviceSize buf_offset_bytes =
-         region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
-      const VkBufferImageCopy buffer_image_copy = {
-         .bufferOffset = buf_offset_bytes,
-         .bufferRowLength = region->bufferRowLength / block_width,
-         .bufferImageHeight = region->bufferImageHeight / block_height,
-         .imageSubresource = {
-            .aspectMask = aspect,
-            .mipLevel = 0,
-            .baseArrayLayer = 0,
-            .layerCount = 1,
-         },
-         .imageOffset = { 0, 0, 0 },
-         .imageExtent = { buf_width, buf_height, 1 }
-      };
-      handled =
-         create_tiled_image_from_buffer(cmd_buffer,
-                                        v3dv_image_from_handle(buffer_image),
-                                        buffer, &buffer_image_copy);
-      if (!handled) {
-         /* This is unexpected, we should have setup the upload to be
-          * conformant to a TFU or TLB copy.
-          */
-         unreachable("Unable to copy buffer to image through TLB");
-         return false;
-      }
+         VkImage buffer_image;
+         VkResult result =
+            v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
+                             &buffer_image);
+         if (result != VK_SUCCESS)
+            return handled;
+
+         v3dv_cmd_buffer_add_private_obj(
+            cmd_buffer, (uintptr_t)buffer_image,
+            (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
+
+         result = v3dv_BindImageMemory(_device, buffer_image, mem, 0);
+         if (result != VK_SUCCESS)
+            return handled;
+
+         /* Upload buffer contents for the selected layer */
+         const VkDeviceSize buf_offset_bytes =
+            region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
+         const VkBufferImageCopy buffer_image_copy = {
+            .bufferOffset = buf_offset_bytes,
+            .bufferRowLength = region->bufferRowLength / block_width,
+            .bufferImageHeight = region->bufferImageHeight / block_height,
+            .imageSubresource = {
+               .aspectMask = aspect,
+               .mipLevel = 0,
+               .baseArrayLayer = 0,
+               .layerCount = 1,
+            },
+            .imageOffset = { 0, 0, 0 },
+            .imageExtent = { buf_width, buf_height, 1 }
+         };
+         handled =
+            create_tiled_image_from_buffer(cmd_buffer,
+                                           v3dv_image_from_handle(buffer_image),
+                                           buffer, &buffer_image_copy);
+         if (!handled) {
+            /* This is unexpected, we should have setup the upload to be
+             * conformant to a TFU or TLB copy.
+             */
+            unreachable("Unable to copy buffer to image through TLB");
+            return false;
+         }
 
-      /* Blit-copy the requested image extent from the buffer image to the
-       * destination image.
-       *
-       * Since we are copying, the blit must use the same format on the
-       * destination and source images to avoid format conversions. The
-       * only exception is copying stencil, which we upload to a R8UI source
-       * image, but that we need to blit to a S8D24 destination (the only
-       * stencil format we support).
-       */
-      const VkImageBlit blit_region = {
-         .srcSubresource = {
-            .aspectMask = aspect,
-            .mipLevel = 0,
-            .baseArrayLayer = 0,
-            .layerCount = 1,
-         },
-         .srcOffsets = {
-            { 0, 0, 0 },
-            { region->imageExtent.width, region->imageExtent.height, 1 },
-         },
-         .dstSubresource = {
-            .aspectMask = aspect,
-            .mipLevel = region->imageSubresource.mipLevel,
-            .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
-            .layerCount = 1,
-         },
-         .dstOffsets = {
-            {
-               DIV_ROUND_UP(region->imageOffset.x, block_width),
-               DIV_ROUND_UP(region->imageOffset.y, block_height),
-               region->imageOffset.z + i,
+         /* Blit-copy the requested image extent from the buffer image to the
+          * destination image.
+          *
+          * Since we are copying, the blit must use the same format on the
+          * destination and source images to avoid format conversions. The
+          * only exception is copying stencil, which we upload to a R8UI source
+          * image, but that we need to blit to a S8D24 destination (the only
+          * stencil format we support).
+          */
+         const VkImageBlit blit_region = {
+            .srcSubresource = {
+               .aspectMask = aspect,
+               .mipLevel = 0,
+               .baseArrayLayer = 0,
+               .layerCount = 1,
             },
-            {
-               DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
-                            block_width),
-               DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
-                            block_height),
-               region->imageOffset.z + i + 1,
+            .srcOffsets = {
+               { 0, 0, 0 },
+               { region->imageExtent.width, region->imageExtent.height, 1 },
             },
-         },
-      };
+            .dstSubresource = {
+               .aspectMask = aspect,
+               .mipLevel = region->imageSubresource.mipLevel,
+               .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
+               .layerCount = 1,
+            },
+            .dstOffsets = {
+               {
+                  DIV_ROUND_UP(region->imageOffset.x, block_width),
+                  DIV_ROUND_UP(region->imageOffset.y, block_height),
+                  region->imageOffset.z + i,
+               },
+               {
+                  DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
+                               block_width),
+                  DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
+                               block_height),
+                  region->imageOffset.z + i + 1,
+               },
+            },
+         };
 
-      handled = blit_shader(cmd_buffer,
-                            image, dst_format,
-                            v3dv_image_from_handle(buffer_image), src_format,
-                            cmask, NULL,
-                            &blit_region, VK_FILTER_NEAREST, true);
-      if (!handled) {
-         /* This is unexpected, we should have a supported blit spec */
-         unreachable("Unable to blit buffer to destination image");
-         return false;
+         handled = blit_shader(cmd_buffer,
+                               image, dst_format,
+                               v3dv_image_from_handle(buffer_image), src_format,
+                               cmask, NULL,
+                               &blit_region, VK_FILTER_NEAREST, true);
+         if (!handled) {
+            /* This is unexpected, we should have a supported blit spec */
+            unreachable("Unable to blit buffer to destination image");
+            return false;
+         }
       }
    }
 
@@ -3777,9 +3829,6 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
                             const VkBufferImageCopy *regions,
                             bool use_texel_buffer)
 {
-   /* FIXME: we only support batching on the texel buffer path for now */
-   assert(region_count == 1 || use_texel_buffer);
-
    /* We can only call this with region_count > 1 if we can batch the regions
     * together, in which case they share the same image subresource, and so
     * the same aspect.
@@ -3900,16 +3949,10 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
       if (memcmp(&cswizzle, &ident_swizzle, sizeof(cswizzle)))
          return false;
 
-      bool handled = true;
-      for (uint32_t i = 0; i < region_count; i++) {
-         handled = copy_buffer_to_image_blit(cmd_buffer, aspect, image,
-                                             dst_format, src_format,
-                                             buffer, buf_bpp,
-                                             cmask, &regions[i]);
-         if (!handled)
-            break;
-      }
-      return handled;
+      return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
+                                       dst_format, src_format,
+                                       buffer, buf_bpp, cmask,
+                                       region_count, regions);
    }
 }
 
@@ -3994,7 +4037,6 @@ v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
    assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
 
    uint32_t r = 0;
-
    while (r < regionCount) {
       /* The TFU and TLB paths can only copy one region at a time and the region
        * needs to start at the origin. We try these first for the common case
@@ -4033,13 +4075,13 @@ v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
        * slow it might not be worth it and we should instead put more effort
        * in handling more cases with the other paths.
        */
-      batch_size = 1;
-
-      if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &pRegions[r]))
+      if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &pRegions[r])) {
+         batch_size = 1;
          goto handled;
+      }
 
       if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
-                                      1, &pRegions[r], false)) {
+                                      batch_size, &pRegions[r], false)) {
          goto handled;
       }
 

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): v3dv: batch copies in the copy_buffer_to_image_blit path

Reply via email to