Module: Mesa
Branch: main
Commit: 74b083ef75e45dbe164d2351495a1d6a54476d16
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=74b083ef75e45dbe164d2351495a1d6a54476d16

Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date:   Thu Nov 30 23:26:07 2023 -0600

nvk: Use the copy engine for CmdFillBuffer

The copy engine is a lot simpler than the 2D engine.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26441>

---

 src/nouveau/vulkan/nvk_cmd_copy.c | 107 +++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 58 deletions(-)

diff --git a/src/nouveau/vulkan/nvk_cmd_copy.c b/src/nouveau/vulkan/nvk_cmd_copy.c
index f01d1d088a1..0a8e88caeaa 100644
--- a/src/nouveau/vulkan/nvk_cmd_copy.c
+++ b/src/nouveau/vulkan/nvk_cmd_copy.c
@@ -635,79 +635,70 @@ VKAPI_ATTR void VKAPI_CALL
 nvk_CmdFillBuffer(VkCommandBuffer commandBuffer,
                   VkBuffer dstBuffer,
                   VkDeviceSize dstOffset,
-                  VkDeviceSize fillSize,
+                  VkDeviceSize size,
                   uint32_t data)
 {
    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
-   VK_FROM_HANDLE(nvk_buffer, dst, dstBuffer);
-
-   fillSize = vk_buffer_range(&dst->vk, dstOffset, fillSize);
-
-   VkDeviceSize dst_addr = nvk_buffer_address(dst, 0);
-   VkDeviceSize start = dstOffset / 4;
-   VkDeviceSize end = start + fillSize / 4;
-
-   /* Pascal could do 1 << 19, but previous gens need lower pitches */
-   uint32_t pitch = 1 << 18;
-   uint32_t line = pitch / 4;
-
-   struct nv_push *p = nvk_cmd_buffer_push(cmd, 33);
+   VK_FROM_HANDLE(nvk_buffer, dst_buffer, dstBuffer);
 
-   P_IMMD(p, NV902D, SET_OPERATION, V_SRCCOPY);
+   uint64_t dst_addr = nvk_buffer_address(dst_buffer, dstOffset);
+   size = vk_buffer_range(&dst_buffer->vk, dstOffset, size);
 
-   P_MTHD(p, NV902D, SET_DST_FORMAT);
-   P_NV902D_SET_DST_FORMAT(p, V_A8B8G8R8);
-   P_NV902D_SET_DST_MEMORY_LAYOUT(p, V_PITCH);
+   uint32_t max_dim = 1 << 15;
 
-   P_MTHD(p, NV902D, SET_DST_PITCH);
-   P_NV902D_SET_DST_PITCH(p, pitch);
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);
 
-   P_MTHD(p, NV902D, SET_DST_OFFSET_UPPER);
-   P_NV902D_SET_DST_OFFSET_UPPER(p, dst_addr >> 32);
-   P_NV902D_SET_DST_OFFSET_LOWER(p, dst_addr & 0xffffffff);
-
-   P_MTHD(p, NV902D, RENDER_SOLID_PRIM_MODE);
-   P_NV902D_RENDER_SOLID_PRIM_MODE(p, V_LINES);
-   P_NV902D_SET_RENDER_SOLID_PRIM_COLOR_FORMAT(p, V_A8B8G8R8);
-   P_NV902D_SET_RENDER_SOLID_PRIM_COLOR(p, data);
+   P_IMMD(p, NV90B5, SET_REMAP_CONST_A, data);
+   P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
+      .dst_x = DST_X_CONST_A,
+      .dst_y = DST_Y_CONST_A,
+      .dst_z = DST_Z_CONST_A,
+      .dst_w = DST_W_CONST_A,
+      .component_size = COMPONENT_SIZE_FOUR,
+      .num_src_components = NUM_SRC_COMPONENTS_ONE,
+      .num_dst_components = NUM_DST_COMPONENTS_ONE,
+   });
 
-   /*
-    * In order to support CPU efficient fills, we'll draw up to three primitives:
-    *   1. rest of the first line
-    *   2. a rect filling up the space between the start and end
-    *   3. begining of last line
-    */
+   P_MTHD(p, NV90B5, PITCH_IN);
+   P_NV90B5_PITCH_IN(p, max_dim * 4);
+   P_NV90B5_PITCH_OUT(p, max_dim * 4);
 
-   uint32_t y_0 = start / line;
-   uint32_t y_1 = end / line;
+   while (size >= 4) {
+      struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
 
-   uint32_t x_0 = start % line;
-   uint32_t x_1 = end % line;
+      P_MTHD(p, NV90B5, OFFSET_OUT_UPPER);
+      P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
+      P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
 
-   P_MTHD(p, NV902D, RENDER_SOLID_PRIM_POINT_SET_X(0));
-   P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 0, x_0);
-   P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 0, y_0);
-   P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 1, y_0 == y_1 ? x_1 : line);
-   P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 1, y_0);
+      uint64_t width, height;
+      if (size >= (uint64_t)max_dim * (uint64_t)max_dim * 4) {
+         width = height = max_dim;
+      } else if (size >= max_dim * 4) {
+         width = max_dim;
+         height = size / (max_dim * 4);
+      } else {
+         width = size / 4;
+         height = 1;
+      }
 
-   if (y_0 + 1 < y_1) {
-      P_IMMD(p, NV902D, RENDER_SOLID_PRIM_MODE, V_RECTS);
+      uint64_t dma_size = (uint64_t)width * (uint64_t)height * 4;
+      assert(dma_size <= size);
 
-      P_MTHD(p, NV902D, RENDER_SOLID_PRIM_POINT_SET_X(0));
-      P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 0, 0);
-      P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 0, y_0 + 1);
-      P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 1, line);
-      P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 1, y_1);
+      P_MTHD(p, NV90B5, LINE_LENGTH_IN);
+      P_NV90B5_LINE_LENGTH_IN(p, width);
+      P_NV90B5_LINE_COUNT(p, height);
 
-      P_IMMD(p, NV902D, RENDER_SOLID_PRIM_MODE, V_LINES);
-   }
+      P_IMMD(p, NV90B5, LAUNCH_DMA, {
+         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
+         .multi_line_enable = height > 1,
+         .flush_enable = FLUSH_ENABLE_TRUE,
+         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
+         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
+         .remap_enable = REMAP_ENABLE_TRUE,
+      });
 
-   if (y_0 < y_1) {
-      P_MTHD(p, NV902D, RENDER_SOLID_PRIM_POINT_SET_X(0));
-      P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 0, 0);
-      P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 0, y_1);
-      P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 1, x_1);
-      P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 1, y_1);
+      dst_addr += dma_size;
+      size -= dma_size;
    }
 }
 

Reply via email to