Module: Mesa Branch: main Commit: 74b083ef75e45dbe164d2351495a1d6a54476d16 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=74b083ef75e45dbe164d2351495a1d6a54476d16
Author: Faith Ekstrand <faith.ekstr...@collabora.com> Date: Thu Nov 30 23:26:07 2023 -0600 nvk: Use the copy engine for CmdFillBuffer The copy engine is a lot simpler than the 2D engine. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26441> --- src/nouveau/vulkan/nvk_cmd_copy.c | 107 +++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 58 deletions(-) diff --git a/src/nouveau/vulkan/nvk_cmd_copy.c b/src/nouveau/vulkan/nvk_cmd_copy.c index f01d1d088a1..0a8e88caeaa 100644 --- a/src/nouveau/vulkan/nvk_cmd_copy.c +++ b/src/nouveau/vulkan/nvk_cmd_copy.c @@ -635,79 +635,70 @@ VKAPI_ATTR void VKAPI_CALL nvk_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, - VkDeviceSize fillSize, + VkDeviceSize size, uint32_t data) { VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); - VK_FROM_HANDLE(nvk_buffer, dst, dstBuffer); - - fillSize = vk_buffer_range(&dst->vk, dstOffset, fillSize); - - VkDeviceSize dst_addr = nvk_buffer_address(dst, 0); - VkDeviceSize start = dstOffset / 4; - VkDeviceSize end = start + fillSize / 4; - - /* Pascal could do 1 << 19, but previous gens need lower pitches */ - uint32_t pitch = 1 << 18; - uint32_t line = pitch / 4; - - struct nv_push *p = nvk_cmd_buffer_push(cmd, 33); + VK_FROM_HANDLE(nvk_buffer, dst_buffer, dstBuffer); - P_IMMD(p, NV902D, SET_OPERATION, V_SRCCOPY); + uint64_t dst_addr = nvk_buffer_address(dst_buffer, dstOffset); + size = vk_buffer_range(&dst_buffer->vk, dstOffset, size); - P_MTHD(p, NV902D, SET_DST_FORMAT); - P_NV902D_SET_DST_FORMAT(p, V_A8B8G8R8); - P_NV902D_SET_DST_MEMORY_LAYOUT(p, V_PITCH); + uint32_t max_dim = 1 << 15; - P_MTHD(p, NV902D, SET_DST_PITCH); - P_NV902D_SET_DST_PITCH(p, pitch); + struct nv_push *p = nvk_cmd_buffer_push(cmd, 7); - P_MTHD(p, NV902D, SET_DST_OFFSET_UPPER); - P_NV902D_SET_DST_OFFSET_UPPER(p, dst_addr >> 32); - P_NV902D_SET_DST_OFFSET_LOWER(p, dst_addr & 0xffffffff); - - P_MTHD(p, NV902D, RENDER_SOLID_PRIM_MODE); - P_NV902D_RENDER_SOLID_PRIM_MODE(p, V_LINES); - P_NV902D_SET_RENDER_SOLID_PRIM_COLOR_FORMAT(p, V_A8B8G8R8); - P_NV902D_SET_RENDER_SOLID_PRIM_COLOR(p, data); + P_IMMD(p, NV90B5, SET_REMAP_CONST_A, data); + P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, { + .dst_x = DST_X_CONST_A, + .dst_y = DST_Y_CONST_A, + .dst_z = DST_Z_CONST_A, + .dst_w = DST_W_CONST_A, + .component_size = COMPONENT_SIZE_FOUR, + .num_src_components = NUM_SRC_COMPONENTS_ONE, + .num_dst_components = NUM_DST_COMPONENTS_ONE, + }); - /* - * In order to support CPU efficient fills, we'll draw up to three primitives: - * 1. rest of the first line - * 2. a rect filling up the space between the start and end - * 3. begining of last line - */ + P_MTHD(p, NV90B5, PITCH_IN); + P_NV90B5_PITCH_IN(p, max_dim * 4); + P_NV90B5_PITCH_OUT(p, max_dim * 4); - uint32_t y_0 = start / line; - uint32_t y_1 = end / line; + while (size >= 4) { + struct nv_push *p = nvk_cmd_buffer_push(cmd, 8); - uint32_t x_0 = start % line; - uint32_t x_1 = end % line; + P_MTHD(p, NV90B5, OFFSET_OUT_UPPER); + P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32); + P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff); - P_MTHD(p, NV902D, RENDER_SOLID_PRIM_POINT_SET_X(0)); - P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 0, x_0); - P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 0, y_0); - P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 1, y_0 == y_1 ? x_1 : line); - P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 1, y_0); + uint64_t width, height; + if (size >= (uint64_t)max_dim * (uint64_t)max_dim * 4) { + width = height = max_dim; + } else if (size >= max_dim * 4) { + width = max_dim; + height = size / (max_dim * 4); + } else { + width = size / 4; + height = 1; + } - if (y_0 + 1 < y_1) { - P_IMMD(p, NV902D, RENDER_SOLID_PRIM_MODE, V_RECTS); + uint64_t dma_size = (uint64_t)width * (uint64_t)height * 4; + assert(dma_size <= size); - P_MTHD(p, NV902D, RENDER_SOLID_PRIM_POINT_SET_X(0)); - P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 0, 0); - P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 0, y_0 + 1); - P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 1, line); - P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 1, y_1); + P_MTHD(p, NV90B5, LINE_LENGTH_IN); + P_NV90B5_LINE_LENGTH_IN(p, width); + P_NV90B5_LINE_COUNT(p, height); - P_IMMD(p, NV902D, RENDER_SOLID_PRIM_MODE, V_LINES); - } + P_IMMD(p, NV90B5, LAUNCH_DMA, { + .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED, + .multi_line_enable = height > 1, + .flush_enable = FLUSH_ENABLE_TRUE, + .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH, + .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH, + .remap_enable = REMAP_ENABLE_TRUE, + }); - if (y_0 < y_1) { - P_MTHD(p, NV902D, RENDER_SOLID_PRIM_POINT_SET_X(0)); - P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 0, 0); - P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 0, y_1); - P_NV902D_RENDER_SOLID_PRIM_POINT_SET_X(p, 1, x_1); - P_NV902D_RENDER_SOLID_PRIM_POINT_Y(p, 1, y_1); + dst_addr += dma_size; + size -= dma_size; } }