We'll be performing a GPU memcpy in more places to copy small amounts of data. Add a path that thrashes less pipeline state.
v2: - Make a new function (Jason Ekstrand) - Move the #define into the function Signed-off-by: Nanley Chery <nanley.g.ch...@intel.com> --- src/intel/vulkan/anv_genX.h | 5 +++++ src/intel/vulkan/genX_gpu_memcpy.c | 41 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 7b1487b543..5cab31234b 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -64,6 +64,11 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, VkShaderStageFlags active_stages, const unsigned entry_size[4]); +void genX(cmd_buffer_mem_mem_cpy)(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dst, uint32_t dst_offset, + struct anv_bo *src, uint32_t src_offset, + uint32_t size); + void genX(cmd_buffer_streamout_cpy)(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *dst, uint32_t dst_offset, struct anv_bo *src, uint32_t src_offset, diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index 3f79ff7e7d..c8169ecd41 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -51,6 +51,47 @@ gcd_pow2_u64(uint64_t a, uint64_t b) return 1 << MIN2(a_log2, b_log2); } +/* Performs a memcpy operation without thrashing the pipeline state. */ +void +genX(cmd_buffer_mem_mem_cpy)(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dst, uint32_t dst_offset, + struct anv_bo *src, uint32_t src_offset, + uint32_t size) +{ + /* This memcpy expects dword aligned memory. 
*/ + assert(size % 4 == 0); + assert(dst_offset % 4 == 0); + assert(src_offset % 4 == 0); + + for (uint32_t i = 0; i < size; i += 4) { + const struct anv_address src_addr = + (struct anv_address) { src, src_offset + i}; + const struct anv_address dst_addr = + (struct anv_address) { dst, dst_offset + i}; +#if GEN_GEN >= 8 + anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) { + cp.DestinationMemoryAddress = dst_addr; + cp.SourceMemoryAddress = src_addr; + } +#else + /* IVB does not have a general purpose register for command streamer + * commands. Therefore, we use an alternate temporary register. + */ +#define TEMP_REG 0x2400 /* MI_PREDICATE_SRC0 */ + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) { + load.RegisterAddress = TEMP_REG; + load.MemoryAddress = src_addr; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) { + store.RegisterAddress = TEMP_REG; + store.MemoryAddress = dst_addr; + } +#undef TEMP_REG +#endif + } + return; +} + void genX(cmd_buffer_streamout_cpy)(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *dst, uint32_t dst_offset, -- 2.12.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev