Re: [Mesa-dev] [PATCH] radv: make sure CP DMA is idle at the end of IBs
On 06/22/2018 05:37 PM, Bas Nieuwenhuizen wrote: I'm wondering whether we need to do this more often in pipeline barriers? Yeah, you are right. Also I'd really appreciate if you could add a check to see if there is any CP DMA activity in the first place. Sure, will do. Thanks for your feedback. On Thu, Jun 21, 2018 at 11:04 AM, Samuel Pitoiset wrote: Ported from RadeonSI. This might fix some synchronization issues. I don't know if that will affect performance. CC: Signed-off-by: Samuel Pitoiset --- src/amd/vulkan/radv_cmd_buffer.c | 6 ++ src/amd/vulkan/radv_private.h| 2 ++ src/amd/vulkan/si_cmd_buffer.c | 12 +++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 8bd41bc41ac..a8ab4d3b977 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2585,6 +2585,12 @@ VkResult radv_EndCommandBuffer( si_emit_cache_flush(cmd_buffer); } + /* Make sure CP DMA is idle at the end of IBs after L2 prefetches +* because the kernel doesn't wait for it. +*/ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) + si_cp_dma_wait_for_idle(cmd_buffer); + vk_free(_buffer->pool->alloc, cmd_buffer->state.attachments); if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs)) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index c77a8b297f8..b0ecb626d4c 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1088,6 +1088,8 @@ void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size); void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value); +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); + void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer); bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index e350bccae33..a5ee79a9e72 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -1034,7 +1034,6 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t header = 0, command = 0; - assert(size); assert(size <= cp_dma_max_byte_count(cmd_buffer)); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9); @@ -1237,6 +1236,17 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, } } +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) +{ + /* Issue a dummy DMA that copies zero bytes. +* +* The DMA engine will see that there's no work to do and skip this +* DMA request, however, the CP will see the sync flag and still wait +* for all DMAs to complete. +*/ + si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC); +} + /* For MSAA sample positions. */ #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \ -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radv: make sure CP DMA is idle at the end of IBs
I'm wondering whether we need to do this more often in pipeline barriers? Also I'd really appreciate if you could add a check to see if there is any CP DMA activity in the first place. On Thu, Jun 21, 2018 at 11:04 AM, Samuel Pitoiset wrote: > Ported from RadeonSI. > This might fix some synchronization issues. > > I don't know if that will affect performance. > > CC: > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/radv_cmd_buffer.c | 6 ++ > src/amd/vulkan/radv_private.h| 2 ++ > src/amd/vulkan/si_cmd_buffer.c | 12 +++- > 3 files changed, 19 insertions(+), 1 deletion(-) > > diff --git a/src/amd/vulkan/radv_cmd_buffer.c > b/src/amd/vulkan/radv_cmd_buffer.c > index 8bd41bc41ac..a8ab4d3b977 100644 > --- a/src/amd/vulkan/radv_cmd_buffer.c > +++ b/src/amd/vulkan/radv_cmd_buffer.c > @@ -2585,6 +2585,12 @@ VkResult radv_EndCommandBuffer( > si_emit_cache_flush(cmd_buffer); > } > > + /* Make sure CP DMA is idle at the end of IBs after L2 prefetches > +* because the kernel doesn't wait for it. > +*/ > + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) > + si_cp_dma_wait_for_idle(cmd_buffer); > + > vk_free(_buffer->pool->alloc, cmd_buffer->state.attachments); > > if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs)) > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > index c77a8b297f8..b0ecb626d4c 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -1088,6 +1088,8 @@ void si_cp_dma_prefetch(struct radv_cmd_buffer > *cmd_buffer, uint64_t va, > unsigned size); > void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, > uint64_t size, unsigned value); > +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); > + > void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer); > bool > radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, > diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c > index e350bccae33..a5ee79a9e72 100644 > --- a/src/amd/vulkan/si_cmd_buffer.c > +++ b/src/amd/vulkan/si_cmd_buffer.c > @@ -1034,7 +1034,6 @@ static void si_emit_cp_dma(struct radv_cmd_buffer > *cmd_buffer, > struct radeon_cmdbuf *cs = cmd_buffer->cs; > uint32_t header = 0, command = 0; > > - assert(size); > assert(size <= cp_dma_max_byte_count(cmd_buffer)); > > radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9); > @@ -1237,6 +1236,17 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer > *cmd_buffer, uint64_t va, > } > } > > +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) > +{ > + /* Issue a dummy DMA that copies zero bytes. > +* > +* The DMA engine will see that there's no work to do and skip this > +* DMA request, however, the CP will see the sync flag and still wait > +* for all DMAs to complete. > +*/ > + si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC); > +} > + > /* For MSAA sample positions. */ > #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ > (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \ > -- > 2.17.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radv: make sure CP DMA is idle at the end of IBs
Ported from RadeonSI. This might fix some synchronization issues. I don't know if that will affect performance. CC: Signed-off-by: Samuel Pitoiset --- src/amd/vulkan/radv_cmd_buffer.c | 6 ++ src/amd/vulkan/radv_private.h| 2 ++ src/amd/vulkan/si_cmd_buffer.c | 12 +++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 8bd41bc41ac..a8ab4d3b977 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2585,6 +2585,12 @@ VkResult radv_EndCommandBuffer( si_emit_cache_flush(cmd_buffer); } + /* Make sure CP DMA is idle at the end of IBs after L2 prefetches +* because the kernel doesn't wait for it. +*/ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) + si_cp_dma_wait_for_idle(cmd_buffer); + vk_free(_buffer->pool->alloc, cmd_buffer->state.attachments); if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs)) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index c77a8b297f8..b0ecb626d4c 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1088,6 +1088,8 @@ void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size); void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value); +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); + void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer); bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index e350bccae33..a5ee79a9e72 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -1034,7 +1034,6 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t header = 0, command = 0; - assert(size); assert(size <= cp_dma_max_byte_count(cmd_buffer)); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9); @@ -1237,6 +1236,17 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, } } +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) +{ + /* Issue a dummy DMA that copies zero bytes. +* +* The DMA engine will see that there's no work to do and skip this +* DMA request, however, the CP will see the sync flag and still wait +* for all DMAs to complete. +*/ + si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC); +} + /* For MSAA sample positions. */ #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \ -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev