On Sun, Oct 22, 2017 at 9:07 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > --- > src/gallium/drivers/radeonsi/si_fence.c | 83 > ++++++++++++++++++++++++++++++++- > 1 file changed, 82 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/radeonsi/si_fence.c > b/src/gallium/drivers/radeonsi/si_fence.c > index b7b02b55831..bc0ae302945 100644 > --- a/src/gallium/drivers/radeonsi/si_fence.c > +++ b/src/gallium/drivers/radeonsi/si_fence.c > @@ -22,33 +22,41 @@ > * > */ > > #include <libsync.h> > > #include "util/os_time.h" > #include "util/u_memory.h" > #include "util/u_queue.h" > > #include "si_pipe.h" > +#include "radeon/r600_cs.h" > + > +struct si_fine_fence { > + struct r600_resource *buf; > + unsigned offset; > +}; > > struct si_multi_fence { > struct pipe_reference reference; > struct pipe_fence_handle *gfx; > struct pipe_fence_handle *sdma; > struct tc_unflushed_batch_token *tc_token; > struct util_queue_fence ready; > > /* If the context wasn't flushed at fence creation, this is non-NULL. > */ > struct { > struct r600_common_context *ctx; > unsigned ib_index; > } gfx_unflushed; > + > + struct si_fine_fence fine; > }; > > static void si_add_fence_dependency(struct r600_common_context *rctx, > struct pipe_fence_handle *fence) > { > struct radeon_winsys *ws = rctx->ws; > > if (rctx->dma.cs) > ws->cs_add_fence_dependency(rctx->dma.cs, fence); > ws->cs_add_fence_dependency(rctx->gfx.cs, fence); > @@ -59,20 +67,21 @@ static void si_fence_reference(struct pipe_screen *screen, > struct pipe_fence_handle *src) > { > struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws; > struct si_multi_fence **rdst = (struct si_multi_fence **)dst; > struct si_multi_fence *rsrc = (struct si_multi_fence *)src; > > if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { > ws->fence_reference(&(*rdst)->gfx, NULL); > ws->fence_reference(&(*rdst)->sdma, NULL); > tc_unflushed_batch_token_reference(&(*rdst)->tc_token, NULL); > + r600_resource_reference(&(*rdst)->fine.buf, NULL); > FREE(*rdst); > } > *rdst = rsrc; > } > > static struct si_multi_fence *si_create_multi_fence() > { > struct si_multi_fence *fence = CALLOC_STRUCT(si_multi_fence); > if (!fence) > return NULL; > @@ -132,20 +141,66 @@ static void si_fence_server_sync(struct pipe_context > *ctx, > * this fence dependency is signalled. > * > * Should we flush the context to allow more GPU parallelism? > */ > if (rfence->sdma) > si_add_fence_dependency(rctx, rfence->sdma); > if (rfence->gfx) > si_add_fence_dependency(rctx, rfence->gfx); > } > > +static bool si_fine_fence_signaled(struct radeon_winsys *rws, > + const struct si_fine_fence *fine) > +{ > + char *map = rws->buffer_map(fine->buf->buf, NULL, PIPE_TRANSFER_READ | > + > PIPE_TRANSFER_UNSYNCHRONIZED); > + if (!map) > + return false; > + > + uint32_t *fence = (uint32_t*)(map + fine->offset); > + return *fence != 0;
I think this is not coherent on GFX9 if MTYPE != UC. MTYPE is set by the GEM_CREATE ioctl. > +} > + > +static void si_fine_fence_set(struct si_context *ctx, > + struct si_fine_fence *fine, > + unsigned flags) > +{ > + assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | > PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1); > + > + u_suballocator_alloc(ctx->b.allocator_zeroed_memory, 4, 4, > + &fine->offset, (struct pipe_resource > **)&fine->buf); > + if (!fine->buf) > + return; > + > + uint64_t fence_va = fine->buf->gpu_address + fine->offset; > + > + radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, fine->buf, > + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); > + if (flags & PIPE_FLUSH_TOP_OF_PIPE) { > + struct radeon_winsys_cs *cs = ctx->b.gfx.cs; > + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); > + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | > + S_370_WR_CONFIRM(1) | > + S_370_ENGINE_SEL(V_370_PFP)); > + radeon_emit(cs, fence_va); > + radeon_emit(cs, fence_va >> 32); > + radeon_emit(cs, 0x80000000); > + } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) { > + si_gfx_write_event_eop(&ctx->b, V_028A90_BOTTOM_OF_PIPE_TS, 0, > + EOP_DATA_SEL_VALUE_32BIT, > + NULL, fence_va, 0x80000000, > + PIPE_QUERY_GPU_FINISHED); What's the difference between this and the winsys fence? The winsys fence is also a bottom-of-pipe fence. I don't see much difference between them because PIPE_FLUSH_BOTTOM_OF_PIPE doesn't prevent IB submission, so you'll always get a bottom-of-pipe fence in the winsys. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev