Reviewed-by: George Kyriazis <george.kyria...@intel.com>
> -----Original Message----- > From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On > Behalf Of BruceCherniak > Sent: Thursday, April 28, 2016 12:13 PM > To: mesa-dev@lists.freedesktop.org > Subject: [Mesa-dev] [PATCH] swr: Remove stall waiting for core query > counters. > > When gathering query results, swr_gather_stats was > unnecessarily stalling the entire pipeline. Results are now > collected asynchronously, with a fence marking completion. > --- > src/gallium/drivers/swr/swr_fence.cpp | 6 - > src/gallium/drivers/swr/swr_fence.h | 8 ++ > src/gallium/drivers/swr/swr_query.cpp | 180 ++++++++++++------------------ > --- > src/gallium/drivers/swr/swr_query.h | 11 ++- > 4 files changed, 81 insertions(+), 124 deletions(-) > > diff --git a/src/gallium/drivers/swr/swr_fence.cpp > b/src/gallium/drivers/swr/swr_fence.cpp > index 2e95b39..8a8e864 100644 > --- a/src/gallium/drivers/swr/swr_fence.cpp > +++ b/src/gallium/drivers/swr/swr_fence.cpp > @@ -105,12 +105,6 @@ swr_fence_reference(struct pipe_screen *screen, > swr_fence_destroy(old); > } > > -static INLINE boolean > -swr_is_fence_done(struct pipe_fence_handle *fence_handle) > -{ > - struct swr_fence *fence = swr_fence(fence_handle); > - return (fence->read == fence->write); > -} > > /* > * Wait for the fence to finish. 
> diff --git a/src/gallium/drivers/swr/swr_fence.h > b/src/gallium/drivers/swr/swr_fence.h > index df3776e..47f4d2e 100644 > --- a/src/gallium/drivers/swr/swr_fence.h > +++ b/src/gallium/drivers/swr/swr_fence.h > @@ -45,6 +45,14 @@ swr_fence(struct pipe_fence_handle *fence) > return (struct swr_fence *)fence; > } > > + > +static INLINE boolean > +swr_is_fence_done(struct pipe_fence_handle *fence_handle) > +{ > + struct swr_fence *fence = swr_fence(fence_handle); > + return (fence->read == fence->write); > +} > + > static INLINE boolean > swr_is_fence_pending(struct pipe_fence_handle *fence_handle) > { > diff --git a/src/gallium/drivers/swr/swr_query.cpp > b/src/gallium/drivers/swr/swr_query.cpp > index f038a6e..5c59965 100644 > --- a/src/gallium/drivers/swr/swr_query.cpp > +++ b/src/gallium/drivers/swr/swr_query.cpp > @@ -62,10 +62,8 @@ swr_destroy_query(struct pipe_context *pipe, struct > pipe_query *q) > struct swr_query *pq = swr_query(q); > > if (pq->fence) { > - if (!swr_is_fence_pending(pq->fence)) { > - swr_fence_submit(swr_context(pipe), pq->fence); > + if (swr_is_fence_pending(pq->fence)) > swr_fence_finish(pipe->screen, pq->fence, 0); > - } > swr_fence_reference(pipe->screen, &pq->fence, NULL); > } > > @@ -73,100 +71,45 @@ swr_destroy_query(struct pipe_context *pipe, > struct pipe_query *q) > } > > > -// XXX Create a fence callback, rather than stalling SwrWaitForIdle > static void > swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq) > { > struct swr_context *ctx = swr_context(pipe); > > assert(pq->result); > - union pipe_query_result *result = pq->result; > + struct swr_query_result *result = pq->result; > boolean enable_stats = pq->enable_stats; > - SWR_STATS swr_stats = {0}; > - > - if (pq->fence) { > - if (!swr_is_fence_pending(pq->fence)) { > - swr_fence_submit(ctx, pq->fence); > - swr_fence_finish(pipe->screen, pq->fence, 0); > - } > - swr_fence_reference(pipe->screen, &pq->fence, NULL); > - } > > - /* > - * These queries don't need 
SWR Stats enabled in the core > - * Set and return. > - */ > + /* A few results don't require the core, so don't involve it */ > switch (pq->type) { > case PIPE_QUERY_TIMESTAMP: > case PIPE_QUERY_TIME_ELAPSED: > - result->u64 = swr_get_timestamp(pipe->screen); > - return; > + result->timestamp = swr_get_timestamp(pipe->screen); > break; > case PIPE_QUERY_TIMESTAMP_DISJOINT: > - /* nothing to do here */ > - return; > - break; > case PIPE_QUERY_GPU_FINISHED: > - result->b = TRUE; /* XXX TODO Add an api func to SWR to compare > drawId > - vs LastRetiredId? */ > - return; > + /* nothing to do here */ > break; > default: > - /* Any query that needs SwrCore stats */ > - break; > - } > - > - /* > - * All other results are collected from SwrCore counters > - */ > + /* > + * All other results are collected from SwrCore counters via > + * SwrGetStats. This returns immediately, but results are later filled > + * in by the backend. Fence status is the only indication of > + * completion. */ > + SwrGetStats(ctx->swrContext, &result->core); > + > + if (!pq->fence) { > + struct swr_screen *screen = swr_screen(pipe->screen); > + swr_fence_reference(pipe->screen, &pq->fence, screen- > >flush_fence); > + } > + swr_fence_submit(ctx, pq->fence); > > - /* XXX, Should turn this into a fence callback and skip the stall */ > - SwrGetStats(ctx->swrContext, &swr_stats); > - /* SwrGetStats returns immediately, wait for collection */ > - SwrWaitForIdle(ctx->swrContext); > + /* Only change stat collection if there are no active queries */ > + if (ctx->active_queries == 0) > + SwrEnableStats(ctx->swrContext, enable_stats); > > - switch (pq->type) { > - case PIPE_QUERY_OCCLUSION_PREDICATE: > - case PIPE_QUERY_OCCLUSION_COUNTER: > - result->u64 = swr_stats.DepthPassCount; > - break; > - case PIPE_QUERY_PRIMITIVES_GENERATED: > - result->u64 = swr_stats.IaPrimitives; > - break; > - case PIPE_QUERY_PRIMITIVES_EMITTED: > - result->u64 = swr_stats.SoNumPrimsWritten[pq->index]; > - break; > - case 
PIPE_QUERY_SO_STATISTICS: > - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: { > - struct pipe_query_data_so_statistics *so_stats = > &result->so_statistics; > - so_stats->num_primitives_written = > - swr_stats.SoNumPrimsWritten[pq->index]; > - so_stats->primitives_storage_needed = > - swr_stats.SoPrimStorageNeeded[pq->index]; > - } break; > - case PIPE_QUERY_PIPELINE_STATISTICS: { > - struct pipe_query_data_pipeline_statistics *p_stats = > - &result->pipeline_statistics; > - p_stats->ia_vertices = swr_stats.IaVertices; > - p_stats->ia_primitives = swr_stats.IaPrimitives; > - p_stats->vs_invocations = swr_stats.VsInvocations; > - p_stats->gs_invocations = swr_stats.GsInvocations; > - p_stats->gs_primitives = swr_stats.GsPrimitives; > - p_stats->c_invocations = swr_stats.CPrimitives; > - p_stats->c_primitives = swr_stats.CPrimitives; > - p_stats->ps_invocations = swr_stats.PsInvocations; > - p_stats->hs_invocations = swr_stats.HsInvocations; > - p_stats->ds_invocations = swr_stats.DsInvocations; > - p_stats->cs_invocations = swr_stats.CsInvocations; > - } break; > - default: > - assert(0 && "Unsupported query"); > break; > } > - > - /* Only change stat collection if there are no active queries */ > - if (ctx->active_queries == 0) > - SwrEnableStats(ctx->swrContext, enable_stats); > } > > > @@ -176,16 +119,16 @@ swr_get_query_result(struct pipe_context *pipe, > boolean wait, > union pipe_query_result *result) > { > - struct swr_context *ctx = swr_context(pipe); > struct swr_query *pq = swr_query(q); > + struct swr_query_result *start = &pq->start; > + struct swr_query_result *end = &pq->end; > + unsigned index = pq->index; > > if (pq->fence) { > - if (!swr_is_fence_pending(pq->fence)) { > - swr_fence_submit(ctx, pq->fence); > - if (!wait) > - return FALSE; > - swr_fence_finish(pipe->screen, pq->fence, 0); > - } > + if (!wait && !swr_is_fence_done(pq->fence)) > + return FALSE; > + > + swr_fence_finish(pipe->screen, pq->fence, 0); > swr_fence_reference(pipe->screen, 
&pq->fence, NULL); > } > > @@ -194,62 +137,67 @@ swr_get_query_result(struct pipe_context *pipe, > switch (pq->type) { > /* Booleans */ > case PIPE_QUERY_OCCLUSION_PREDICATE: > - result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE; > + result->b = end->core.DepthPassCount != start->core.DepthPassCount; > break; > case PIPE_QUERY_GPU_FINISHED: > - result->b = pq->end.b; > + result->b = TRUE; > break; > /* Counters */ > case PIPE_QUERY_OCCLUSION_COUNTER: > + result->u64 = end->core.DepthPassCount - start->core.DepthPassCount; > + break; > case PIPE_QUERY_TIMESTAMP: > case PIPE_QUERY_TIME_ELAPSED: > + result->u64 = end->timestamp - start->timestamp; > + break; > case PIPE_QUERY_PRIMITIVES_GENERATED: > + result->u64 = end->core.IaPrimitives - start->core.IaPrimitives; > case PIPE_QUERY_PRIMITIVES_EMITTED: > - result->u64 = pq->end.u64 - pq->start.u64; > + result->u64 = end->core.SoNumPrimsWritten[index] > + - start->core.SoNumPrimsWritten[index]; > break; > /* Structures */ > case PIPE_QUERY_SO_STATISTICS: { > struct pipe_query_data_so_statistics *so_stats = > &result->so_statistics; > - struct pipe_query_data_so_statistics *start = &pq->start.so_statistics; > - struct pipe_query_data_so_statistics *end = &pq->end.so_statistics; > + struct SWR_STATS *start = &pq->start.core; > + struct SWR_STATS *end = &pq->end.core; > so_stats->num_primitives_written = > - end->num_primitives_written - start->num_primitives_written; > + end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index]; > so_stats->primitives_storage_needed = > - end->primitives_storage_needed - start->primitives_storage_needed; > + end->SoPrimStorageNeeded[index] - start- > >SoPrimStorageNeeded[index]; > } break; > - case PIPE_QUERY_TIMESTAMP_DISJOINT: { > + case PIPE_QUERY_TIMESTAMP_DISJOINT: > /* os_get_time_nano returns nanoseconds */ > result->timestamp_disjoint.frequency = UINT64_C(1000000000); > result->timestamp_disjoint.disjoint = FALSE; > - } break; > + break; > case 
PIPE_QUERY_PIPELINE_STATISTICS: { > struct pipe_query_data_pipeline_statistics *p_stats = > &result->pipeline_statistics; > - struct pipe_query_data_pipeline_statistics *start = > - &pq->start.pipeline_statistics; > - struct pipe_query_data_pipeline_statistics *end = > - &pq->end.pipeline_statistics; > - p_stats->ia_vertices = end->ia_vertices - start->ia_vertices; > - p_stats->ia_primitives = end->ia_primitives - start->ia_primitives; > - p_stats->vs_invocations = end->vs_invocations - start->vs_invocations; > - p_stats->gs_invocations = end->gs_invocations - start->gs_invocations; > - p_stats->gs_primitives = end->gs_primitives - start->gs_primitives; > - p_stats->c_invocations = end->c_invocations - start->c_invocations; > - p_stats->c_primitives = end->c_primitives - start->c_primitives; > - p_stats->ps_invocations = end->ps_invocations - start->ps_invocations; > - p_stats->hs_invocations = end->hs_invocations - start->hs_invocations; > - p_stats->ds_invocations = end->ds_invocations - start->ds_invocations; > - p_stats->cs_invocations = end->cs_invocations - start->cs_invocations; > - } break; > + struct SWR_STATS *start = &pq->start.core; > + struct SWR_STATS *end = &pq->end.core; > + p_stats->ia_vertices = end->IaVertices - start->IaVertices; > + p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives; > + p_stats->vs_invocations = end->VsInvocations - start->VsInvocations; > + p_stats->gs_invocations = end->GsInvocations - start->GsInvocations; > + p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives; > + p_stats->c_invocations = end->CPrimitives - start->CPrimitives; > + p_stats->c_primitives = end->CPrimitives - start->CPrimitives; > + p_stats->ps_invocations = end->PsInvocations - start->PsInvocations; > + p_stats->hs_invocations = end->HsInvocations - start->HsInvocations; > + p_stats->ds_invocations = end->DsInvocations - start->DsInvocations; > + p_stats->cs_invocations = end->CsInvocations - start->CsInvocations; > + } break; > 
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: { > - struct pipe_query_data_so_statistics *start = &pq->start.so_statistics; > - struct pipe_query_data_so_statistics *end = &pq->end.so_statistics; > + struct SWR_STATS *start = &pq->start.core; > + struct SWR_STATS *end = &pq->end.core; > uint64_t num_primitives_written = > - end->num_primitives_written - start->num_primitives_written; > + end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index]; > uint64_t primitives_storage_needed = > - end->primitives_storage_needed - start->primitives_storage_needed; > + end->SoPrimStorageNeeded[index] - start- > >SoPrimStorageNeeded[index]; > result->b = num_primitives_written > primitives_storage_needed; > - } break; > + } > + break; > default: > assert(0 && "Unsupported query"); > break; > @@ -264,6 +212,8 @@ swr_begin_query(struct pipe_context *pipe, struct > pipe_query *q) > struct swr_context *ctx = swr_context(pipe); > struct swr_query *pq = swr_query(q); > > + assert(!pq->enable_stats && "swr_begin_query: Query is already > active!"); > + > /* Initialize Results */ > memset(&pq->start, 0, sizeof(pq->start)); > memset(&pq->end, 0, sizeof(pq->end)); > @@ -276,7 +226,7 @@ swr_begin_query(struct pipe_context *pipe, struct > pipe_query *q) > > /* override start timestamp to 0 for TIMESTAMP query */ > if (pq->type == PIPE_QUERY_TIMESTAMP) > - pq->start.u64 = 0; > + pq->start.timestamp = 0; > > return true; > } > diff --git a/src/gallium/drivers/swr/swr_query.h > b/src/gallium/drivers/swr/swr_query.h > index 836d07b..0ab034d 100644 > --- a/src/gallium/drivers/swr/swr_query.h > +++ b/src/gallium/drivers/swr/swr_query.h > @@ -27,13 +27,18 @@ > > #include <limits.h> > > +struct swr_query_result { > + SWR_STATS core; > + uint64_t timestamp; > +}; > + > struct swr_query { > unsigned type; /* PIPE_QUERY_* */ > unsigned index; > > - union pipe_query_result *result; > - union pipe_query_result start; > - union pipe_query_result end; > + struct swr_query_result *result; > + struct 
swr_query_result start; > + struct swr_query_result end; > > struct pipe_fence_handle *fence; > > -- > 1.7.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
