Hi Nicolai, Can you add a named constant R600_MAX_STREAM = 4 and use it in place of the literal 4 used for the stream count, to make the code more readable?
Thanks, Marek On Thu, Jul 27, 2017 at 9:14 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > --- > src/gallium/drivers/radeon/r600_query.c | 96 > ++++++++++++++++++++++++--------- > 1 file changed, 71 insertions(+), 25 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_query.c > b/src/gallium/drivers/radeon/r600_query.c > index 9f33bac..4c727d6 100644 > --- a/src/gallium/drivers/radeon/r600_query.c > +++ b/src/gallium/drivers/radeon/r600_query.c > @@ -648,6 +648,12 @@ static struct pipe_query *r600_query_hw_create(struct > r600_common_screen *rscree > query->num_cs_dw_end = 6; > query->stream = index; > break; > + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: > + /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ > + query->result_size = 32 * 4; > + query->num_cs_dw_begin = 6 * 4; > + query->num_cs_dw_end = 6 * 4; > + break; > case PIPE_QUERY_PIPELINE_STATISTICS: > /* 11 values on EG, 8 on R600. */ > query->result_size = (rscreen->chip_class >= EVERGREEN ? 
11 : > 8) * 16; > @@ -696,9 +702,9 @@ static void r600_update_occlusion_query_state(struct > r600_common_context *rctx, > } > } > > -static unsigned event_type_for_stream(struct r600_query_hw *query) > +static unsigned event_type_for_stream(unsigned stream) > { > - switch (query->stream) { > + switch (stream) { > default: > case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS; > case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1; > @@ -707,6 +713,15 @@ static unsigned event_type_for_stream(struct > r600_query_hw *query) > } > } > > +static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va, > + unsigned stream) > +{ > + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); > + radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | > EVENT_INDEX(3)); > + radeon_emit(cs, va); > + radeon_emit(cs, va >> 32); > +} > + > static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, > struct r600_query_hw *query, > struct r600_resource *buffer, > @@ -726,10 +741,11 @@ static void r600_query_hw_do_emit_start(struct > r600_common_context *ctx, > case PIPE_QUERY_PRIMITIVES_GENERATED: > case PIPE_QUERY_SO_STATISTICS: > case PIPE_QUERY_SO_OVERFLOW_PREDICATE: > - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); > - radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | > EVENT_INDEX(3)); > - radeon_emit(cs, va); > - radeon_emit(cs, va >> 32); > + emit_sample_streamout(cs, va, query->stream); > + break; > + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: > + for (unsigned stream = 0; stream < 4; ++stream) > + emit_sample_streamout(cs, va + 32 * stream, stream); > break; > case PIPE_QUERY_TIME_ELAPSED: > if (ctx->chip_class >= SI) { > @@ -821,11 +837,13 @@ static void r600_query_hw_do_emit_stop(struct > r600_common_context *ctx, > case PIPE_QUERY_PRIMITIVES_GENERATED: > case PIPE_QUERY_SO_STATISTICS: > case PIPE_QUERY_SO_OVERFLOW_PREDICATE: > - va += query->result_size/2; > - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); > - radeon_emit(cs, 
EVENT_TYPE(event_type_for_stream(query)) | > EVENT_INDEX(3)); > - radeon_emit(cs, va); > - radeon_emit(cs, va >> 32); > + va += 16; > + emit_sample_streamout(cs, va, query->stream); > + break; > + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: > + va += 16; > + for (unsigned stream = 0; stream < 4; ++stream) > + emit_sample_streamout(cs, va + 32 * stream, stream); > break; > case PIPE_QUERY_TIME_ELAPSED: > va += 8; > @@ -885,10 +903,29 @@ static void r600_query_hw_emit_stop(struct > r600_common_context *ctx, > r600_update_prims_generated_query_state(ctx, query->b.type, -1); > } > > +static void emit_set_predicate(struct r600_common_context *ctx, > + struct r600_resource *buf, uint64_t va, > + uint32_t op) > +{ > + struct radeon_winsys_cs *cs = ctx->gfx.cs; > + > + if (ctx->chip_class >= GFX9) { > + radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0)); > + radeon_emit(cs, op); > + radeon_emit(cs, va); > + radeon_emit(cs, va >> 32); > + } else { > + radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); > + radeon_emit(cs, va); > + radeon_emit(cs, op | ((va >> 32) & 0xFF)); > + } > + r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ, > + RADEON_PRIO_QUERY); > +} > + > static void r600_emit_query_predication(struct r600_common_context *ctx, > struct r600_atom *atom) > { > - struct radeon_winsys_cs *cs = ctx->gfx.cs; > struct r600_query_hw *query = (struct r600_query_hw > *)ctx->render_cond; > struct r600_query_buffer *qbuf; > uint32_t op; > @@ -907,6 +944,7 @@ static void r600_emit_query_predication(struct > r600_common_context *ctx, > op = PRED_OP(PREDICATION_OP_ZPASS); > break; > case PIPE_QUERY_SO_OVERFLOW_PREDICATE: > + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: > op = PRED_OP(PREDICATION_OP_PRIMCOUNT); > invert = !invert; > break; > @@ -931,22 +969,19 @@ static void r600_emit_query_predication(struct > r600_common_context *ctx, > while (results_base < qbuf->results_end) { > uint64_t va = va_base + results_base; > > - if (ctx->chip_class >= GFX9) { > - radeon_emit(cs, 
PKT3(PKT3_SET_PREDICATION, 2, > 0)); > - radeon_emit(cs, op); > - radeon_emit(cs, va); > - radeon_emit(cs, va >> 32); > + if (query->b.type == > PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { > + for (unsigned stream = 0; stream < 4; > ++stream) { > + emit_set_predicate(ctx, qbuf->buf, va > + 32 * stream, op); > + > + /* set CONTINUE bit for all packets > except the first */ > + op |= PREDICATION_CONTINUE; > + } > } else { > - radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, > 0)); > - radeon_emit(cs, va); > - radeon_emit(cs, op | ((va >> 32) & 0xFF)); > + emit_set_predicate(ctx, qbuf->buf, va, op); > + op |= PREDICATION_CONTINUE; > } > - r600_emit_reloc(ctx, &ctx->gfx, qbuf->buf, > RADEON_USAGE_READ, > - RADEON_PRIO_QUERY); > - results_base += query->result_size; > > - /* set CONTINUE bit for all packets except the first > */ > - op |= PREDICATION_CONTINUE; > + results_base += query->result_size; > } > } > } > @@ -1185,6 +1220,14 @@ static void r600_query_hw_add_result(struct > r600_common_screen *rscreen, > r600_query_read_result(buffer, 2, 6, true) != > r600_query_read_result(buffer, 0, 4, true); > break; > + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: > + for (unsigned stream = 0; stream < 4; ++stream) { > + result->b = result->b || > + r600_query_read_result(buffer, 2, 6, true) != > + r600_query_read_result(buffer, 0, 4, true); > + buffer = (char *)buffer + 32; > + } > + break; > case PIPE_QUERY_PIPELINE_STATISTICS: > if (rscreen->chip_class >= EVERGREEN) { > result->pipeline_statistics.ps_invocations += > @@ -1698,6 +1741,9 @@ static void r600_render_condition(struct pipe_context > *ctx, > if (query) { > for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) > atom->num_dw += (qbuf->results_end / > rquery->result_size) * 5; > + > + if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) > + atom->num_dw *= 4; > } > > rctx->set_atom_dirty(rctx, atom, query != NULL); > -- > 2.9.3 > > _______________________________________________ > mesa-dev mailing list > 
mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev