Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek

On Sun, May 8, 2016 at 12:06 AM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > This is useful for shader-related counters, since they tend to quickly > exceed 32 bits. > --- > src/gallium/drivers/radeon/r600_perfcounter.c | 22 +++++++++++----------- > src/gallium/drivers/radeonsi/si_perfcounter.c | 13 ++++++++----- > 2 files changed, 19 insertions(+), 16 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c > b/src/gallium/drivers/radeon/r600_perfcounter.c > index 9ab17d9..af9a692 100644 > --- a/src/gallium/drivers/radeon/r600_perfcounter.c > +++ b/src/gallium/drivers/radeon/r600_perfcounter.c > @@ -84,8 +84,8 @@ struct r600_pc_group { > > struct r600_pc_counter { > unsigned base; > - unsigned dwords; > - unsigned stride; > + unsigned qwords; > + unsigned stride; /* in uint64s */ > }; > > #define R600_PC_SHADERS_WINDOWING (1 << 31) > @@ -172,7 +172,7 @@ static void r600_pc_query_emit_stop(struct > r600_common_context *ctx, > pc->emit_read(ctx, block, > group->num_counters, > group->selectors, > buffer, va); > - va += 4 * group->num_counters; > + va += sizeof(uint64_t) * group->num_counters; > } while (group->instance < 0 && ++instance < > block->num_instances); > } while (++se < se_end); > } > @@ -194,15 +194,15 @@ static void r600_pc_query_add_result(struct > r600_common_context *ctx, > union pipe_query_result *result) > { > struct r600_query_pc *query = (struct r600_query_pc *)hwquery; > - uint32_t *results = buffer; > + uint64_t *results = buffer; > unsigned i, j; > > for (i = 0; i < query->num_counters; ++i) { > struct r600_pc_counter *counter = &query->counters[i]; > > - for (j = 0; j < counter->dwords; ++j) { > + for (j = 0; j < counter->qwords; ++j) { > uint32_t value = results[counter->base + j * > counter->stride]; > - result->batch[i].u32 += value; > + result->batch[i].u64 += value; > } > } > } > @@ -361,7 +361,7 @@ struct pipe_query *r600_create_batch_query(struct > pipe_context 
*ctx, > instances *= block->num_instances; > > group->result_base = i; > - query->b.result_size += 4 * instances * group->num_counters; > + query->b.result_size += sizeof(uint64_t) * instances * > group->num_counters; > i += instances * group->num_counters; > > pc->get_size(block, group->num_counters, group->selectors, > @@ -401,11 +401,11 @@ struct pipe_query *r600_create_batch_query(struct > pipe_context *ctx, > counter->base = group->result_base + j; > counter->stride = group->num_counters; > > - counter->dwords = 1; > + counter->qwords = 1; > if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0) > - counter->dwords = screen->info.max_se; > + counter->qwords = screen->info.max_se; > if (group->instance < 0) > - counter->dwords *= block->num_instances; > + counter->qwords *= block->num_instances; > } > > if (!r600_query_hw_init(rctx, &query->b)) > @@ -535,7 +535,7 @@ int r600_get_perfcounter_info(struct r600_common_screen > *screen, > info->name = block->selector_names + sub * > block->selector_name_stride; > info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index; > info->max_value.u64 = 0; > - info->type = PIPE_DRIVER_QUERY_TYPE_UINT; > + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; > info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; > info->group_id = base_gid + sub / block->num_selectors; > info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; > diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c > b/src/gallium/drivers/radeonsi/si_perfcounter.c > index 04da197..96007a5 100644 > --- a/src/gallium/drivers/radeonsi/si_perfcounter.c > +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c > @@ -208,6 +208,7 @@ static struct si_pc_block_base cik_PA_SC = { > .layout = SI_PC_MULTI_ALTERNATE, > }; > > +/* According to docs, PA_SU counters are only 48 bits wide. 
*/ > static struct si_pc_block_base cik_PA_SU = { > .name = "PA_SU", > .num_counters = 4, > @@ -651,24 +652,26 @@ static void si_pc_emit_read(struct r600_common_context > *ctx, > > radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); > radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | > - COPY_DATA_DST_SEL(COPY_DATA_MEM)); > + COPY_DATA_DST_SEL(COPY_DATA_MEM) | > + COPY_DATA_COUNT_SEL); /* 64 bits */ > radeon_emit(cs, reg >> 2); > radeon_emit(cs, 0); /* unused */ > radeon_emit(cs, va); > radeon_emit(cs, va >> 32); > - va += 4; > + va += sizeof(uint64_t); > reg += reg_delta; > } > } else { > for (idx = 0; idx < count; ++idx) { > radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); > radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | > - COPY_DATA_DST_SEL(COPY_DATA_MEM)); > + COPY_DATA_DST_SEL(COPY_DATA_MEM) | > + COPY_DATA_COUNT_SEL); > radeon_emit(cs, 0); /* immediate */ > - radeon_emit(cs, 0); /* unused */ > + radeon_emit(cs, 0); > radeon_emit(cs, va); > radeon_emit(cs, va >> 32); > - va += 4; > + va += sizeof(uint64_t); > } > } > } > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev