On Tuesday, April 3, 2018 7:48:10 AM PDT Lionel Landwerlin wrote: > This register contains the current/previous frequency of the GT, it's > one of the value GPA would like to have as part of their queries. > > v2: Don't use this register on baytrail/cherryview (Ken) > Use GET_FIELD() macro (Ken) > > Signed-off-by: Lionel Landwerlin <[email protected]> > --- > src/mesa/drivers/dri/i965/brw_defines.h | 12 ++++++ > src/mesa/drivers/dri/i965/brw_performance_query.c | 50 > +++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_performance_query.h | 5 +++ > 3 files changed, 67 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 8bf6f68b67c..855f1c7d744 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1656,6 +1656,18 @@ enum brw_pixel_shader_coverage_mask_mode { > #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */ > # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) > > +#define GEN7_RPSTAT1 0xA01C > +#define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 > +#define GEN7_RPSTAT1_CURR_GT_FREQ_MASK INTEL_MASK(13, 7) > +#define GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT 0 > +#define GEN7_RPSTAT1_PREV_GT_FREQ_MASK INTEL_MASK(6, 0) > + > +#define GEN9_RPSTAT0 0xA01C > +#define GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT 23 > +#define GEN9_RPSTAT0_CURR_GT_FREQ_MASK INTEL_MASK(31, 23) > +#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0 > +#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0) > + > #define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gen9+ */ > # define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7) > # define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7) > diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c > b/src/mesa/drivers/dri/i965/brw_performance_query.c > index 44cac85c6e6..32cf96a333d 100644 > --- a/src/mesa/drivers/dri/i965/brw_performance_query.c > +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c > @@ -216,6 +216,8 @@ brw_perf_query(struct gl_perf_query_object *o) > 
> #define MI_RPC_BO_SIZE 4096 > #define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2) > +#define MI_FREQ_START_OFFSET_BYTES (3072) > +#define MI_FREQ_END_OFFSET_BYTES (3076) > > > /******************************************************************************/ > > @@ -946,6 +948,21 @@ close_perf(struct brw_context *brw) > } > } > > +static void > +capture_frequency_stat_register(struct brw_context *brw, > + struct brw_bo *bo, > + uint32_t bo_offset) > +{ > + const struct gen_device_info *devinfo = &brw->screen->devinfo; > + > + if (devinfo->gen >= 7 && devinfo->gen <= 8 && > + !devinfo->is_baytrail && !devinfo->is_cherryview) { > + brw_store_register_mem32(brw, bo, GEN7_RPSTAT1, bo_offset); > + } else if (devinfo->gen >= 9) { > + brw_store_register_mem32(brw, bo, GEN9_RPSTAT0, bo_offset); > + } > +} > + > /** > * Driver hook for glBeginPerfQueryINTEL(). > */ > @@ -1138,6 +1155,8 @@ brw_begin_perf_query(struct gl_context *ctx, > /* Take a starting OA counter snapshot. */ > brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0, > obj->oa.begin_report_id); > + capture_frequency_stat_register(brw, obj->oa.bo, > MI_FREQ_START_OFFSET_BYTES); > + > ++brw->perfquery.n_active_oa_queries; > > /* No already-buffered samples can possibly be associated with this > query > @@ -1221,6 +1240,7 @@ brw_end_perf_query(struct gl_context *ctx, > */ > if (!obj->oa.results_accumulated) { > /* Take an ending OA counter snapshot. 
*/ > + capture_frequency_stat_register(brw, obj->oa.bo, > MI_FREQ_END_OFFSET_BYTES); > brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, > MI_RPC_BO_END_OFFSET_BYTES, > obj->oa.begin_report_id + 1); > @@ -1321,6 +1341,35 @@ brw_is_perf_query_ready(struct gl_context *ctx, > return false; > } > > +static void > +read_gt_frequency(struct brw_context *brw, > + struct brw_perf_query_object *obj) > +{ > + const struct gen_device_info *devinfo = &brw->screen->devinfo; > + uint32_t start = *((uint32_t *)(obj->oa.map + > MI_FREQ_START_OFFSET_BYTES)), > + end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES)); > + > + switch (devinfo->gen) { > + case 7: > + case 8: > + obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) > * 50ULL; > + obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * > 50ULL; > + break; > + case 9: > + case 10: > + case 11: > + obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) > * 50ULL / 3ULL; > + obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * > 50ULL / 3ULL;
Thanks for the pointers to the other kernel code in your reply to v1. This looks right. One thing I noticed is that the kernel rounds to the closest integer, while this code will truncate, but I don't think that's too crucial. Reviewed-by: Kenneth Graunke <[email protected]>
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
