Re: [Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

2018-04-03 Thread Lionel Landwerlin

On 03/04/18 19:20, Kenneth Graunke wrote:

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 98666759d75..7d5b44cf61d 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -227,6 +227,8 @@ brw_perf_query(struct gl_perf_query_object *o)
   
   #define MI_RPC_BO_SIZE  4096

   #define MI_RPC_BO_END_OFFSET_BYTES  (MI_RPC_BO_SIZE / 2)
+#define MI_FREQ_START_OFFSET_BYTES  (3072)
+#define MI_FREQ_END_OFFSET_BYTES(3076)

Why these?

That's where I store the RPSTAT copy (before/after the workload).

Yeah, but I mean...why 3072?  Is it some arbtirary number?


Hmm I don't remember exactly how I arrived to that number :)
I'll use something more sensible (like BO_SIZE - 4).




   
/**/
   
@@ -1150,6 +1152,9 @@ brw_begin_perf_query(struct gl_context *ctx,

 /* Take a starting OA counter snapshot. */
 brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
 obj->oa.begin_report_id);
+  brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
+   MI_FREQ_START_OFFSET_BYTES);
+
 ++brw->perfquery.n_active_oa_queries;
   
 /* No already-buffered samples can possibly be associated with this query

@@ -1233,6 +1238,8 @@ brw_end_perf_query(struct gl_context *ctx,
  */
 if (!obj->oa.results_accumulated) {
/* Take an ending OA counter snapshot. */
+ brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
+  MI_FREQ_END_OFFSET_BYTES);
brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
MI_RPC_BO_END_OFFSET_BYTES,
obj->oa.begin_report_id + 1);
@@ -1333,6 +1340,43 @@ brw_is_perf_query_ready(struct gl_context *ctx,
  return false;
   }
   
+static void

+read_gt_frequency(struct brw_context *brw,
+  struct brw_perf_query_object *obj)
+{
+   const struct gen_device_info *devinfo = >screen->devinfo;
+   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
+  *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
+
+   switch (devinfo->gen) {
+   case 7:
+   case 8:
+  obj->oa.gt_frequency[0] =
+ ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
+  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;

You can just do:

GET_FIELD(start_reg[0], GEN6_RPSTAT1_CURR_GT_FREQ)

instead of shifting and masking.

I think your conversions may be wrong.  In particular, you don't handle
Gen9LP and Gen9 differently, while in the kernel, GT_PM_INTERVAL_TO_US
does:

Gen9 LP:  0.833 -> usec
Gen9+ non-LP: 1.33  -> usec
other:1.28  -> usec

#define INTERVAL_1_28_TO_US(interval)  (((interval) << 7) / 100)
#define INTERVAL_1_33_TO_US(interval)  (((interval) << 2) / 3)
#define INTERVAL_0_833_TO_US(interval) (((interval) * 5)  / 6)
#define GT_PM_INTERVAL_TO_US(dev_priv, interval) (INTEL_GEN(dev_priv) >= 9 ? \
 (IS_GEN9_LP(dev_priv) ? \
 INTERVAL_0_833_TO_US(interval) : \
 INTERVAL_1_33_TO_US(interval)) : \
 INTERVAL_1_28_TO_US(interval))

I could be mistaken, though.

Actually the kernel reads rpstat1 already and computes the frequency value.
I think the current code is equivalent to what the kernel does on big
cores & small cores >= gen9.

So...we need to multiply by 50, but don't need to convert to usec?
Otherwise I'm struggling to see how this is equivalent.


I think we read a number in frequency, not time interval :

 
https://cgit.freedesktop.org/drm-tip/tree/drivers/gpu/drm/i915/i915_debugfs.c#n1157
 https://cgit.freedesktop.org/drm-tip/tree/drivers/gpu/drm/i915/intel_pm.c#n9395




On cherryview/valleyview, we need to read another register to figure out
the multipliers...

So I'll just leave it out for those small cores gens for now.

That seems reasonable...



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

2018-04-03 Thread Kenneth Graunke
> >> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
> >> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> >> index 98666759d75..7d5b44cf61d 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> >> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> >> @@ -227,6 +227,8 @@ brw_perf_query(struct gl_perf_query_object *o)
> >>   
> >>   #define MI_RPC_BO_SIZE  4096
> >>   #define MI_RPC_BO_END_OFFSET_BYTES  (MI_RPC_BO_SIZE / 2)
> >> +#define MI_FREQ_START_OFFSET_BYTES  (3072)
> >> +#define MI_FREQ_END_OFFSET_BYTES(3076)
> > Why these?
> 
> That's where I store the RPSTAT copy (before/after the workload).

Yeah, but I mean...why 3072?  Is it some arbtirary number?

> >>   
> >> /**/
> >>   
> >> @@ -1150,6 +1152,9 @@ brw_begin_perf_query(struct gl_context *ctx,
> >> /* Take a starting OA counter snapshot. */
> >> brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
> >> obj->oa.begin_report_id);
> >> +  brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
> >> +   MI_FREQ_START_OFFSET_BYTES);
> >> +
> >> ++brw->perfquery.n_active_oa_queries;
> >>   
> >> /* No already-buffered samples can possibly be associated with 
> >> this query
> >> @@ -1233,6 +1238,8 @@ brw_end_perf_query(struct gl_context *ctx,
> >>  */
> >> if (!obj->oa.results_accumulated) {
> >>/* Take an ending OA counter snapshot. */
> >> + brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
> >> +  MI_FREQ_END_OFFSET_BYTES);
> >>brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
> >>MI_RPC_BO_END_OFFSET_BYTES,
> >>obj->oa.begin_report_id + 
> >> 1);
> >> @@ -1333,6 +1340,43 @@ brw_is_perf_query_ready(struct gl_context *ctx,
> >>  return false;
> >>   }
> >>   
> >> +static void
> >> +read_gt_frequency(struct brw_context *brw,
> >> +  struct brw_perf_query_object *obj)
> >> +{
> >> +   const struct gen_device_info *devinfo = >screen->devinfo;
> >> +   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
> >> +  *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
> >> +
> >> +   switch (devinfo->gen) {
> >> +   case 7:
> >> +   case 8:
> >> +  obj->oa.gt_frequency[0] =
> >> + ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
> >> +  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;
> > You can just do:
> >
> >GET_FIELD(start_reg[0], GEN6_RPSTAT1_CURR_GT_FREQ)
> >
> > instead of shifting and masking.
> >
> > I think your conversions may be wrong.  In particular, you don't handle
> > Gen9LP and Gen9 differently, while in the kernel, GT_PM_INTERVAL_TO_US
> > does:
> >
> >Gen9 LP:  0.833 -> usec
> >Gen9+ non-LP: 1.33  -> usec
> >other:1.28  -> usec
> >
> > #define INTERVAL_1_28_TO_US(interval)  (((interval) << 7) / 100)
> > #define INTERVAL_1_33_TO_US(interval)  (((interval) << 2) / 3)
> > #define INTERVAL_0_833_TO_US(interval) (((interval) * 5)  / 6)
> > #define GT_PM_INTERVAL_TO_US(dev_priv, interval) (INTEL_GEN(dev_priv) >= 9 
> > ? \
> > (IS_GEN9_LP(dev_priv) ? \
> > INTERVAL_0_833_TO_US(interval) : \
> > INTERVAL_1_33_TO_US(interval)) : \
> > INTERVAL_1_28_TO_US(interval))
> >
> > I could be mistaken, though.
> 
> Actually the kernel reads rpstat1 already and computes the frequency value.
> I think the current code is equivalent to what the kernel does on big 
> cores & small cores >= gen9.

So...we need to multiply by 50, but don't need to convert to usec?
Otherwise I'm struggling to see how this is equivalent.

> On cherryview/valleyview, we need to read another register to figure out 
> the multipliers...
> 
> So I'll just leave it out for those small cores gens for now.

That seems reasonable...


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

2018-04-03 Thread Lionel Landwerlin

On 23/03/18 18:00, Kenneth Graunke wrote:

On Thursday, March 8, 2018 7:42:53 AM PDT Lionel Landwerlin wrote:

This register contains the frequency of the GT, it's one of the value
GPA would like to have as part of their queries.

Signed-off-by: Lionel Landwerlin 
---
  src/mesa/drivers/dri/i965/brw_defines.h   | 10 +
  src/mesa/drivers/dri/i965/brw_performance_query.c | 45 +++
  src/mesa/drivers/dri/i965/brw_performance_query.h |  5 +++
  3 files changed, 60 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 8bf6f68b67c..ead44ebc5e8 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1656,6 +1656,16 @@ enum brw_pixel_shader_coverage_mask_mode {
  #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */
  # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
  
+#define GEN6_RPSTAT1   0xA01C

+#define  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT   7
+#define  GEN6_RPSTAT1_CURR_GT_FREQ_MASKINTEL_MASK(13, 7)
+#define  GEN6_RPSTAT1_PREV_GT_FREQ_SHIFT   0
+#define  GEN6_RPSTAT1_PREV_GT_FREQ_MASKINTEL_MASK(6, 0)
+#define  GEN9_RPSTAT1_CURR_GT_FREQ_SHIFT   23
+#define  GEN9_RPSTAT1_CURR_GT_FREQ_MASKINTEL_MASK(31, 23)
+#define  GEN9_RPSTAT1_PREV_GT_FREQ_SHIFT   0
+#define  GEN9_RPSTAT1_PREV_GT_FREQ_MASKINTEL_MASK(8, 0)
+

I can confirm that Haswell->Broadwell use 13:7 and 6:0, while
Skylake and Cannonlake use 31:23 and 8:0.  They apparently call this
RPSTAT1 on Haswell and RP_STATUS0 on Gen8+.

These are the wrong masks for Sandybridge, so I would not call them
GEN6_*.  The kernel has code for Sandybridge if we wanted to handle it,
but it looks like we don't expose OA on Sandybridge anyway, so there's
likely little point.

Baytrail and Cherryview should both be excluded, as you have to read the
current frequency from the PUnit.  Broxton and all others should work.


Thanks, updating.




  #define SLICE_COMMON_ECO_CHICKEN1  0x731c /* Gen9+ */
  # define GLK_SCEC_BARRIER_MODE_GPGPU   (0 << 7)
  # define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7)
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 98666759d75..7d5b44cf61d 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -227,6 +227,8 @@ brw_perf_query(struct gl_perf_query_object *o)
  
  #define MI_RPC_BO_SIZE  4096

  #define MI_RPC_BO_END_OFFSET_BYTES  (MI_RPC_BO_SIZE / 2)
+#define MI_FREQ_START_OFFSET_BYTES  (3072)
+#define MI_FREQ_END_OFFSET_BYTES(3076)

Why these?


That's where I store the RPSTAT copy (before/after the workload).




  
/**/
  
@@ -1150,6 +1152,9 @@ brw_begin_perf_query(struct gl_context *ctx,

/* Take a starting OA counter snapshot. */
brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
obj->oa.begin_report_id);
+  brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
+   MI_FREQ_START_OFFSET_BYTES);
+
++brw->perfquery.n_active_oa_queries;
  
/* No already-buffered samples can possibly be associated with this query

@@ -1233,6 +1238,8 @@ brw_end_perf_query(struct gl_context *ctx,
 */
if (!obj->oa.results_accumulated) {
   /* Take an ending OA counter snapshot. */
+ brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
+  MI_FREQ_END_OFFSET_BYTES);
   brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
   MI_RPC_BO_END_OFFSET_BYTES,
   obj->oa.begin_report_id + 1);
@@ -1333,6 +1340,43 @@ brw_is_perf_query_ready(struct gl_context *ctx,
 return false;
  }
  
+static void

+read_gt_frequency(struct brw_context *brw,
+  struct brw_perf_query_object *obj)
+{
+   const struct gen_device_info *devinfo = >screen->devinfo;
+   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
+  *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
+
+   switch (devinfo->gen) {
+   case 7:
+   case 8:
+  obj->oa.gt_frequency[0] =
+ ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
+  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;

You can just do:

   GET_FIELD(start_reg[0], GEN6_RPSTAT1_CURR_GT_FREQ)

instead of shifting and masking.

I think your conversions may be wrong.  In particular, you don't handle
Gen9LP and Gen9 differently, while in the kernel, GT_PM_INTERVAL_TO_US
does:

   Gen9 LP:  0.833 -> usec
   Gen9+ non-LP: 1.33  -> usec
   other:1.28  -> usec

#define INTERVAL_1_28_TO_US(interval)  (((interval) << 7) / 100)
#define INTERVAL_1_33_TO_US(interval)  

Re: [Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

2018-03-23 Thread Kenneth Graunke
On Thursday, March 8, 2018 7:42:53 AM PDT Lionel Landwerlin wrote:
> This register contains the frequency of the GT, it's one of the value
> GPA would like to have as part of their queries.
> 
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h   | 10 +
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 45 
> +++
>  src/mesa/drivers/dri/i965/brw_performance_query.h |  5 +++
>  3 files changed, 60 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 8bf6f68b67c..ead44ebc5e8 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1656,6 +1656,16 @@ enum brw_pixel_shader_coverage_mask_mode {
>  #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */
>  # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
>  
> +#define GEN6_RPSTAT1   0xA01C
> +#define  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT   7
> +#define  GEN6_RPSTAT1_CURR_GT_FREQ_MASKINTEL_MASK(13, 7)
> +#define  GEN6_RPSTAT1_PREV_GT_FREQ_SHIFT   0
> +#define  GEN6_RPSTAT1_PREV_GT_FREQ_MASKINTEL_MASK(6, 0)
> +#define  GEN9_RPSTAT1_CURR_GT_FREQ_SHIFT   23
> +#define  GEN9_RPSTAT1_CURR_GT_FREQ_MASKINTEL_MASK(31, 23)
> +#define  GEN9_RPSTAT1_PREV_GT_FREQ_SHIFT   0
> +#define  GEN9_RPSTAT1_PREV_GT_FREQ_MASKINTEL_MASK(8, 0)
> +

I can confirm that Haswell->Broadwell use 13:7 and 6:0, while
Skylake and Cannonlake use 31:23 and 8:0.  They apparently call this
RPSTAT1 on Haswell and RP_STATUS0 on Gen8+.

These are the wrong masks for Sandybridge, so I would not call them
GEN6_*.  The kernel has code for Sandybridge if we wanted to handle it,
but it looks like we don't expose OA on Sandybridge anyway, so there's
likely little point.

Baytrail and Cherryview should both be excluded, as you have to read the
current frequency from the PUnit.  Broxton and all others should work.

>  #define SLICE_COMMON_ECO_CHICKEN1  0x731c /* Gen9+ */
>  # define GLK_SCEC_BARRIER_MODE_GPGPU   (0 << 7)
>  # define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7)
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 98666759d75..7d5b44cf61d 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -227,6 +227,8 @@ brw_perf_query(struct gl_perf_query_object *o)
>  
>  #define MI_RPC_BO_SIZE  4096
>  #define MI_RPC_BO_END_OFFSET_BYTES  (MI_RPC_BO_SIZE / 2)
> +#define MI_FREQ_START_OFFSET_BYTES  (3072)
> +#define MI_FREQ_END_OFFSET_BYTES(3076)

Why these?

>  
> /**/
>  
> @@ -1150,6 +1152,9 @@ brw_begin_perf_query(struct gl_context *ctx,
>/* Take a starting OA counter snapshot. */
>brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
>obj->oa.begin_report_id);
> +  brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
> +   MI_FREQ_START_OFFSET_BYTES);
> +
>++brw->perfquery.n_active_oa_queries;
>  
>/* No already-buffered samples can possibly be associated with this 
> query
> @@ -1233,6 +1238,8 @@ brw_end_perf_query(struct gl_context *ctx,
> */
>if (!obj->oa.results_accumulated) {
>   /* Take an ending OA counter snapshot. */
> + brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
> +  MI_FREQ_END_OFFSET_BYTES);
>   brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
>   MI_RPC_BO_END_OFFSET_BYTES,
>   obj->oa.begin_report_id + 1);
> @@ -1333,6 +1340,43 @@ brw_is_perf_query_ready(struct gl_context *ctx,
> return false;
>  }
>  
> +static void
> +read_gt_frequency(struct brw_context *brw,
> +  struct brw_perf_query_object *obj)
> +{
> +   const struct gen_device_info *devinfo = >screen->devinfo;
> +   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
> +  *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
> +
> +   switch (devinfo->gen) {
> +   case 7:
> +   case 8:
> +  obj->oa.gt_frequency[0] =
> + ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
> +  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;

You can just do:

  GET_FIELD(start_reg[0], GEN6_RPSTAT1_CURR_GT_FREQ)

instead of shifting and masking.

I think your conversions may be wrong.  In particular, you don't handle
Gen9LP and Gen9 differently, while in the kernel, GT_PM_INTERVAL_TO_US
does:

  Gen9 LP:  0.833 -> usec
  Gen9+ non-LP: 1.33  -> usec
  other:1.28  -> usec

#define INTERVAL_1_28_TO_US(interval)  (((interval) << 7) / 100)
#define INTERVAL_1_33_TO_US(interval)  (((interval) << 

Re: [Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

2018-03-08 Thread Lionel Landwerlin

On 08/03/18 17:00, Chris Wilson wrote:

Quoting Lionel Landwerlin (2018-03-08 15:42:53)

+static void
+read_gt_frequency(struct brw_context *brw,
+  struct brw_perf_query_object *obj)
+{
+   const struct gen_device_info *devinfo = >screen->devinfo;
+   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
+  *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
+
+   switch (devinfo->gen) {
+   case 7:
+   case 8:
+  obj->oa.gt_frequency[0] =
+ ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
+  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;
+  obj->oa.gt_frequency[1] =
+ ((end_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
+  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;

I was just thinking this was the wrong frequency conversion for byt/bsw,
but then they don't have RPSTAT1 either. Is the OA only for big core?


I think you found a bug!

I can see in the internal documentation that CHV has RPSTAT1, except it 
doesn't have the fields we want.

So this needs to be dealt with/fixed.

On BXT I got sensible numbers from this register, so that leaves gen8 
based small cores in the unknown...

Documentation is again unhelpful...

Thanks,

-
Lionel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

2018-03-08 Thread Chris Wilson
Quoting Lionel Landwerlin (2018-03-08 15:42:53)
> +static void
> +read_gt_frequency(struct brw_context *brw,
> +  struct brw_perf_query_object *obj)
> +{
> +   const struct gen_device_info *devinfo = >screen->devinfo;
> +   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
> +  *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
> +
> +   switch (devinfo->gen) {
> +   case 7:
> +   case 8:
> +  obj->oa.gt_frequency[0] =
> + ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
> +  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;
> +  obj->oa.gt_frequency[1] =
> + ((end_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
> +  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;

I was just thinking this was the wrong frequency conversion for byt/bsw,
but then they don't have RPSTAT1 either. Is the OA only for big core?
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev