Re: [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

2023-05-24 Thread Dixit, Ashutosh
On Wed, 24 May 2023 10:53:20 -0700, Tvrtko Ursulin wrote:
>

Hi Tvrtko,

> On 24/05/2023 18:38, Dixit, Ashutosh wrote:
> > On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
> >> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> >>> No functional changes but we can remove some unsightly index computation
> >>> and read/write functions if we convert the PMU sample array from a
> >>> one-dimensional to a two-dimensional array.
> >>>
> >>> Suggested-by: Tvrtko Ursulin 
> >>> Signed-off-by: Ashutosh Dixit 
> >>> ---
> >>>drivers/gpu/drm/i915/i915_pmu.c | 60 ++---
> >>>drivers/gpu/drm/i915/i915_pmu.h |  2 +-
> >>>2 files changed, 19 insertions(+), 43 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
> >>> b/drivers/gpu/drm/i915/i915_pmu.c
> >>> index b47d890d4ada1..137e0df9573ee 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >>> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> >>>   return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> >>>}
> >>>-static unsigned int
> >>> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> >>> -{
> >>> - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> >>> -
> >>> - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> >>> -
> >>> - return idx;
> >>> -}
> >>> -
> >>> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int 
> >>> sample)
> >>> -{
> >>> - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> >>> -}
> >>> -
> >>> -static void
> >>> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 
> >>> val)
> >>> -{
> >>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> >>> -}
> >>> -
> >>> -static void
> >>> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, 
> >>> u32 val, u32 mul)
> >>> -{
> >>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
> >>> mul);
> >>> -}
> >>
> >> IMO read and store helpers could have stayed and just changed the
> >> implementation. Like add_sample_mult which you just moved. I would have
> >> been a smaller patch. So dunno.. a bit of a reluctant r-b.
> >
> > Are you referring just to add_sample_mult or to all the other functions
> > too? add_sample_mult I moved it to where it was before bc4be0a38b63
>
> Read and store helpers.
>
> > ("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
> > left it here I guess.
> >
> > The other read and store helpers are not needed with the 2-d array at all
> > since the compiler itself will do that, so I thought it was better to get
> > rid of them completely.
>
> Yes I get it, just that I didn't see the benefit of removing them.
>
> For example:
>
>  -store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
>  +pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>
> It's a meh for me. Either flavour looks fine to me so I would have erred on
> the side of keeping the patch small. If anything I probably slightly prefer
> that the struct pmu_sample implementation was able to be changed with less
> churn before. For example. But a very minor argument really.

OK, I finally understood and have made this change in Patch v2. Please take
a look.

>
> Or maybe next step is get rid of the struct i915_pmu_sample. It is a struct
> because originally previous value was tracked too. Then I removed that and
> it was easier to keep the struct. I guess it can go now and then the
> removal of helpers here will look somewhat nicer without the trailing .cur
> on every affected line.

I have left this as is for now in case the i915_pmu_sample need to be
expanded again. Should be ok with the read/store helpers.

>
> > Let me know if you want any changes, otherwise I will leave as is.
>
> You can leave it as is, I dont' mind much.

I went ahead and changed it anyway since you seemed to want it.

Thanks.
--
Ashutosh

>
> >> Reviewed-by: Tvrtko Ursulin 
> >
> > Thanks for the review. Thanks Andrzej too :)
> > --
> > Ashutosh
> >
> >>> -
> >>>static u64 get_rc6(struct intel_gt *gt)
> >>>{
> >>>   struct drm_i915_private *i915 = gt->i915;
> >>> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> >>>   spin_lock_irqsave(>lock, flags);
> >>>   if (awake) {
> >>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> >>> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> >>>   } else {
> >>>   /*
> >>>* We think we are runtime suspended.
> >>> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> >>>* counter value.
> >>>*/
> >>>   val = ktime_since_raw(pmu->sleep_last[gt_id]);
> >>> - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> >>> + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> >>>   }
> >>>-  if (val < read_sample(pmu, gt_id, 
> >>> __I915_SAMPLE_RC6_LAST_REPORTED))
> >>> - val = 

[PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

2023-05-24 Thread Ashutosh Dixit
No functional changes but we can remove some unsightly index computation
and read/write functions if we convert the PMU sample array from a
one-dimensional to a two-dimensional array.

v2: Retain read/store helpers (Tvrtko)

Suggested-by: Tvrtko Ursulin 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_pmu.c | 16 +++-
 drivers/gpu/drm/i915/i915_pmu.h |  2 +-
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 09313cf9316b4..f96fe92dca4e4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -191,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
 }
 
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-   return idx;
-}
-
 static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
 {
-   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+   return pmu->sample[gt_id][sample].cur;
 }
 
 static void
 store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
 {
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+   pmu->sample[gt_id][sample].cur = val;
 }
 
 static void
 add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, 
u32 mul)
 {
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
mul);
+   pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
 }
 
 static u64 get_rc6(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 33d80fbaab8bc..d20592e7db999 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -127,7 +127,7 @@ struct i915_pmu {
 * Only global counters are held here, while the per-engine ones are in
 * struct intel_engine_cs.
 */
-   struct i915_pmu_sample sample[I915_PMU_MAX_GTS * 
__I915_NUM_PMU_SAMPLERS];
+   struct i915_pmu_sample 
sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
/**
 * @sleep_last: Last time GT parked for RC6 estimation.
 */
-- 
2.38.0



Re: [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

2023-05-24 Thread Tvrtko Ursulin



On 24/05/2023 18:38, Dixit, Ashutosh wrote:

On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:




Hi Tvrtko,


On 23/05/2023 16:19, Ashutosh Dixit wrote:

No functional changes but we can remove some unsightly index computation
and read/write functions if we convert the PMU sample array from a
one-dimensional to a two-dimensional array.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 
---
   drivers/gpu/drm/i915/i915_pmu.c | 60 ++---
   drivers/gpu/drm/i915/i915_pmu.h |  2 +-
   2 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b47d890d4ada1..137e0df9573ee 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
   }
   -static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-   return idx;
-}
-
-static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
-}
-
-static void
-store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
-}
-
-static void
-add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, 
u32 mul)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
mul);
-}


IMO read and store helpers could have stayed and just changed the
implementation. Like add_sample_mult which you just moved. I would have
been a smaller patch. So dunno.. a bit of a reluctant r-b.


Are you referring just to add_sample_mult or to all the other functions
too? add_sample_mult I moved it to where it was before bc4be0a38b63


Read and store helpers.


("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
left it here I guess.

The other read and store helpers are not needed with the 2-d array at all
since the compiler itself will do that, so I thought it was better to get
rid of them completely.


Yes I get it, just that I didn't see the benefit of removing them.

For example:

 -  store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
 +  pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;

It's a meh for me. Either flavour looks fine to me so I would have erred 
on the side of keeping the patch small. If anything I probably slightly 
prefer that the struct pmu_sample implementation was able to be changed 
with less churn before. For example. But a very minor argument really.


Or maybe next step is get rid of the struct i915_pmu_sample. It is a 
struct because originally previous value was tracked too. Then I removed 
that and it was easier to keep the struct. I guess it can go now and 
then the removal of helpers here will look somewhat nicer without the 
trailing .cur on every affected line.



Let me know if you want any changes, otherwise I will leave as is.


You can leave it as is, I dont' mind much.

Regards,

Tvrtko


Reviewed-by: Tvrtko Ursulin 


Thanks for the review. Thanks Andrzej too :)
--
Ashutosh


-
   static u64 get_rc6(struct intel_gt *gt)
   {
struct drm_i915_private *i915 = gt->i915;
@@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
spin_lock_irqsave(>lock, flags);
if (awake) {
-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
} else {
/*
 * We think we are runtime suspended.
@@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
 * counter value.
 */
val = ktime_since_raw(pmu->sleep_last[gt_id]);
-   val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
+   val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
}
   -if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
-   val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
+   if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+   val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
else
-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
spin_unlock_irqrestore(>lock, flags);
   @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
u64 val = __get_rc6(gt);
   -store_sample(pmu, i, __I915_SAMPLE_RC6, val);
-   store_sample(pmu, i, 

Re: [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

2023-05-24 Thread Dixit, Ashutosh
On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>

Hi Tvrtko,

> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> > No functional changes but we can remove some unsightly index computation
> > and read/write functions if we convert the PMU sample array from a
> > one-dimensional to a two-dimensional array.
> >
> > Suggested-by: Tvrtko Ursulin 
> > Signed-off-by: Ashutosh Dixit 
> > ---
> >   drivers/gpu/drm/i915/i915_pmu.c | 60 ++---
> >   drivers/gpu/drm/i915/i915_pmu.h |  2 +-
> >   2 files changed, 19 insertions(+), 43 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
> > b/drivers/gpu/drm/i915/i915_pmu.c
> > index b47d890d4ada1..137e0df9573ee 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> > return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> >   }
> >   -static unsigned int
> > -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > -   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> > -
> > -   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> > -
> > -   return idx;
> > -}
> > -
> > -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int 
> > sample)
> > -{
> > -   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> > -}
> > -
> > -static void
> > -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> > -{
> > -   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> > -}
> > -
> > -static void
> > -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 
> > val, u32 mul)
> > -{
> > -   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
> > mul);
> > -}
>
> IMO read and store helpers could have stayed and just changed the
> implementation. Like add_sample_mult which you just moved. I would have
> been a smaller patch. So dunno.. a bit of a reluctant r-b.

Are you referring just to add_sample_mult or to all the other functions
too? add_sample_mult I moved it to where it was before bc4be0a38b63
("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
left it here I guess.

The other read and store helpers are not needed with the 2-d array at all
since the compiler itself will do that, so I thought it was better to get
rid of them completely.

Let me know if you want any changes, otherwise I will leave as is.

> Reviewed-by: Tvrtko Ursulin 

Thanks for the review. Thanks Andrzej too :)
--
Ashutosh

> > -
> >   static u64 get_rc6(struct intel_gt *gt)
> >   {
> > struct drm_i915_private *i915 = gt->i915;
> > @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> > spin_lock_irqsave(>lock, flags);
> > if (awake) {
> > -   store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> > +   pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> > } else {
> > /*
> >  * We think we are runtime suspended.
> > @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> >  * counter value.
> >  */
> > val = ktime_since_raw(pmu->sleep_last[gt_id]);
> > -   val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> > +   val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> > }
> >   - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> > -   val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> > +   if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> > +   val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> > else
> > -   store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> > +   pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> > spin_unlock_irqrestore(>lock, flags);
> >   @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> > with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> > u64 val = __get_rc6(gt);
> >   - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> > -   store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> > -val);
> > +   pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> > +   pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = 
> > val;
> > pmu->sleep_last[i] = ktime_get_raw();
> > }
> > }
> > @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> >   {
> > struct i915_pmu *pmu = >i915->pmu;
> >   - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> > +   pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> > pmu->sleep_last[gt->info.id] = ktime_get_raw();
> >   }
> >   @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> > period_ns)
> > }
> >   }
> >   +static void
> > +add_sample_mult(struct 

Re: [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

2023-05-24 Thread Tvrtko Ursulin



On 23/05/2023 16:19, Ashutosh Dixit wrote:

No functional changes but we can remove some unsightly index computation
and read/write functions if we convert the PMU sample array from a
one-dimensional to a two-dimensional array.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 
---
  drivers/gpu/drm/i915/i915_pmu.c | 60 ++---
  drivers/gpu/drm/i915/i915_pmu.h |  2 +-
  2 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b47d890d4ada1..137e0df9573ee 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
  }
  
-static unsigned int

-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-   return idx;
-}
-
-static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
-}
-
-static void
-store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
-}
-
-static void
-add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, 
u32 mul)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
mul);
-}


IMO read and store helpers could have stayed and just changed the 
implementation. Like add_sample_mult which you just moved. I would have 
been a smaller patch. So dunno.. a bit of a reluctant r-b.


Reviewed-by: Tvrtko Ursulin 

Regards,

Tvrtko


-
  static u64 get_rc6(struct intel_gt *gt)
  {
struct drm_i915_private *i915 = gt->i915;
@@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
spin_lock_irqsave(>lock, flags);
  
  	if (awake) {

-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
} else {
/*
 * We think we are runtime suspended.
@@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
 * counter value.
 */
val = ktime_since_raw(pmu->sleep_last[gt_id]);
-   val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
+   val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
}
  
-	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))

-   val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
+   if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+   val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
else
-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
  
  	spin_unlock_irqrestore(>lock, flags);
  
@@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)

with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
u64 val = __get_rc6(gt);
  
-			store_sample(pmu, i, __I915_SAMPLE_RC6, val);

-   store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
-val);
+   pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
+   pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = 
val;
pmu->sleep_last[i] = ktime_get_raw();
}
}
@@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
  {
struct i915_pmu *pmu = >i915->pmu;
  
-	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));

+   pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
pmu->sleep_last[gt->info.id] = ktime_get_raw();
  }
  
@@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)

}
  }
  
+static void

+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+   sample->cur += mul_u32_u32(val, mul);
+}
+
  static bool
  frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
  {
@@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
if (!val)
val = intel_gpu_freq(rps, rps->cur_freq);
  
-		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,

+   add_sample_mult(>sample[gt_id][__I915_SAMPLE_FREQ_ACT],
val, period_ns / 1000);
}
  
  	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {

-   add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
+   add_sample_mult(>sample[gt_id][__I915_SAMPLE_FREQ_REQ],
intel_rps_get_requested_frequency(rps),

Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

2023-05-24 Thread Andrzej Hajda

On 23.05.2023 17:19, Ashutosh Dixit wrote:

No functional changes but we can remove some unsightly index computation
and read/write functions if we convert the PMU sample array from a
one-dimensional to a two-dimensional array.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 


Reviewed-by: Andrzej Hajda 

Regards
Andrzej


---
  drivers/gpu/drm/i915/i915_pmu.c | 60 ++---
  drivers/gpu/drm/i915/i915_pmu.h |  2 +-
  2 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b47d890d4ada1..137e0df9573ee 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
  }
  
-static unsigned int

-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-   return idx;
-}
-
-static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
-}
-
-static void
-store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
-}
-
-static void
-add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, 
u32 mul)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
mul);
-}
-
  static u64 get_rc6(struct intel_gt *gt)
  {
struct drm_i915_private *i915 = gt->i915;
@@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
spin_lock_irqsave(>lock, flags);
  
  	if (awake) {

-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
} else {
/*
 * We think we are runtime suspended.
@@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
 * counter value.
 */
val = ktime_since_raw(pmu->sleep_last[gt_id]);
-   val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
+   val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
}
  
-	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))

-   val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
+   if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+   val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
else
-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
  
  	spin_unlock_irqrestore(>lock, flags);
  
@@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)

with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
u64 val = __get_rc6(gt);
  
-			store_sample(pmu, i, __I915_SAMPLE_RC6, val);

-   store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
-val);
+   pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
+   pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = 
val;
pmu->sleep_last[i] = ktime_get_raw();
}
}
@@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
  {
struct i915_pmu *pmu = >i915->pmu;
  
-	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));

+   pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
pmu->sleep_last[gt->info.id] = ktime_get_raw();
  }
  
@@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)

}
  }
  
+static void

+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+   sample->cur += mul_u32_u32(val, mul);
+}
+
  static bool
  frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
  {
@@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
if (!val)
val = intel_gpu_freq(rps, rps->cur_freq);
  
-		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,

+   add_sample_mult(>sample[gt_id][__I915_SAMPLE_FREQ_ACT],
val, period_ns / 1000);
}
  
  	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {

-   add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
+   add_sample_mult(>sample[gt_id][__I915_SAMPLE_FREQ_REQ],
intel_rps_get_requested_frequency(rps),
period_ns / 1000);
}
@@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
switch (config) {
case 

[PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

2023-05-23 Thread Ashutosh Dixit
No functional changes but we can remove some unsightly index computation
and read/write functions if we convert the PMU sample array from a
one-dimensional to a two-dimensional array.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_pmu.c | 60 ++---
 drivers/gpu/drm/i915/i915_pmu.h |  2 +-
 2 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b47d890d4ada1..137e0df9573ee 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
 }
 
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-   return idx;
-}
-
-static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
-}
-
-static void
-store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
-}
-
-static void
-add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, 
u32 mul)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
mul);
-}
-
 static u64 get_rc6(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
@@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
spin_lock_irqsave(>lock, flags);
 
if (awake) {
-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
} else {
/*
 * We think we are runtime suspended.
@@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
 * counter value.
 */
val = ktime_since_raw(pmu->sleep_last[gt_id]);
-   val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
+   val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
}
 
-   if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
-   val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
+   if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+   val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
else
-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
 
spin_unlock_irqrestore(>lock, flags);
 
@@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
u64 val = __get_rc6(gt);
 
-   store_sample(pmu, i, __I915_SAMPLE_RC6, val);
-   store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
-val);
+   pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
+   pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = 
val;
pmu->sleep_last[i] = ktime_get_raw();
}
}
@@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
 {
struct i915_pmu *pmu = >i915->pmu;
 
-   store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
+   pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
pmu->sleep_last[gt->info.id] = ktime_get_raw();
 }
 
@@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
}
 }
 
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+   sample->cur += mul_u32_u32(val, mul);
+}
+
 static bool
 frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
 {
@@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
if (!val)
val = intel_gpu_freq(rps, rps->cur_freq);
 
-   add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
+   add_sample_mult(>sample[gt_id][__I915_SAMPLE_FREQ_ACT],
val, period_ns / 1000);
}
 
if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
-   add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
+   add_sample_mult(>sample[gt_id][__I915_SAMPLE_FREQ_REQ],
intel_rps_get_requested_frequency(rps),
period_ns / 1000);
}
@@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
switch (config) {
case I915_PMU_ACTUAL_FREQUENCY:
val =
-