Re: [Intel-gfx] [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-09-12 Thread Tvrtko Ursulin


On 11/09/2019 17:38, Chris Wilson wrote:

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
  drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 
  drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
  drivers/gpu/drm/i915/i915_debugfs.c  |  22 ++---
  drivers/gpu/drm/i915/i915_pmu.c  | 120 +++
  drivers/gpu/drm/i915/i915_pmu.h  |   4 +-
  5 files changed, 90 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3bd764104d41..45a72cb698db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct drm_i915_private 
*i915)
	return switch_to_kernel_context_sync(&i915->gt);
  }
  
+static void user_forcewake(struct intel_gt *gt, bool suspend)

+{
+   int count = atomic_read(&gt->user_wakeref);
+
+   if (likely(!count))
+   return;
+
+   intel_gt_pm_get(gt);
+   if (suspend)
+   atomic_sub(count, &gt->wakeref.count);
+   else
+   atomic_add(count, &gt->wakeref.count);
+   intel_gt_pm_put(gt);
+}
+
  void i915_gem_suspend(struct drm_i915_private *i915)
  {
GEM_TRACE("\n");
@@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
	flush_workqueue(i915->wq);
  
+	user_forcewake(&i915->gt, true);

+
	mutex_lock(&i915->drm.struct_mutex);
  
  	/*

@@ -259,6 +276,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
if (!i915_gem_load_power_context(i915))
goto err_wedged;
  
+	user_forcewake(&i915->gt, false);

+
  out_unlock:
	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
	mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..3039cef64b11 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,7 @@ struct intel_gt {
} timelines;
  
  	struct intel_wakeref wakeref;

+   atomic_t user_wakeref;
  
  	struct list_head closed_vma;

spinlock_t closed_lock; /* guards the list of closed_vma */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 708855e051b5..f00c0dc4f57e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3988,13 +3988,12 @@ static int i915_sseu_status(struct seq_file *m, void 
*unused)
  static int i915_forcewake_open(struct inode *inode, struct file *file)
  {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = >gt;
  
-	if (INTEL_GEN(i915) < 6)

-   return 0;
-
-   file->private_data =
-   (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-   intel_uncore_forcewake_user_get(&i915->uncore);
+   atomic_inc(&gt->user_wakeref);
+   intel_gt_pm_get(gt);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_get(gt->uncore);
  
  	return 0;

  }
@@ -4002,13 +4001,12 @@ static int i915_forcewake_open(struct inode *inode, 
struct file *file)
  static int i915_forcewake_release(struct inode *inode, struct file *file)
  {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = >gt;
  
-	if (INTEL_GEN(i915) < 6)

-   return 0;
-
-   intel_uncore_forcewake_user_put(&i915->uncore);
-   intel_runtime_pm_put(&i915->runtime_pm,
-(intel_wakeref_t)(uintptr_t)file->private_data);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_put(&i915->uncore);
+   intel_gt_pm_put(gt);
+   atomic_dec(&gt->user_wakeref);
  
  	return 0;

  }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8e251e719390..a3d3d3e39c15 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -116,19 +116,51 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
return enable;
  }
  
+static u64 __get_rc6(struct intel_gt *gt)

+{
+   struct drm_i915_private *i915 = gt->i915;
+   u64 val;
+
+   val = intel_rc6_residency_ns(i915,
+IS_VALLEYVIEW(i915) ?
+VLV_GT_RENDER_RC6 :
+GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += 

Re: [Intel-gfx] [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-09-12 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-09-12 10:55:00)
> 
> On 12/09/2019 10:39, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-09-12 10:20:39)
> >> Don't we end up doing the irqsave spinlock needlessly when !CONFIG_PM?
> > 
> > No, the intent is to serialise with i915_pmu_gt_parked and
> > i915_pmu_gt_unparked (and the GT awake state), which are independent of
> > CONFIG_PM.
> 
> Yes but with !CONFIG_PM we can always read the real counters and don't 
> need to do any additional magic. In fact code in i915_pmu_gt_(un)parked 
> could be ifdef-ed out for that case as well.

Oh, you mean if we didn't have to worry about runtime-pm at all for
mmio access. I was not thinking of that at all, just balancing parked vs
sample.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-09-12 Thread Tvrtko Ursulin


On 12/09/2019 10:39, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2019-09-12 10:20:39)


On 11/09/2019 17:38, Chris Wilson wrote:

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
   drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 
   drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
   drivers/gpu/drm/i915/i915_debugfs.c  |  22 ++---
   drivers/gpu/drm/i915/i915_pmu.c  | 120 +++
   drivers/gpu/drm/i915/i915_pmu.h  |   4 +-
   5 files changed, 90 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3bd764104d41..45a72cb698db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct drm_i915_private 
*i915)
   return switch_to_kernel_context_sync(>gt);
   }
   
+static void user_forcewake(struct intel_gt *gt, bool suspend)

+{
+ int count = atomic_read(>user_wakeref); >>> +
+ if (likely(!count))
+ return;
+
+ intel_gt_pm_get(gt);
+ if (suspend)
+ atomic_sub(count, >wakeref.count);


GEM_BUG_ON for underflow?

Presumably count is effectively atomic here since userspace is not 
running yet/any more. Might warrant a comment?



+ else
+ atomic_add(count, >wakeref.count);
+ intel_gt_pm_put(gt);
+}
+
   void i915_gem_suspend(struct drm_i915_private *i915)
   {
   GEM_TRACE("\n");
@@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
   intel_wakeref_auto(>ggtt.userfault_wakeref, 0);
   flush_workqueue(i915->wq);
   
+ user_forcewake(>gt, true);


This complication is needed only because you changed user forcewake
handling to use intel_gt_pm_get/put instead of intel_runtime_pm_get?
Which in turn is because of the CONFIG_PM ifdef removal below? Wouldn't
it be simpler to keep both as were? Maybe I am missing something...


Not quite. The change is because we stop tracking rc6 after parking,
because we stop using the pm-timestamps in favour of our own gt
tracking. However, that required tying the debugfs into the gt pm in
order for us to notice the forced wakeup outside of the request flow.

Either we keep using the unreliable runtime-pm interactions, or not. The
patch hinges upon that decision. Or alternative, we say we just don't
care about miscounting with debugfs/i915_user_forcewake.


True, I think we have to account for it.


   static u64 get_rc6(struct intel_gt *gt)
   {
-#if IS_ENABLED(CONFIG_PM)
   struct drm_i915_private *i915 = gt->i915;
- struct intel_runtime_pm *rpm = >runtime_pm;
   struct i915_pmu *pmu = >pmu;
- intel_wakeref_t wakeref;
   unsigned long flags;
   u64 val;
   
- wakeref = intel_runtime_pm_get_if_in_use(rpm);

- if (wakeref) {
+ spin_lock_irqsave(>lock, flags);
+
+ if (intel_gt_pm_get_if_awake(gt)) {
   val = __get_rc6(gt);
- intel_runtime_pm_put(rpm, wakeref);
+ intel_gt_pm_put(gt);
   
   /*

* If we are coming back from being runtime suspended we must
@@ -466,19 +494,13 @@ static u64 get_rc6(struct intel_gt *gt)
* previously.
*/
   
- spin_lock_irqsave(>lock, flags);

-
   if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
   pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
   pmu->sample[__I915_SAMPLE_RC6].cur = val;
   } else {
   val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
   }
-
- spin_unlock_irqrestore(>lock, flags);


For this branch pmu->lock is only needed over the estimation block, not
over __get_rc6() and intel_gt_pm_get_if_awake(). But I agree it's more
efficient to do it like this to avoid multiple irq-off-on transitions
via intel_rc6_residency_ns. I wanted to suggest local_irq_disable and
separate spin_(un)lock's on both if branches for more self-documenting ,
less confusion, but then single call also has it's benefits.


If I am not mistaken, we need to serialise over the get_if_awake. Or at
least it makes it easier to argue about the GT state and whether we
need to choose between updating ESTIMATED or ACTUAL.


I am not sure that we do but anyway doesn't harm a lot and has the above 
described benefits as well so okay.




[snip]


Don't we end up doing the irqsave spinlock needlessly when !CONFIG_PM?


No, the intent is to serialise with i915_pmu_gt_parked and
i915_pmu_gt_unparked (and the GT awake state), which are 

Re: [Intel-gfx] [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-09-12 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-09-12 10:20:39)
> 
> On 11/09/2019 17:38, Chris Wilson wrote:
> > As we track when we put the GT device to sleep upon idling, we can use
> > that callback to sample the current rc6 counters and record the
> > timestamp for estimating samples after that point while asleep.
> > 
> > v2: Stick to using ktime_t
> > v3: Track user_wakerefs that interfere with the new
> > intel_gt_pm_wait_for_idle
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
> > Signed-off-by: Chris Wilson 
> > Cc: Tvrtko Ursulin 
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 
> >   drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
> >   drivers/gpu/drm/i915/i915_debugfs.c  |  22 ++---
> >   drivers/gpu/drm/i915/i915_pmu.c  | 120 +++
> >   drivers/gpu/drm/i915/i915_pmu.h  |   4 +-
> >   5 files changed, 90 insertions(+), 76 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > index 3bd764104d41..45a72cb698db 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > @@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct 
> > drm_i915_private *i915)
> >   return switch_to_kernel_context_sync(>gt);
> >   }
> >   
> > +static void user_forcewake(struct intel_gt *gt, bool suspend)
> > +{
> > + int count = atomic_read(>user_wakeref);
> > +
> > + if (likely(!count))
> > + return;
> > +
> > + intel_gt_pm_get(gt);
> > + if (suspend)
> > + atomic_sub(count, >wakeref.count);
> > + else
> > + atomic_add(count, >wakeref.count);
> > + intel_gt_pm_put(gt);
> > +}
> > +
> >   void i915_gem_suspend(struct drm_i915_private *i915)
> >   {
> >   GEM_TRACE("\n");
> > @@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
> >   intel_wakeref_auto(>ggtt.userfault_wakeref, 0);
> >   flush_workqueue(i915->wq);
> >   
> > + user_forcewake(>gt, true);
> 
> This complication is needed only because you changed user forcewake 
> handling to use intel_gt_pm_get/put instead of intel_runtime_pm_get? 
> Which in turn is because of the CONFIG_PM ifdef removal below? Wouldn't 
> it be simpler to keep both as were? Maybe I am missing something...

Not quite. The change is because we stop tracking rc6 after parking,
because we stop using the pm-timestamps in favour of our own gt
tracking. However, that required tying the debugfs into the gt pm in
order for us to notice the forced wakeup outside of the request flow.

Either we keep using the unreliable runtime-pm interactions, or not. The
patch hinges upon that decision. Or alternative, we say we just don't
care about miscounting with debugfs/i915_user_forcewake.

> >   static u64 get_rc6(struct intel_gt *gt)
> >   {
> > -#if IS_ENABLED(CONFIG_PM)
> >   struct drm_i915_private *i915 = gt->i915;
> > - struct intel_runtime_pm *rpm = >runtime_pm;
> >   struct i915_pmu *pmu = >pmu;
> > - intel_wakeref_t wakeref;
> >   unsigned long flags;
> >   u64 val;
> >   
> > - wakeref = intel_runtime_pm_get_if_in_use(rpm);
> > - if (wakeref) {
> > + spin_lock_irqsave(>lock, flags);
> > +
> > + if (intel_gt_pm_get_if_awake(gt)) {
> >   val = __get_rc6(gt);
> > - intel_runtime_pm_put(rpm, wakeref);
> > + intel_gt_pm_put(gt);
> >   
> >   /*
> >* If we are coming back from being runtime suspended we must
> > @@ -466,19 +494,13 @@ static u64 get_rc6(struct intel_gt *gt)
> >* previously.
> >*/
> >   
> > - spin_lock_irqsave(>lock, flags);
> > -
> >   if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> >   pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> >   pmu->sample[__I915_SAMPLE_RC6].cur = val;
> >   } else {
> >   val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> >   }
> > -
> > - spin_unlock_irqrestore(>lock, flags);
> 
> For this branch pmu->lock is only needed over the estimation block, not 
> over __get_rc6() and intel_gt_pm_get_if_awake(). But I agree it's more 
> efficient to do it like this to avoid multiple irq-off-on transitions 
> via intel_rc6_residency_ns. I wanted to suggest local_irq_disable and 
> separate spin_(un)lock's on both if branches for more self-documenting , 
> less confusion, but then single call also has it's benefits.

If I am not mistaken, we need to serialise over the get_if_awake. Or at
least it makes it easier to argue about the GT state and whether we
need to choose between updating ESTIMATED or ACTUAL.

[snip]

> Don't we end up doing the irqsave spinlock needlessly when !CONFIG_PM?

No, the intent is to serialise with i915_pmu_gt_parked and
i915_pmu_gt_unparked (and the GT awake state), which are independent of

Re: [Intel-gfx] [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-09-12 Thread Tvrtko Ursulin


On 11/09/2019 17:38, Chris Wilson wrote:

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
  drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 
  drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
  drivers/gpu/drm/i915/i915_debugfs.c  |  22 ++---
  drivers/gpu/drm/i915/i915_pmu.c  | 120 +++
  drivers/gpu/drm/i915/i915_pmu.h  |   4 +-
  5 files changed, 90 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3bd764104d41..45a72cb698db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct drm_i915_private 
*i915)
return switch_to_kernel_context_sync(>gt);
  }
  
+static void user_forcewake(struct intel_gt *gt, bool suspend)

+{
+   int count = atomic_read(>user_wakeref);
+
+   if (likely(!count))
+   return;
+
+   intel_gt_pm_get(gt);
+   if (suspend)
+   atomic_sub(count, >wakeref.count);
+   else
+   atomic_add(count, >wakeref.count);
+   intel_gt_pm_put(gt);
+}
+
  void i915_gem_suspend(struct drm_i915_private *i915)
  {
GEM_TRACE("\n");
@@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
intel_wakeref_auto(>ggtt.userfault_wakeref, 0);
flush_workqueue(i915->wq);
  
+	user_forcewake(>gt, true);


This complication is needed only because you changed user forcewake 
handling to use intel_gt_pm_get/put instead of intel_runtime_pm_get? 
Which in turn is because of the CONFIG_PM ifdef removal below? Wouldn't 
it be simpler to keep both as were? Maybe I am missing something...



+
mutex_lock(>drm.struct_mutex);
  
  	/*

@@ -259,6 +276,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
if (!i915_gem_load_power_context(i915))
goto err_wedged;
  
+	user_forcewake(>gt, false);

+
  out_unlock:
intel_uncore_forcewake_put(>uncore, FORCEWAKE_ALL);
mutex_unlock(>drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..3039cef64b11 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,7 @@ struct intel_gt {
} timelines;
  
  	struct intel_wakeref wakeref;

+   atomic_t user_wakeref;
  
  	struct list_head closed_vma;

spinlock_t closed_lock; /* guards the list of closed_vma */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 708855e051b5..f00c0dc4f57e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3988,13 +3988,12 @@ static int i915_sseu_status(struct seq_file *m, void 
*unused)
  static int i915_forcewake_open(struct inode *inode, struct file *file)
  {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = >gt;
  
-	if (INTEL_GEN(i915) < 6)

-   return 0;
-
-   file->private_data =
-   (void *)(uintptr_t)intel_runtime_pm_get(>runtime_pm);
-   intel_uncore_forcewake_user_get(>uncore);
+   atomic_inc(>user_wakeref);
+   intel_gt_pm_get(gt);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_get(gt->uncore);
  
  	return 0;

  }
@@ -4002,13 +4001,12 @@ static int i915_forcewake_open(struct inode *inode, 
struct file *file)
  static int i915_forcewake_release(struct inode *inode, struct file *file)
  {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = >gt;
  
-	if (INTEL_GEN(i915) < 6)

-   return 0;
-
-   intel_uncore_forcewake_user_put(>uncore);
-   intel_runtime_pm_put(>runtime_pm,
-(intel_wakeref_t)(uintptr_t)file->private_data);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_put(>uncore);
+   intel_gt_pm_put(gt);
+   atomic_dec(>user_wakeref);
  
  	return 0;

  }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8e251e719390..a3d3d3e39c15 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -116,19 +116,51 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
return enable;
  }
  
+static u64 __get_rc6(struct intel_gt *gt)

+{
+   struct drm_i915_private *i915 = gt->i915;
+   u64 val;
+
+   val = intel_rc6_residency_ns(i915,
+IS_VALLEYVIEW(i915) ?
+  

[Intel-gfx] [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-09-11 Thread Chris Wilson
As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 
 drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c  |  22 ++---
 drivers/gpu/drm/i915/i915_pmu.c  | 120 +++
 drivers/gpu/drm/i915/i915_pmu.h  |   4 +-
 5 files changed, 90 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3bd764104d41..45a72cb698db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -141,6 +141,21 @@ bool i915_gem_load_power_context(struct drm_i915_private 
*i915)
	return switch_to_kernel_context_sync(&i915->gt);
 }
 
+static void user_forcewake(struct intel_gt *gt, bool suspend)
+{
+   int count = atomic_read(&gt->user_wakeref);
+
+   if (likely(!count))
+   return;
+
+   intel_gt_pm_get(gt);
+   if (suspend)
+   atomic_sub(count, &gt->wakeref.count);
+   else
+   atomic_add(count, &gt->wakeref.count);
+   intel_gt_pm_put(gt);
+}
+
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
GEM_TRACE("\n");
@@ -148,6 +163,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
	flush_workqueue(i915->wq);
 
+   user_forcewake(&i915->gt, true);
+
	mutex_lock(&i915->drm.struct_mutex);
 
	/*
@@ -259,6 +276,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
	if (!i915_gem_load_power_context(i915))
	goto err_wedged;
 
+   user_forcewake(&i915->gt, false);
+
 out_unlock:
	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
	mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..3039cef64b11 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,7 @@ struct intel_gt {
} timelines;
 
struct intel_wakeref wakeref;
+   atomic_t user_wakeref;
 
struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 708855e051b5..f00c0dc4f57e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3988,13 +3988,12 @@ static int i915_sseu_status(struct seq_file *m, void 
*unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = >gt;
 
-   if (INTEL_GEN(i915) < 6)
-   return 0;
-
-   file->private_data =
-   (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-   intel_uncore_forcewake_user_get(&i915->uncore);
+   atomic_inc(&gt->user_wakeref);
+   intel_gt_pm_get(gt);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_get(gt->uncore);
 
return 0;
 }
@@ -4002,13 +4001,12 @@ static int i915_forcewake_open(struct inode *inode, 
struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = >gt;
 
-   if (INTEL_GEN(i915) < 6)
-   return 0;
-
-   intel_uncore_forcewake_user_put(&i915->uncore);
-   intel_runtime_pm_put(&i915->runtime_pm,
-(intel_wakeref_t)(uintptr_t)file->private_data);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_put(&i915->uncore);
+   intel_gt_pm_put(gt);
+   atomic_dec(&gt->user_wakeref);
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8e251e719390..a3d3d3e39c15 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -116,19 +116,51 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
return enable;
 }
 
+static u64 __get_rc6(struct intel_gt *gt)
+{
+   struct drm_i915_private *i915 = gt->i915;
+   u64 val;
+
+   val = intel_rc6_residency_ns(i915,
+IS_VALLEYVIEW(i915) ?
+VLV_GT_RENDER_RC6 :
+GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+   

[Intel-gfx] [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-08-03 Thread Chris Wilson
As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  21 +++--
 drivers/gpu/drm/i915/i915_pmu.c | 120 ++--
 drivers/gpu/drm/i915/i915_pmu.h |   4 +-
 3 files changed, 69 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 19f156a7f501..80da59b5a1b0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -39,6 +39,7 @@
 #include "display/intel_psr.h"
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_gt_pm.h"
 #include "gt/intel_reset.h"
 #include "gt/uc/intel_guc_submission.h"
 
@@ -4063,13 +4064,11 @@ static int i915_sseu_status(struct seq_file *m, void 
*unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = >gt;
 
-   if (INTEL_GEN(i915) < 6)
-   return 0;
-
-   file->private_data =
-   (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-   intel_uncore_forcewake_user_get(&i915->uncore);
+   intel_gt_pm_get(gt);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_get(gt->uncore);
 
return 0;
 }
@@ -4077,13 +4076,11 @@ static int i915_forcewake_open(struct inode *inode, 
struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = >gt;
 
-   if (INTEL_GEN(i915) < 6)
-   return 0;
-
-   intel_uncore_forcewake_user_put(&i915->uncore);
-   intel_runtime_pm_put(&i915->runtime_pm,
-(intel_wakeref_t)(uintptr_t)file->private_data);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_put(&i915->uncore);
+   intel_gt_pm_put(gt);
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index c2e5f6d5c1e0..61d4fa99e413 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -115,19 +115,51 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
return enable;
 }
 
+static u64 __get_rc6(struct intel_gt *gt)
+{
+   struct drm_i915_private *i915 = gt->i915;
+   u64 val;
+
+   val = intel_rc6_residency_ns(i915,
+IS_VALLEYVIEW(i915) ?
+VLV_GT_RENDER_RC6 :
+GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+   return val;
+}
+
 void i915_pmu_gt_parked(struct drm_i915_private *i915)
 {
struct i915_pmu *pmu = >pmu;
+   u64 val;
 
if (!pmu->base.event_init)
return;
 
	spin_lock_irq(&pmu->lock);
 
+   val = 0;
+   if (pmu->sample[__I915_SAMPLE_RC6].cur)
+   val = __get_rc6(&i915->gt);
+
+   if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+   pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+   pmu->sample[__I915_SAMPLE_RC6].cur = val;
+   }
+   pmu->sleep_last = ktime_get();
+
/*
 * Signal sampling timer to stop if only engine events are enabled and
 * GPU went idle.
 */
pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
	spin_unlock_irq(&pmu->lock);
 }
 
@@ -142,6 +174,11 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu 
*pmu)
}
 }
 
+static inline s64 ktime_since(const ktime_t kt)
+{
+   return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 {
struct i915_pmu *pmu = >pmu;
@@ -150,10 +187,22 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
return;
 
spin_lock_irq(>lock);
+
/*
 * Re-enable sampling timer when GPU goes active.
 */
__i915_pmu_maybe_start_timer(pmu);
+
+   /* Estimate how long we slept and accumulate that into rc6 counters */
+   if (pmu->sample[__I915_SAMPLE_RC6].cur) {
+   u64 val;
+
+   val = ktime_since(pmu->sleep_last);
+   val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+   pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+   }
+
spin_unlock_irq(>lock);
 }
 
@@ -425,39 +474,18 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0;
 }
 
-static u64 __get_rc6(struct intel_gt *gt)
-{
-   struct 

Re: [Intel-gfx] [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-02-12 Thread Chris Wilson
Quoting Chris Wilson (2019-02-12 22:39:46)
> As we track when we put the GT device to sleep upon idling, we can use
> that callback to sample the current rc6 counters and record the
> timestamp for estimating samples after that point while asleep.

Bah, the perf_pmu/rc6 test bypasses the GPU and pokes the forcewake bit
instead. This needs

@@ -1495,9 +1496,11 @@ test_rc6(int gem_fd, unsigned int flags)
igt_assert(fw >= 0);
usleep(1e3); /* wait for the rc6 cycle counter to stop ticking */

+   spin = igt_spin_batch_new(gem_fd, .engine = I915_EXEC_RENDER);
prev = pmu_read_single(fd);
usleep(duration_ns / 1000);
busy = pmu_read_single(fd);
+   igt_spin_batch_end(spin);

to paper over the difference.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-02-12 Thread Chris Wilson
As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 83 -
 drivers/gpu/drm/i915/i915_pmu.h |  4 +-
 2 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 13d70b90dd0f..bb6b66624e64 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -110,17 +110,49 @@ static bool pmu_needs_timer(struct drm_i915_private 
*i915, bool gpu_active)
return enable;
 }
 
+static u64 __get_rc6(struct drm_i915_private *i915)
+{
+   u64 val;
+
+   val = intel_rc6_residency_ns(i915,
+IS_VALLEYVIEW(i915) ?
+VLV_GT_RENDER_RC6 :
+GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+   return val;
+}
+
 void i915_pmu_gt_parked(struct drm_i915_private *i915)
 {
+   u64 val;
+
if (!i915->pmu.base.event_init)
return;
 
+   val = 0;
+   if (i915->pmu.sample[__I915_SAMPLE_RC6].cur)
+   val = __get_rc6(i915);
+
spin_lock_irq(>pmu.lock);
+
+   if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+   }
+   i915->pmu.sleep_timestamp = jiffies;
+
/*
 * Signal sampling timer to stop if only engine events are enabled and
 * GPU went idle.
 */
i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
+
spin_unlock_irq(>pmu.lock);
 }
 
@@ -141,10 +173,23 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
return;
 
spin_lock_irq(>pmu.lock);
+
/*
 * Re-enable sampling timer when GPU goes active.
 */
__i915_pmu_maybe_start_timer(i915);
+
+   /* Estimate how long we slept and accumulate that into rc6 counters */
+   if (i915->pmu.sample[__I915_SAMPLE_RC6].cur) {
+   u64 val;
+
+   val = jiffies - i915->pmu.sleep_timestamp;
+   val = jiffies_to_nsecs(val);
+   val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+   }
+
spin_unlock_irq(>pmu.lock);
 }
 
@@ -425,24 +470,6 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0;
 }
 
-static u64 __get_rc6(struct drm_i915_private *i915)
-{
-   u64 val;
-
-   val = intel_rc6_residency_ns(i915,
-IS_VALLEYVIEW(i915) ?
-VLV_GT_RENDER_RC6 :
-GEN6_GT_GFX_RC6);
-
-   if (HAS_RC6p(i915))
-   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-   if (HAS_RC6pp(i915))
-   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-   return val;
-}
-
 static u64 get_rc6(struct drm_i915_private *i915)
 {
 #if IS_ENABLED(CONFIG_PM)
@@ -450,7 +477,9 @@ static u64 get_rc6(struct drm_i915_private *i915)
unsigned long flags;
u64 val;
 
-   wakeref = intel_runtime_pm_get_if_in_use(i915);
+   wakeref = 0;
+   if (READ_ONCE(i915->gt.awake))
+   wakeref = intel_runtime_pm_get_if_in_use(i915);
if (wakeref) {
val = __get_rc6(i915);
intel_runtime_pm_put(i915, wakeref);
@@ -472,9 +501,6 @@ static u64 get_rc6(struct drm_i915_private *i915)
 
spin_unlock_irqrestore(>pmu.lock, flags);
} else {
-   struct pci_dev *pdev = i915->drm.pdev;
-   struct device *kdev = >dev;
-
/*
 * We are runtime suspended.
 *
@@ -483,7 +509,6 @@ static u64 get_rc6(struct drm_i915_private *i915)
 * counter value.
 */
spin_lock_irqsave(>pmu.lock, flags);
-   spin_lock(>power.lock);
 
/*
 * After the above branch intel_runtime_pm_get_if_in_use failed
@@ -496,15 +521,8 @@ static u64 get_rc6(struct drm_i915_private *i915)
 * suspended and if not we cannot do better than report the last
 * known RC6 value.
 */
-   if (kdev->power.runtime_status == RPM_SUSPENDED) {
-   if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-   i915->pmu.suspended_jiffies_last =
-