Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Imre Deak
On Wed, Nov 29, 2017 at 10:59:27AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> It seems that the DMC likes to transition between the DC states a lot when
> there are no connected displays (no active power domains) during command
> submission.
> 
> This activity on DC states has a negative impact on the performance of the
> chip with huge latencies observed in the interrupt handlers and elsewhere.
> Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
> eight.
> 
> Work around it by introducing a new power domain named,
> POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
> held for the duration of command submission activity.
> 
> v2:
>  * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
>  * Protect macro body with braces. (Jani Nikula)
> 
> v3:
>  * Add dedicated power domain for clarity. (Chris, Imre)
>  * Commit message and comment text updates.
>  * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC
>firmware release.
> 
> Signed-off-by: Tvrtko Ursulin 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
> Testcase: igt/gem_exec_nop/headless
> Cc: Imre Deak 
> Acked-by: Chris Wilson 
> Cc: Chris Wilson 
> Cc: Dmitry Rogozhkin 
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  5 +
>  drivers/gpu/drm/i915/i915_gem.c |  4 
>  drivers/gpu/drm/i915/i915_gem_request.c | 14 ++
>  drivers/gpu/drm/i915/intel_runtime_pm.c |  4 
>  4 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bddd65839f60..17340aadc566 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ enum intel_display_power_domain {
>   POWER_DOMAIN_AUX_D,
>   POWER_DOMAIN_GMBUS,
>   POWER_DOMAIN_MODESET,
> + POWER_DOMAIN_GT_IRQ,
>   POWER_DOMAIN_INIT,
>  
>   POWER_DOMAIN_NUM,
> @@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
>  #define GT_FREQUENCY_MULTIPLIER 50
>  #define GEN9_FREQ_SCALER 3
>  
> +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
> + (HAS_CSR(dev_priv) && (dev_priv)->csr.dmc_payload && \
> + IS_GEN9_BC(dev_priv) && !IS_SKYLAKE(dev_priv))

BXT and GLK seems to be also affected so IS_GEN9().

> +
>  #include "i915_trace.h"
>  
>  static inline bool intel_vtd_active(void)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 354b0546a191..feec2a621120 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3381,7 +3381,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  
>   if (INTEL_GEN(dev_priv) >= 6)
>   gen6_rps_idle(dev_priv);
> +
>   intel_runtime_pm_put(dev_priv);
> +
> + if (NEEDS_CSR_GT_PERF_WA(dev_priv))
> + intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
>  out_unlock:
>   mutex_unlock(_priv->drm.struct_mutex);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
> b/drivers/gpu/drm/i915/i915_gem_request.c
> index a90bdd26571f..619377a05810 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -251,6 +251,20 @@ static void mark_busy(struct drm_i915_private *i915)
>  
>   GEM_BUG_ON(!i915->gt.active_requests);
>  
> + /*
> +  * It seems that the DMC likes to transition between the DC states a lot
> +  * when there are no connected displays (no active power domains) during
> +  * command submission.
> +  *
> +  * This activity has negative impact on the performance of the chip with
> +  * huge latencies observed in the interrupt handler and elsewhere.
> +  *
> +  * Work around it by grabbing a GT IRQ power domain whilst there is any
> +  * GT activity, preventing any DC state transitions.
> +  */
> + if (NEEDS_CSR_GT_PERF_WA(i915))
> + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
> +
>   intel_runtime_pm_get_noresume(i915);
>   i915->gt.awake = true;
>  
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c 
> b/drivers/gpu/drm/i915/intel_runtime_pm.c
> index 8315499452dc..f491c7aaa096 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -130,6 +130,8 @@ intel_display_power_domain_str(enum 
> intel_display_power_domain domain)
>   return "INIT";
>   case POWER_DOMAIN_MODESET:
>   return "MODESET";
> + case POWER_DOMAIN_GT_IRQ:
> + return "GT_IRQ";
>   default:
>   MISSING_CASE(domain);
>   return "?";
> @@ -1705,6 +1707,7 @@ void intel_display_power_put(struct drm_i915_private 
> *dev_priv,
>   BIT_ULL(POWER_DOMAIN_INIT))
>  #define 

Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Imre Deak
On Wed, Nov 29, 2017 at 11:59:04AM +, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2017-11-29 11:53:41)
> > 
> > On 29/11/2017 11:40, Chris Wilson wrote:
> > > Quoting Tvrtko Ursulin (2017-11-29 11:34:27)
> > >>
> > >> On 29/11/2017 11:12, Daniel Vetter wrote:
> > >>> I think given that DMC is strongly recommended there shouldn't be a real
> > >>> cost with making this unconditional.
> > >>
> > >> I don't know, not liking it on the first go. But then again I have no
> > >> idea how much power would that waste for use cases where DMC fw is
> > >> deliberately not present.
> > >>
> > >> Perhaps it would be acceptable to mark GT busy during the async CSR
> > >> load. Chris, any thoughts?
> > > 
> > > It's tightly coupled to requests, adding in an external call seems
> > > troublesome.
> > > 
> > > What's the reason for depending on the CSR being loaded? The old fw is
> > > broke no matter what, it doesn't get any more broken by us holding a
> > > powerwell wakeref. I think we should go for simplicity and always take
> > > the powerwell along with the rpm?
> > 
> > It's the unknown, maybe only for me, on how much power always holding 
> > the power well would waste for use cases where DMC firmware has been 
> > deliberately removed.
> > 
> > If I understand correctly that the Daniel's and your proposal is to just 
> > got with HAS_CSR as the wa/ criteria, instead of fw loaded check.
> 
> If I am reading the powerwell code correctly, it already takes the dmc
> fw into account. I would transfer the problem to there :) i.e. we have
> an unconditional call from gt:mark_busy, gt:mark_idle and the powerwell
> code knows what needs to be done under the different circumstances.

Yes, a power well ref for all domains is held while the firmware is
being loaded, or the firmware fails to load. So taking it
unconditionally is ok (not counting the platform check due to the
corruption issue).

> -Chris
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Daniel Vetter
On Wed, Nov 29, 2017 at 11:34:27AM +, Tvrtko Ursulin wrote:
> 
> On 29/11/2017 11:12, Daniel Vetter wrote:
> > On Wed, Nov 29, 2017 at 10:59:27AM +, Tvrtko Ursulin wrote:
> > > From: Tvrtko Ursulin 
> > > 
> > > It seems that the DMC likes to transition between the DC states a lot when
> > > there are no connected displays (no active power domains) during command
> > > submission.
> > > 
> > > This activity on DC states has a negative impact on the performance of the
> > > chip with huge latencies observed in the interrupt handlers and elsewhere.
> > > Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
> > > eight.
> > > 
> > > Work around it by introducing a new power domain named,
> > > POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
> > > held for the duration of command submission activity.
> > > 
> > > v2:
> > >   * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
> > >   * Protect macro body with braces. (Jani Nikula)
> > > 
> > > v3:
> > >   * Add dedicated power domain for clarity. (Chris, Imre)
> > >   * Commit message and comment text updates.
> > >   * Apply to all big-core GEN9 parts apart for Skylake which is pending 
> > > DMC
> > > firmware release.
> > > 
> > > Signed-off-by: Tvrtko Ursulin 
> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
> > > Testcase: igt/gem_exec_nop/headless
> > > Cc: Imre Deak 
> > > Acked-by: Chris Wilson 
> > > Cc: Chris Wilson 
> > > Cc: Dmitry Rogozhkin 
> > > ---
> > >   drivers/gpu/drm/i915/i915_drv.h |  5 +
> > >   drivers/gpu/drm/i915/i915_gem.c |  4 
> > >   drivers/gpu/drm/i915/i915_gem_request.c | 14 ++
> > >   drivers/gpu/drm/i915/intel_runtime_pm.c |  4 
> > >   4 files changed, 27 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h 
> > > b/drivers/gpu/drm/i915/i915_drv.h
> > > index bddd65839f60..17340aadc566 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -398,6 +398,7 @@ enum intel_display_power_domain {
> > >   POWER_DOMAIN_AUX_D,
> > >   POWER_DOMAIN_GMBUS,
> > >   POWER_DOMAIN_MODESET,
> > > + POWER_DOMAIN_GT_IRQ,
> > >   POWER_DOMAIN_INIT,
> > >   POWER_DOMAIN_NUM,
> > > @@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
> > >   #define GT_FREQUENCY_MULTIPLIER 50
> > >   #define GEN9_FREQ_SCALER 3
> > > +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
> > > + (HAS_CSR(dev_priv) && (dev_priv)->csr.dmc_payload && \
> > 
> > Doesn't this check race with the async dmc loading? I.e. when you submit
> > something right at boot-up, before DMC is fully loaded, we might end up
> > with an unbalanced pm refcount.
> 
> Oh yeah, well spotted.
> 
> > I think given that DMC is strongly recommended there shouldn't be a real
> > cost with making this unconditional.
> 
> I don't know, not liking it on the first go. But then again I have no idea
> how much power would that waste for use cases where DMC fw is deliberately
> not present.
> 
> Perhaps it would be acceptable to mark GT busy during the async CSR load.
> Chris, any thoughts?

I only meant that we pm_put without pm_get (because when we would have
called pm_get dmc_payload == NULL and then != NULL when we reach the check
for pm_put).

The actual get/put vs. dmc loading should be protected already by the
async dmc load code holding pm references to prevent any havoc.
-Daniel

> 
> Regards,
> 
> Tvrtko
> 
> > With that changed:
> > 
> > Reviewed-by: Daniel Vetter 
> > 
> > > + IS_GEN9_BC(dev_priv) && !IS_SKYLAKE(dev_priv))
> > > +
> > >   #include "i915_trace.h"
> > >   static inline bool intel_vtd_active(void)
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> > > b/drivers/gpu/drm/i915/i915_gem.c
> > > index 354b0546a191..feec2a621120 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -3381,7 +3381,11 @@ i915_gem_idle_work_handler(struct work_struct 
> > > *work)
> > >   if (INTEL_GEN(dev_priv) >= 6)
> > >   gen6_rps_idle(dev_priv);
> > > +
> > >   intel_runtime_pm_put(dev_priv);
> > > +
> > > + if (NEEDS_CSR_GT_PERF_WA(dev_priv))
> > > + intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
> > >   out_unlock:
> > >   mutex_unlock(_priv->drm.struct_mutex);
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
> > > b/drivers/gpu/drm/i915/i915_gem_request.c
> > > index a90bdd26571f..619377a05810 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_request.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> > > @@ -251,6 +251,20 @@ static void mark_busy(struct drm_i915_private *i915)
> > >   GEM_BUG_ON(!i915->gt.active_requests);
> > > + /*
> > > +  * It 

Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Imre Deak
On Wed, Nov 29, 2017 at 11:10:58AM +, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2017-11-29 10:59:27)
> > From: Tvrtko Ursulin 
> > 
> > It seems that the DMC likes to transition between the DC states a lot when
> > there are no connected displays (no active power domains) during command
> > submission.
> > 
> > This activity on DC states has a negative impact on the performance of the
> > chip with huge latencies observed in the interrupt handlers and elsewhere.
> > Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
> > eight.
> > 
> > Work around it by introducing a new power domain named,
> > POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
> > held for the duration of command submission activity.
> > 
> > v2:
> >  * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
> >  * Protect macro body with braces. (Jani Nikula)
> > 
> > v3:
> >  * Add dedicated power domain for clarity. (Chris, Imre)
> >  * Commit message and comment text updates.
> >  * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC
> >firmware release.
> > 
> > Signed-off-by: Tvrtko Ursulin 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
> > Testcase: igt/gem_exec_nop/headless
> > Cc: Imre Deak 
> > Acked-by: Chris Wilson 
> > Cc: Chris Wilson 
> > Cc: Dmitry Rogozhkin 
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h |  5 +
> >  drivers/gpu/drm/i915/i915_gem.c |  4 
> >  drivers/gpu/drm/i915/i915_gem_request.c | 14 ++
> >  drivers/gpu/drm/i915/intel_runtime_pm.c |  4 
> >  4 files changed, 27 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h 
> > b/drivers/gpu/drm/i915/i915_drv.h
> > index bddd65839f60..17340aadc566 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -398,6 +398,7 @@ enum intel_display_power_domain {
> > POWER_DOMAIN_AUX_D,
> > POWER_DOMAIN_GMBUS,
> > POWER_DOMAIN_MODESET,
> > +   POWER_DOMAIN_GT_IRQ,
> > POWER_DOMAIN_INIT,
> >  
> > POWER_DOMAIN_NUM,
> > @@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
> >  #define GT_FREQUENCY_MULTIPLIER 50
> >  #define GEN9_FREQ_SCALER 3
> >  
> > +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
> > +   (HAS_CSR(dev_priv) && (dev_priv)->csr.dmc_payload && \
> 
> We can't have a dmc_payload unless CSR, right?
> 
> > +   IS_GEN9_BC(dev_priv) && !IS_SKYLAKE(dev_priv))
> > +
> >  #include "i915_trace.h"
> >  
> >  static inline bool intel_vtd_active(void)
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> > b/drivers/gpu/drm/i915/i915_gem.c
> > index 354b0546a191..feec2a621120 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3381,7 +3381,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
> >  
> > if (INTEL_GEN(dev_priv) >= 6)
> > gen6_rps_idle(dev_priv);
> > +
> > intel_runtime_pm_put(dev_priv);
> > +
> > +   if (NEEDS_CSR_GT_PERF_WA(dev_priv))
> > +   intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
> 
> Which is the outer wakeref? I expect runtime, being device-global, to be
> the outer wakeref, with display_power_put being the inner subset.

Yes, logically the runtime ref is the outer one. But
intel_display_power_get() takes a runtime ref too and both runtime and
display power references can be held for "a long time" and so can be
taken in both order.

> -Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Chris Wilson
Quoting Tvrtko Ursulin (2017-11-29 11:53:41)
> 
> On 29/11/2017 11:40, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2017-11-29 11:34:27)
> >>
> >> On 29/11/2017 11:12, Daniel Vetter wrote:
> >>> I think given that DMC is strongly recommended there shouldn't be a real
> >>> cost with making this unconditional.
> >>
> >> I don't know, not liking it on the first go. But then again I have no
> >> idea how much power would that waste for use cases where DMC fw is
> >> deliberately not present.
> >>
> >> Perhaps it would be acceptable to mark GT busy during the async CSR
> >> load. Chris, any thoughts?
> > 
> > It's tightly coupled to requests, adding in an external call seems
> > troublesome.
> > 
> > What's the reason for depending on the CSR being loaded? The old fw is
> > broke no matter what, it doesn't get any more broken by us holding a
> > powerwell wakeref. I think we should go for simplicity and always take
> > the powerwell along with the rpm?
> 
> It's the unknown, maybe only for me, on how much power always holding 
> the power well would waste for use cases where DMC firmware has been 
> deliberately removed.
> 
> If I understand correctly that the Daniel's and your proposal is to just 
> got with HAS_CSR as the wa/ criteria, instead of fw loaded check.

If I am reading the powerwell code correctly, it already takes the dmc
fw into account. I would transfer the problem to there :) i.e. we have
an unconditional call from gt:mark_busy, gt:mark_idle and the powerwell
code knows what needs to be done under the different circumstances.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Tvrtko Ursulin


On 29/11/2017 11:40, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2017-11-29 11:34:27)


On 29/11/2017 11:12, Daniel Vetter wrote:

I think given that DMC is strongly recommended there shouldn't be a real
cost with making this unconditional.


I don't know, not liking it on the first go. But then again I have no
idea how much power would that waste for use cases where DMC fw is
deliberately not present.

Perhaps it would be acceptable to mark GT busy during the async CSR
load. Chris, any thoughts?


It's tightly coupled to requests, adding in an external call seems
troublesome.

What's the reason for depending on the CSR being loaded? The old fw is
broke no matter what, it doesn't get any more broken by us holding a
powerwell wakeref. I think we should go for simplicity and always take
the powerwell along with the rpm?


It's the unknown, maybe only for me, on how much power always holding 
the power well would waste for use cases where DMC firmware has been 
deliberately removed.


If I understand correctly that the Daniel's and your proposal is to just 
got with HAS_CSR as the wa/ criteria, instead of fw loaded check.


Regards,

Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Chris Wilson
Quoting Tvrtko Ursulin (2017-11-29 11:34:27)
> 
> On 29/11/2017 11:12, Daniel Vetter wrote:
> > I think given that DMC is strongly recommended there shouldn't be a real
> > cost with making this unconditional.
> 
> I don't know, not liking it on the first go. But then again I have no 
> idea how much power would that waste for use cases where DMC fw is 
> deliberately not present.
> 
> Perhaps it would be acceptable to mark GT busy during the async CSR 
> load. Chris, any thoughts?

It's tightly coupled to requests, adding in an external call seems
troublesome.

What's the reason for depending on the CSR being loaded? The old fw is
broke no matter what, it doesn't get any more broken by us holding a
powerwell wakeref. I think we should go for simplicity and always take
the powerwell along with the rpm?
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Tvrtko Ursulin


On 29/11/2017 11:12, Daniel Vetter wrote:

On Wed, Nov 29, 2017 at 10:59:27AM +, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

It seems that the DMC likes to transition between the DC states a lot when
there are no connected displays (no active power domains) during command
submission.

This activity on DC states has a negative impact on the performance of the
chip with huge latencies observed in the interrupt handlers and elsewhere.
Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
eight.

Work around it by introducing a new power domain named,
POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
held for the duration of command submission activity.

v2:
  * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
  * Protect macro body with braces. (Jani Nikula)

v3:
  * Add dedicated power domain for clarity. (Chris, Imre)
  * Commit message and comment text updates.
  * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC
firmware release.

Signed-off-by: Tvrtko Ursulin 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
Testcase: igt/gem_exec_nop/headless
Cc: Imre Deak 
Acked-by: Chris Wilson 
Cc: Chris Wilson 
Cc: Dmitry Rogozhkin 
---
  drivers/gpu/drm/i915/i915_drv.h |  5 +
  drivers/gpu/drm/i915/i915_gem.c |  4 
  drivers/gpu/drm/i915/i915_gem_request.c | 14 ++
  drivers/gpu/drm/i915/intel_runtime_pm.c |  4 
  4 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bddd65839f60..17340aadc566 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -398,6 +398,7 @@ enum intel_display_power_domain {
POWER_DOMAIN_AUX_D,
POWER_DOMAIN_GMBUS,
POWER_DOMAIN_MODESET,
+   POWER_DOMAIN_GT_IRQ,
POWER_DOMAIN_INIT,
  
  	POWER_DOMAIN_NUM,

@@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
  #define GT_FREQUENCY_MULTIPLIER 50
  #define GEN9_FREQ_SCALER 3
  
+#define NEEDS_CSR_GT_PERF_WA(dev_priv) \

+   (HAS_CSR(dev_priv) && (dev_priv)->csr.dmc_payload && \


Doesn't this check race with the async dmc loading? I.e. when you submit
something right at boot-up, before DMC is fully loaded, we might end up
with an unbalanced pm refcount.


Oh yeah, well spotted.


I think given that DMC is strongly recommended there shouldn't be a real
cost with making this unconditional.


I don't know, not liking it on the first go. But then again I have no 
idea how much power would that waste for use cases where DMC fw is 
deliberately not present.


Perhaps it would be acceptable to mark GT busy during the async CSR 
load. Chris, any thoughts?


Regards,

Tvrtko


With that changed:

Reviewed-by: Daniel Vetter 


+   IS_GEN9_BC(dev_priv) && !IS_SKYLAKE(dev_priv))
+
  #include "i915_trace.h"
  
  static inline bool intel_vtd_active(void)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 354b0546a191..feec2a621120 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3381,7 +3381,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
  
  	if (INTEL_GEN(dev_priv) >= 6)

gen6_rps_idle(dev_priv);
+
intel_runtime_pm_put(dev_priv);
+
+   if (NEEDS_CSR_GT_PERF_WA(dev_priv))
+   intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
  out_unlock:
mutex_unlock(_priv->drm.struct_mutex);
  
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c

index a90bdd26571f..619377a05810 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -251,6 +251,20 @@ static void mark_busy(struct drm_i915_private *i915)
  
  	GEM_BUG_ON(!i915->gt.active_requests);
  
+	/*

+* It seems that the DMC likes to transition between the DC states a lot
+* when there are no connected displays (no active power domains) during
+* command submission.
+*
+* This activity has negative impact on the performance of the chip with
+* huge latencies observed in the interrupt handler and elsewhere.
+*
+* Work around it by grabbing a GT IRQ power domain whilst there is any
+* GT activity, preventing any DC state transitions.
+*/
+   if (NEEDS_CSR_GT_PERF_WA(i915))
+   intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+
intel_runtime_pm_get_noresume(i915);
i915->gt.awake = true;
  
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c

index 8315499452dc..f491c7aaa096 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ 

Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Daniel Vetter
On Wed, Nov 29, 2017 at 10:59:27AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> It seems that the DMC likes to transition between the DC states a lot when
> there are no connected displays (no active power domains) during command
> submission.
> 
> This activity on DC states has a negative impact on the performance of the
> chip with huge latencies observed in the interrupt handlers and elsewhere.
> Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
> eight.
> 
> Work around it by introducing a new power domain named,
> POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
> held for the duration of command submission activity.
> 
> v2:
>  * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
>  * Protect macro body with braces. (Jani Nikula)
> 
> v3:
>  * Add dedicated power domain for clarity. (Chris, Imre)
>  * Commit message and comment text updates.
>  * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC
>firmware release.
> 
> Signed-off-by: Tvrtko Ursulin 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
> Testcase: igt/gem_exec_nop/headless
> Cc: Imre Deak 
> Acked-by: Chris Wilson 
> Cc: Chris Wilson 
> Cc: Dmitry Rogozhkin 
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  5 +
>  drivers/gpu/drm/i915/i915_gem.c |  4 
>  drivers/gpu/drm/i915/i915_gem_request.c | 14 ++
>  drivers/gpu/drm/i915/intel_runtime_pm.c |  4 
>  4 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bddd65839f60..17340aadc566 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ enum intel_display_power_domain {
>   POWER_DOMAIN_AUX_D,
>   POWER_DOMAIN_GMBUS,
>   POWER_DOMAIN_MODESET,
> + POWER_DOMAIN_GT_IRQ,
>   POWER_DOMAIN_INIT,
>  
>   POWER_DOMAIN_NUM,
> @@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
>  #define GT_FREQUENCY_MULTIPLIER 50
>  #define GEN9_FREQ_SCALER 3
>  
> +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
> + (HAS_CSR(dev_priv) && (dev_priv)->csr.dmc_payload && \

Doesn't this check race with the async dmc loading? I.e. when you submit
something right at boot-up, before DMC is fully loaded, we might end up
with an unbalanced pm refcount.

I think given that DMC is strongly recommended there shouldn't be a real
cost with making this unconditional.

With that changed:

Reviewed-by: Daniel Vetter 

> + IS_GEN9_BC(dev_priv) && !IS_SKYLAKE(dev_priv))
> +
>  #include "i915_trace.h"
>  
>  static inline bool intel_vtd_active(void)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 354b0546a191..feec2a621120 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3381,7 +3381,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  
>   if (INTEL_GEN(dev_priv) >= 6)
>   gen6_rps_idle(dev_priv);
> +
>   intel_runtime_pm_put(dev_priv);
> +
> + if (NEEDS_CSR_GT_PERF_WA(dev_priv))
> + intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
>  out_unlock:
>   mutex_unlock(_priv->drm.struct_mutex);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
> b/drivers/gpu/drm/i915/i915_gem_request.c
> index a90bdd26571f..619377a05810 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -251,6 +251,20 @@ static void mark_busy(struct drm_i915_private *i915)
>  
>   GEM_BUG_ON(!i915->gt.active_requests);
>  
> + /*
> +  * It seems that the DMC likes to transition between the DC states a lot
> +  * when there are no connected displays (no active power domains) during
> +  * command submission.
> +  *
> +  * This activity has negative impact on the performance of the chip with
> +  * huge latencies observed in the interrupt handler and elsewhere.
> +  *
> +  * Work around it by grabbing a GT IRQ power domain whilst there is any
> +  * GT activity, preventing any DC state transitions.
> +  */
> + if (NEEDS_CSR_GT_PERF_WA(i915))
> + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
> +
>   intel_runtime_pm_get_noresume(i915);
>   i915->gt.awake = true;
>  
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c 
> b/drivers/gpu/drm/i915/intel_runtime_pm.c
> index 8315499452dc..f491c7aaa096 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -130,6 +130,8 @@ intel_display_power_domain_str(enum 
> intel_display_power_domain domain)
>   return "INIT";
>   case POWER_DOMAIN_MODESET:
>   return "MODESET";
> + 

Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Chris Wilson
Quoting Tvrtko Ursulin (2017-11-29 10:59:27)
> From: Tvrtko Ursulin 
> 
> It seems that the DMC likes to transition between the DC states a lot when
> there are no connected displays (no active power domains) during command
> submission.
> 
> This activity on DC states has a negative impact on the performance of the
> chip with huge latencies observed in the interrupt handlers and elsewhere.
> Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
> eight.
> 
> Work around it by introducing a new power domain named,
> POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
> held for the duration of command submission activity.
> 
> v2:
>  * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
>  * Protect macro body with braces. (Jani Nikula)
> 
> v3:
>  * Add dedicated power domain for clarity. (Chris, Imre)
>  * Commit message and comment text updates.
>  * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC
>firmware release.
> 
> Signed-off-by: Tvrtko Ursulin 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
> Testcase: igt/gem_exec_nop/headless
> Cc: Imre Deak 
> Acked-by: Chris Wilson 
> Cc: Chris Wilson 
> Cc: Dmitry Rogozhkin 
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  5 +
>  drivers/gpu/drm/i915/i915_gem.c |  4 
>  drivers/gpu/drm/i915/i915_gem_request.c | 14 ++
>  drivers/gpu/drm/i915/intel_runtime_pm.c |  4 
>  4 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bddd65839f60..17340aadc566 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ enum intel_display_power_domain {
> POWER_DOMAIN_AUX_D,
> POWER_DOMAIN_GMBUS,
> POWER_DOMAIN_MODESET,
> +   POWER_DOMAIN_GT_IRQ,
> POWER_DOMAIN_INIT,
>  
> POWER_DOMAIN_NUM,
> @@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
>  #define GT_FREQUENCY_MULTIPLIER 50
>  #define GEN9_FREQ_SCALER 3
>  
> +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
> +   (HAS_CSR(dev_priv) && (dev_priv)->csr.dmc_payload && \

We can't have a dmc_payload unless CSR, right?

> +   IS_GEN9_BC(dev_priv) && !IS_SKYLAKE(dev_priv))
> +
>  #include "i915_trace.h"
>  
>  static inline bool intel_vtd_active(void)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 354b0546a191..feec2a621120 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3381,7 +3381,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  
> if (INTEL_GEN(dev_priv) >= 6)
> gen6_rps_idle(dev_priv);
> +
> intel_runtime_pm_put(dev_priv);
> +
> +   if (NEEDS_CSR_GT_PERF_WA(dev_priv))
> +   intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);

Which is the outer wakeref? I expect runtime, being device-global, to be
the outer wakeref, with display_power_put being the inner subset.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-11-29 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

It seems that the DMC likes to transition between the DC states a lot when
there are no connected displays (no active power domains) during command
submission.

This activity on DC states has a negative impact on the performance of the
chip with huge latencies observed in the interrupt handlers and elsewhere.
Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
eight.

Work around it by introducing a new power domain named,
POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
held for the duration of command submission activity.

v2:
 * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
 * Protect macro body with braces. (Jani Nikula)

v3:
 * Add dedicated power domain for clarity. (Chris, Imre)
 * Commit message and comment text updates.
 * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC
   firmware release.

Signed-off-by: Tvrtko Ursulin 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
Testcase: igt/gem_exec_nop/headless
Cc: Imre Deak 
Acked-by: Chris Wilson 
Cc: Chris Wilson 
Cc: Dmitry Rogozhkin 
---
 drivers/gpu/drm/i915/i915_drv.h |  5 +
 drivers/gpu/drm/i915/i915_gem.c |  4 
 drivers/gpu/drm/i915/i915_gem_request.c | 14 ++
 drivers/gpu/drm/i915/intel_runtime_pm.c |  4 
 4 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bddd65839f60..17340aadc566 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -398,6 +398,7 @@ enum intel_display_power_domain {
POWER_DOMAIN_AUX_D,
POWER_DOMAIN_GMBUS,
POWER_DOMAIN_MODESET,
+   POWER_DOMAIN_GT_IRQ,
POWER_DOMAIN_INIT,
 
POWER_DOMAIN_NUM,
@@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
+#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
+   (HAS_CSR(dev_priv) && (dev_priv)->csr.dmc_payload && \
+   IS_GEN9_BC(dev_priv) && !IS_SKYLAKE(dev_priv))
+
 #include "i915_trace.h"
 
 static inline bool intel_vtd_active(void)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 354b0546a191..feec2a621120 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3381,7 +3381,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
 
if (INTEL_GEN(dev_priv) >= 6)
gen6_rps_idle(dev_priv);
+
intel_runtime_pm_put(dev_priv);
+
+   if (NEEDS_CSR_GT_PERF_WA(dev_priv))
+   intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
 out_unlock:
mutex_unlock(_priv->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index a90bdd26571f..619377a05810 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -251,6 +251,20 @@ static void mark_busy(struct drm_i915_private *i915)
 
GEM_BUG_ON(!i915->gt.active_requests);
 
+   /*
+* It seems that the DMC likes to transition between the DC states a lot
+* when there are no connected displays (no active power domains) during
+* command submission.
+*
+* This activity has negative impact on the performance of the chip with
+* huge latencies observed in the interrupt handler and elsewhere.
+*
+* Work around it by grabbing a GT IRQ power domain whilst there is any
+* GT activity, preventing any DC state transitions.
+*/
+   if (NEEDS_CSR_GT_PERF_WA(i915))
+   intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+
intel_runtime_pm_get_noresume(i915);
i915->gt.awake = true;
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c 
b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 8315499452dc..f491c7aaa096 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -130,6 +130,8 @@ intel_display_power_domain_str(enum 
intel_display_power_domain domain)
return "INIT";
case POWER_DOMAIN_MODESET:
return "MODESET";
+   case POWER_DOMAIN_GT_IRQ:
+   return "GT_IRQ";
default:
MISSING_CASE(domain);
return "?";
@@ -1705,6 +1707,7 @@ void intel_display_power_put(struct drm_i915_private 
*dev_priv,
BIT_ULL(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \
SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \
+   BIT_ULL(POWER_DOMAIN_GT_IRQ) |  \
BIT_ULL(POWER_DOMAIN_MODESET) | \
BIT_ULL(POWER_DOMAIN_AUX_A) |   \
BIT_ULL(POWER_DOMAIN_INIT))

Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-05-08 Thread Jani Nikula
On Fri, 05 May 2017, Tvrtko Ursulin  wrote:
> From: Tvrtko Ursulin 
>
> It seems that the DMC likes to transition between the DC states
> a lot when there are no connected displays (no active power
> domains) during simple command submission.
>
> This frantic activity on DC states has a terrible impact on the
> performance of the overall chip with huge latencies observed in
> the interrupt handlers and elsewhere. Simple tests like
> igt/gem_latency -n 0 are slowed down by a factor of eight.
>
> Work around it by grabbing a modeset display power domain whilst
> there is any GT activity. This seems to be effective in making
> the DMC keep its paws off the chip.
>
> On the other hand this may have a negative impact on the overall
> power budget of the chip and so could still affect performance.
>
> This version limits the workaround got SKL GT3 and GT4 parts but
> this is just due the absence of testing on other platforms. It
> is possible we will have to apply it wider.
>
> Signed-off-by: Tvrtko Ursulin 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
> Testcase: igt/gem_exec_nop/headless
> Cc: Imre Deak 
> ---
>  drivers/gpu/drm/i915/i915_drv.h | 5 +
>  drivers/gpu/drm/i915/i915_gem.c | 4 
>  drivers/gpu/drm/i915/i915_gem_request.c | 3 +++
>  3 files changed, 12 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 320c16df1c9c..4d58e2e28c2f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2990,6 +2990,11 @@ intel_info(const struct drm_i915_private *dev_priv)
>  
>  #define HAS_DECOUPLED_MMIO(dev_priv) 
> (INTEL_INFO(dev_priv)->has_decoupled_mmio)
>  
> +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
> + HAS_CSR(dev_priv) && \
> + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) && \
> + (dev_priv)->csr.dmc_payload

Nitpick, the whole thing could use braces around it for consistency,
although I don't see any sane use of the macro causing precedence
surprises.

BR,
Jani.

> +
>  #include "i915_trace.h"
>  
>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private 
> *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b2727905ef2b..c52d863f409c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3200,7 +3200,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  
>   if (INTEL_GEN(dev_priv) >= 6)
>   gen6_rps_idle(dev_priv);
> +
>   intel_runtime_pm_put(dev_priv);
> +
> + if (NEEDS_CSR_GT_PERF_WA(dev_priv))
> + intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
>  out_unlock:
>   mutex_unlock(>struct_mutex);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
> b/drivers/gpu/drm/i915/i915_gem_request.c
> index 10361c7e3b37..10a3b51f6362 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -873,6 +873,9 @@ static void i915_gem_mark_busy(const struct 
> intel_engine_cs *engine)
>  
>   GEM_BUG_ON(!dev_priv->gt.active_requests);
>  
> + if (NEEDS_CSR_GT_PERF_WA(dev_priv))
> + intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
> +
>   intel_runtime_pm_get_noresume(dev_priv);
>   dev_priv->gt.awake = true;

-- 
Jani Nikula, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-05-05 Thread Ville Syrjälä
On Fri, May 05, 2017 at 05:13:58PM +0100, Tvrtko Ursulin wrote:
> 
> On 05/05/2017 15:49, Ville Syrjälä wrote:
> > On Fri, May 05, 2017 at 12:43:21PM +0100, Tvrtko Ursulin wrote:
> >> From: Tvrtko Ursulin 
> >>
> >> It seems that the DMC likes to transition between the DC states
> >> a lot when there are no connected displays (no active power
> >> domains) during simple command submission.
> >
> > Is it trapping on some interrupt register accesses or what? And
> > if so which registers are affected?
> 
> It looks like GT IIR or something along those lines it but I couldn't 
> say with total confidence.


for i in `seq 1 100` ; do IGT_NO_FORCEWAKE=1 intel_reg read  ; done


Should be a pretty trivial to run that against the suspect
registers.

> It is just a guess. Firmware binary 
> definitely "mentions" those registers as can be seen by inspecting it 
> with a hex editor.
> 
> The data I collected at least seems to present a correlation between the 
> batch frequency and DC state transition frequency:
> 
> tgt   DC  irqsirqs/s  irq batch/s DC/sDC/batch
> submittransitions /
> freq  batch
> 
> 1 2   78300   7830.00 1.964000.00 2000.00 0.50
> 9901  14000   52855   7550.71 1.325714.29 2000.00 0.35
> 9524  13500   49100   7328.36 1.235970.15 2014.93 0.34
> 9091  13500   49200   7235.29 1.235882.35 1985.29 0.34
> 5000  16900   33290   3916.47 0.834705.88 1988.24 0.42
>   27800   69550   4932.62 1.742836.88 1971.63 0.70
> 1667  57200   80200   2655.63 2.011324.50 1894.04 1.43
> 909   8   80034   1482.11 2.00740.74  1481.48 2.00
> 476   87000   80039   820.91  2.00410.26  892.31  2.18
> 196   16  80055   334.40  2.00167.08  668.34  4.00
> 
> Submitted batches were ~100us long in all cases. So with low batch 
> frequency it looks pretty believable. For example when we have 167.08 
> batches/s, we have 334.40 irq/s - which is double - as expected for 
> execlists. And we get again double that in terms of DC transitions per 
> second. Each irq is one GT IIR write from the GPU side, and another from 
> the CPU side.

GPU doesn't actually write the IIRs. It's just latching stuff from the
ISR. Whether the ISR edge or some higher level interrupt event actually
causes the DMC to kick into action isn't clear at all. My original
impressions was that it just traps the register accesses.

-- 
Ville Syrjälä
Intel OTC
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-05-05 Thread Tvrtko Ursulin


On 05/05/2017 15:49, Ville Syrjälä wrote:

On Fri, May 05, 2017 at 12:43:21PM +0100, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

It seems that the DMC likes to transition between the DC states
a lot when there are no connected displays (no active power
domains) during simple command submission.


Is it trapping on some interrupt register accesses or what? And
if so which registers are affected?


It looks like GT IIR or something along those lines it but I couldn't 
say with total confidence. It is just a guess. Firmware binary 
definitely "mentions" those registers as can be seen by inspecting it 
with a hex editor.


The data I collected at least seems to present a correlation between the 
batch frequency and DC state transition frequency:


tgt DC  irqsirqs/s  irq batch/s DC/sDC/batch
submit  transitions /
freqbatch

1   2   78300   7830.00 1.964000.00 2000.00 0.50
990114000   52855   7550.71 1.325714.29 2000.00 0.35
952413500   49100   7328.36 1.235970.15 2014.93 0.34
909113500   49200   7235.29 1.235882.35 1985.29 0.34
500016900   33290   3916.47 0.834705.88 1988.24 0.42
27800   69550   4932.62 1.742836.88 1971.63 0.70
166757200   80200   2655.63 2.011324.50 1894.04 1.43
909 8   80034   1482.11 2.00740.74  1481.48 2.00
476 87000   80039   820.91  2.00410.26  892.31  2.18
196 16  80055   334.40  2.00167.08  668.34  4.00

Submitted batches were ~100us long in all cases. So with low batch 
frequency it looks pretty believable. For example when we have 167.08 
batches/s, we have 334.40 irq/s - which is double - as expected for 
execlists. And we get again double that in terms of DC transitions per 
second. Each irq is one GT IIR write from the GPU side, and another from 
the CPU side.


This was actually Imre's suggestion btw. Before I was only looking at 
the irq/s to DC/s correlation which was confusing me a lot since there 
are more of the latter, so I thought it can't be the trigger. But once 
Imre mentioned the possibility that things are triggered by IIR register 
writes numbers started making more sense.


Then with higher batch frequencies the ratio starts falling, is it 
because DC transitions are too slow to keep up, or something else I am 
not sure.


Regards,

Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-05-05 Thread Ville Syrjälä
On Fri, May 05, 2017 at 12:43:21PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> It seems that the DMC likes to transition between the DC states
> a lot when there are no connected displays (no active power
> domains) during simple command submission.

Is it trapping on some interrupt register accesses or what? And
if so which registers are affected?

> 
> This frantic activity on DC states has a terrible impact on the
> performance of the overall chip with huge latencies observed in
> the interrupt handlers and elsewhere. Simple tests like
> igt/gem_latency -n 0 are slowed down by a factor of eight.
> 
> Work around it by grabbing a modeset display power domain whilst
> there is any GT activity. This seems to be effective in making
> the DMC keep its paws off the chip.
> 
> On the other hand this may have a negative impact on the overall
> power budget of the chip and so could still affect performance.
> 
> This version limits the workaround got SKL GT3 and GT4 parts but
> this is just due the absence of testing on other platforms. It
> is possible we will have to apply it wider.
> 
> Signed-off-by: Tvrtko Ursulin 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
> Testcase: igt/gem_exec_nop/headless
> Cc: Imre Deak 
> ---
>  drivers/gpu/drm/i915/i915_drv.h | 5 +
>  drivers/gpu/drm/i915/i915_gem.c | 4 
>  drivers/gpu/drm/i915/i915_gem_request.c | 3 +++
>  3 files changed, 12 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 320c16df1c9c..4d58e2e28c2f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2990,6 +2990,11 @@ intel_info(const struct drm_i915_private *dev_priv)
>  
>  #define HAS_DECOUPLED_MMIO(dev_priv) 
> (INTEL_INFO(dev_priv)->has_decoupled_mmio)
>  
> +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
> + HAS_CSR(dev_priv) && \
> + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) && \
> + (dev_priv)->csr.dmc_payload
> +
>  #include "i915_trace.h"
>  
>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private 
> *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b2727905ef2b..c52d863f409c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3200,7 +3200,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  
>   if (INTEL_GEN(dev_priv) >= 6)
>   gen6_rps_idle(dev_priv);
> +
>   intel_runtime_pm_put(dev_priv);
> +
> + if (NEEDS_CSR_GT_PERF_WA(dev_priv))
> + intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
>  out_unlock:
>   mutex_unlock(>struct_mutex);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
> b/drivers/gpu/drm/i915/i915_gem_request.c
> index 10361c7e3b37..10a3b51f6362 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -873,6 +873,9 @@ static void i915_gem_mark_busy(const struct 
> intel_engine_cs *engine)
>  
>   GEM_BUG_ON(!dev_priv->gt.active_requests);
>  
> + if (NEEDS_CSR_GT_PERF_WA(dev_priv))
> + intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
> +
>   intel_runtime_pm_get_noresume(dev_priv);
>   dev_priv->gt.awake = true;
>  
> -- 
> 2.9.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-05-05 Thread Chris Wilson
On Fri, May 05, 2017 at 12:43:21PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> It seems that the DMC likes to transition between the DC states
> a lot when there are no connected displays (no active power
> domains) during simple command submission.
> 
> This frantic activity on DC states has a terrible impact on the
> performance of the overall chip with huge latencies observed in
> the interrupt handlers and elsewhere. Simple tests like
> igt/gem_latency -n 0 are slowed down by a factor of eight.
> 
> Work around it by grabbing a modeset display power domain whilst
> there is any GT activity. This seems to be effective in making
> the DMC keep its paws off the chip.
> 
> On the other hand this may have a negative impact on the overall
> power budget of the chip and so could still affect performance.

Please add this as a comment to the code, I think in mark_busy(). I
don't think this w/a will remain applicable forever and so merits a
continual reminder and being discussed again in future.
 
> This version limits the workaround got SKL GT3 and GT4 parts but
> this is just due the absence of testing on other platforms. It
> is possible we will have to apply it wider.
> 
> Signed-off-by: Tvrtko Ursulin 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
> Testcase: igt/gem_exec_nop/headless
> Cc: Imre Deak 
> ---
>  drivers/gpu/drm/i915/i915_drv.h | 5 +
>  drivers/gpu/drm/i915/i915_gem.c | 4 
>  drivers/gpu/drm/i915/i915_gem_request.c | 3 +++
>  3 files changed, 12 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 320c16df1c9c..4d58e2e28c2f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2990,6 +2990,11 @@ intel_info(const struct drm_i915_private *dev_priv)
>  
>  #define HAS_DECOUPLED_MMIO(dev_priv) 
> (INTEL_INFO(dev_priv)->has_decoupled_mmio)
>  
> +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
> + HAS_CSR(dev_priv) && \
> + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) && \
> + (dev_priv)->csr.dmc_payload

csr.dmc_payload is a bit of a surprise, but looks correct.

Acked-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Restore GT performance in headless mode with DMC loaded

2017-05-05 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

It seems that the DMC likes to transition between the DC states
a lot when there are no connected displays (no active power
domains) during simple command submission.

This frantic activity on DC states has a terrible impact on the
performance of the overall chip with huge latencies observed in
the interrupt handlers and elsewhere. Simple tests like
igt/gem_latency -n 0 are slowed down by a factor of eight.

Work around it by grabbing a modeset display power domain whilst
there is any GT activity. This seems to be effective in making
the DMC keep its paws off the chip.

On the other hand this may have a negative impact on the overall
power budget of the chip and so could still affect performance.

This version limits the workaround got SKL GT3 and GT4 parts but
this is just due the absence of testing on other platforms. It
is possible we will have to apply it wider.

Signed-off-by: Tvrtko Ursulin 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
Testcase: igt/gem_exec_nop/headless
Cc: Imre Deak 
---
 drivers/gpu/drm/i915/i915_drv.h | 5 +
 drivers/gpu/drm/i915/i915_gem.c | 4 
 drivers/gpu/drm/i915/i915_gem_request.c | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 320c16df1c9c..4d58e2e28c2f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2990,6 +2990,11 @@ intel_info(const struct drm_i915_private *dev_priv)
 
 #define HAS_DECOUPLED_MMIO(dev_priv) (INTEL_INFO(dev_priv)->has_decoupled_mmio)
 
+#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
+   HAS_CSR(dev_priv) && \
+   (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) && \
+   (dev_priv)->csr.dmc_payload
+
 #include "i915_trace.h"
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private 
*dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b2727905ef2b..c52d863f409c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3200,7 +3200,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
 
if (INTEL_GEN(dev_priv) >= 6)
gen6_rps_idle(dev_priv);
+
intel_runtime_pm_put(dev_priv);
+
+   if (NEEDS_CSR_GT_PERF_WA(dev_priv))
+   intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
 out_unlock:
mutex_unlock(>struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 10361c7e3b37..10a3b51f6362 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -873,6 +873,9 @@ static void i915_gem_mark_busy(const struct intel_engine_cs 
*engine)
 
GEM_BUG_ON(!dev_priv->gt.active_requests);
 
+   if (NEEDS_CSR_GT_PERF_WA(dev_priv))
+   intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
+
intel_runtime_pm_get_noresume(dev_priv);
dev_priv->gt.awake = true;
 
-- 
2.9.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx