Re: [Intel-gfx] [PATCH 01/10] drm/i915: Disable preemption and sleeping while using the punit sideband

2019-04-23 Thread Ville Syrjälä
On Fri, Apr 19, 2019 at 06:13:53PM +0100, Chris Wilson wrote:
> While we talk to the punit over its sideband, we need to prevent the cpu
> from sleeping in order to prevent a potential machine hang.
> 
> Note that by itself, it appears that pm_qos_update_request (via
> intel_idle) doesn't provide a sufficient barrier to ensure that all core
> are indeed awake (out of Cstate) and that the package is awake. To do so,
> we need to supplement the pm_qos with a manual ping on_each_cpu.
> 
> v2: Restrict the heavy-weight wakeup to just the ISOF_PORT_PUNIT, there
> is insufficient evidence to implicate a wider problem atm. Similarly,
> restrict the w/a to Valleyview, as Cherryview doesn't have an angry cadre
> of users.
> 
> The working theory, courtesy of Ville and Hans, is the issue lies within
> the power delivery and so is likely to be unit and board specific and
> occurs when both the unit/fw require extra power at the same time as the
> cpu package is changing its own power state.
> 
> References: https://bugzilla.kernel.org/show_bug.cgi?id=109051
> References: https://bugs.freedesktop.org/show_bug.cgi?id=102657
> References: https://bugzilla.kernel.org/show_bug.cgi?id=195255
> Signed-off-by: Chris Wilson 
> Cc: Mika Kuoppala 
> Cc: Hans de Goede 
> Cc: Ville Syrjälä 
> ---
>  drivers/gpu/drm/i915/i915_drv.c   |   6 +
>  drivers/gpu/drm/i915/i915_drv.h   |   1 +
>  drivers/gpu/drm/i915/intel_sideband.c | 203 +-
>  3 files changed, 139 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5e2ae2300454..9e657a0410c2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -884,6 +884,9 @@ static int i915_driver_init_early(struct drm_i915_private 
> *dev_priv)
>   mutex_init(_priv->backlight_lock);
>  
>   mutex_init(_priv->sb_lock);
> + pm_qos_add_request(_priv->sb_qos,
> +PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
> +
>   mutex_init(_priv->av_mutex);
>   mutex_init(_priv->wm.wm_mutex);
>   mutex_init(_priv->pps_mutex);
> @@ -943,6 +946,9 @@ static void i915_driver_cleanup_early(struct 
> drm_i915_private *dev_priv)
>   i915_gem_cleanup_early(dev_priv);
>   i915_workqueues_cleanup(dev_priv);
>   i915_engines_cleanup(dev_priv);
> +
> + pm_qos_remove_request(_priv->sb_qos);
> + mutex_destroy(_priv->sb_lock);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 71612e7fc8bc..afb979ff416f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1561,6 +1561,7 @@ struct drm_i915_private {
>  
>   /* Sideband mailbox protection */
>   struct mutex sb_lock;
> + struct pm_qos_request sb_qos;

Should we call it punit_qos maybe?

I'm a bit sad to all the complexity for this, but no one has come up
with a way to really fix it so I guess we just have to swallow this.

Reviewed-by: Ville Syrjälä 

>  
>   /** Cached value of IMR to avoid reads in updating the bitfield */
>   union {
> diff --git a/drivers/gpu/drm/i915/intel_sideband.c 
> b/drivers/gpu/drm/i915/intel_sideband.c
> index 57de41b1f989..fc8913461622 100644
> --- a/drivers/gpu/drm/i915/intel_sideband.c
> +++ b/drivers/gpu/drm/i915/intel_sideband.c
> @@ -22,6 +22,8 @@
>   *
>   */
>  
> +#include 
> +
>  #include "i915_drv.h"
>  #include "intel_drv.h"
>  
> @@ -39,19 +41,50 @@
>  /* Private register write, double-word addressing, non-posted */
>  #define SB_CRWRDA_NP 0x07
>  
> -static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
> -u32 port, u32 opcode, u32 addr, u32 *val)
> +static void ping(void *info)
>  {
> - u32 cmd, be = 0xf, bar = 0;
> - bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
> +}
>  
> - cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
> - (port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
> - (bar << IOSF_BAR_SHIFT);
> +static void __vlv_punit_get(struct drm_i915_private *i915)
> +{
> + iosf_mbi_punit_acquire();
>  
> - WARN_ON(!mutex_is_locked(_priv->sb_lock));
> + /*
> +  * Prevent the cpu from sleeping while we use this sideband, otherwise
> +  * the punit may cause a machine hang. The issue appears to be isolated
> +  * with changing the power state of the CPU package while changing
> +  * the power state via the punit, and we have only observed it
> +  * reliably on 4-core Baytail systems suggesting the issue is in the
> +  * power delivery mechanism and likely to be be board/function
> +  * specific. Hence we presume the workaround needs only be applied
> +  * to the Valleyview P-unit and not all sideband communications.
> +  */
> + if (IS_VALLEYVIEW(i915)) {
> + pm_qos_update_request(>sb_qos, 0);
> + on_each_cpu(ping, NULL, 

[Intel-gfx] [PATCH 01/10] drm/i915: Disable preemption and sleeping while using the punit sideband

2019-04-19 Thread Chris Wilson
While we talk to the punit over its sideband, we need to prevent the cpu
from sleeping in order to prevent a potential machine hang.

Note that by itself, it appears that pm_qos_update_request (via
intel_idle) doesn't provide a sufficient barrier to ensure that all core
are indeed awake (out of Cstate) and that the package is awake. To do so,
we need to supplement the pm_qos with a manual ping on_each_cpu.

v2: Restrict the heavy-weight wakeup to just the ISOF_PORT_PUNIT, there
is insufficient evidence to implicate a wider problem atm. Similarly,
restrict the w/a to Valleyview, as Cherryview doesn't have an angry cadre
of users.

The working theory, courtesy of Ville and Hans, is the issue lies within
the power delivery and so is likely to be unit and board specific and
occurs when both the unit/fw require extra power at the same time as the
cpu package is changing its own power state.

References: https://bugzilla.kernel.org/show_bug.cgi?id=109051
References: https://bugs.freedesktop.org/show_bug.cgi?id=102657
References: https://bugzilla.kernel.org/show_bug.cgi?id=195255
Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Hans de Goede 
Cc: Ville Syrjälä 
---
 drivers/gpu/drm/i915/i915_drv.c   |   6 +
 drivers/gpu/drm/i915/i915_drv.h   |   1 +
 drivers/gpu/drm/i915/intel_sideband.c | 203 +-
 3 files changed, 139 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5e2ae2300454..9e657a0410c2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -884,6 +884,9 @@ static int i915_driver_init_early(struct drm_i915_private 
*dev_priv)
mutex_init(_priv->backlight_lock);
 
mutex_init(_priv->sb_lock);
+   pm_qos_add_request(_priv->sb_qos,
+  PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
+
mutex_init(_priv->av_mutex);
mutex_init(_priv->wm.wm_mutex);
mutex_init(_priv->pps_mutex);
@@ -943,6 +946,9 @@ static void i915_driver_cleanup_early(struct 
drm_i915_private *dev_priv)
i915_gem_cleanup_early(dev_priv);
i915_workqueues_cleanup(dev_priv);
i915_engines_cleanup(dev_priv);
+
+   pm_qos_remove_request(_priv->sb_qos);
+   mutex_destroy(_priv->sb_lock);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 71612e7fc8bc..afb979ff416f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1561,6 +1561,7 @@ struct drm_i915_private {
 
/* Sideband mailbox protection */
struct mutex sb_lock;
+   struct pm_qos_request sb_qos;
 
/** Cached value of IMR to avoid reads in updating the bitfield */
union {
diff --git a/drivers/gpu/drm/i915/intel_sideband.c 
b/drivers/gpu/drm/i915/intel_sideband.c
index 57de41b1f989..fc8913461622 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -22,6 +22,8 @@
  *
  */
 
+#include 
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -39,19 +41,50 @@
 /* Private register write, double-word addressing, non-posted */
 #define SB_CRWRDA_NP   0x07
 
-static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
-  u32 port, u32 opcode, u32 addr, u32 *val)
+static void ping(void *info)
 {
-   u32 cmd, be = 0xf, bar = 0;
-   bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
+}
 
-   cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
-   (port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
-   (bar << IOSF_BAR_SHIFT);
+static void __vlv_punit_get(struct drm_i915_private *i915)
+{
+   iosf_mbi_punit_acquire();
 
-   WARN_ON(!mutex_is_locked(_priv->sb_lock));
+   /*
+* Prevent the cpu from sleeping while we use this sideband, otherwise
+* the punit may cause a machine hang. The issue appears to be isolated
+* with changing the power state of the CPU package while changing
+* the power state via the punit, and we have only observed it
+* reliably on 4-core Baytail systems suggesting the issue is in the
+* power delivery mechanism and likely to be be board/function
+* specific. Hence we presume the workaround needs only be applied
+* to the Valleyview P-unit and not all sideband communications.
+*/
+   if (IS_VALLEYVIEW(i915)) {
+   pm_qos_update_request(>sb_qos, 0);
+   on_each_cpu(ping, NULL, 1);
+   }
+}
 
-   if (intel_wait_for_register(_priv->uncore,
+static void __vlv_punit_put(struct drm_i915_private *i915)
+{
+   if (IS_VALLEYVIEW(i915))
+   pm_qos_update_request(>sb_qos, PM_QOS_DEFAULT_VALUE);
+
+   iosf_mbi_punit_release();
+}
+
+static int vlv_sideband_rw(struct drm_i915_private *i915,
+  u32 devfn, u32 port, u32 opcode,
+  

[Intel-gfx] [PATCH 01/10] drm/i915: Disable preemption and sleeping while using the punit sideband

2018-03-07 Thread Chris Wilson
While we talk to the punit over its sideband, we need to prevent the cpu
from sleeping in order to prevent a potential machine hang.

Note that by itself, it appears that pm_qos_update_request (via
intel_idle) doesn't provide a sufficient barrier to ensure that all core
are indeed awake (out of Cstate) and that the package is awake. To do so,
we need to supplement the pm_qos with a manual ping on_each_cpu.

v2: Restrict the heavy-weight wakeup to just the ISOF_PORT_PUNIT, there
is insufficient evidence to implicate a wider problem atm. Similarly,
restrict the w/a to Valleyview, as Cherryview doesn't have an angry cadre
of users.

The working theory, courtesy of Ville and Hans, is the issue lies within
the power delivery and so is likely to be unit and board specific and
occurs when both the unit/fw require extra power at the same time as the
cpu package is changing its own power state.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=109051
References: https://bugs.freedesktop.org/show_bug.cgi?id=102657
References: https://bugzilla.kernel.org/show_bug.cgi?id=195255
Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Hans de Goede 
Cc: Ville Syrjälä 
---
 drivers/gpu/drm/i915/i915_drv.c   |  6 +++
 drivers/gpu/drm/i915/i915_drv.h   |  1 +
 drivers/gpu/drm/i915/intel_sideband.c | 89 +++
 3 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d61b51c0bf0b..bfb9d7b6b678 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -913,6 +913,9 @@ static int i915_driver_init_early(struct drm_i915_private 
*dev_priv,
spin_lock_init(_priv->uncore.lock);
 
mutex_init(_priv->sb_lock);
+   pm_qos_add_request(_priv->sb_qos,
+  PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
+
mutex_init(_priv->modeset_restore_lock);
mutex_init(_priv->av_mutex);
mutex_init(_priv->wm.wm_mutex);
@@ -964,6 +967,9 @@ static void i915_driver_cleanup_early(struct 
drm_i915_private *dev_priv)
intel_irq_fini(dev_priv);
i915_workqueues_cleanup(dev_priv);
i915_engines_cleanup(dev_priv);
+
+   pm_qos_remove_request(_priv->sb_qos);
+   mutex_destroy(_priv->sb_lock);
 }
 
 static int i915_mmio_setup(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 604389d0b6a3..b8da17304ebe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1907,6 +1907,7 @@ struct drm_i915_private {
 
/* Sideband mailbox protection */
struct mutex sb_lock;
+   struct pm_qos_request sb_qos;
 
/** Cached value of IMR to avoid reads in updating the bitfield */
union {
diff --git a/drivers/gpu/drm/i915/intel_sideband.c 
b/drivers/gpu/drm/i915/intel_sideband.c
index 75c872bb8cc9..d56eda33734e 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -22,6 +22,8 @@
  *
  */
 
+#include 
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -39,18 +41,48 @@
 /* Private register write, double-word addressing, non-posted */
 #define SB_CRWRDA_NP   0x07
 
-static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
-  u32 port, u32 opcode, u32 addr, u32 *val)
+static void ping(void *info)
 {
-   u32 cmd, be = 0xf, bar = 0;
-   bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
+}
 
-   cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
-   (port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
-   (bar << IOSF_BAR_SHIFT);
+static void __vlv_punit_get(struct drm_i915_private *dev_priv)
+{
+   iosf_mbi_punit_acquire();
 
-   WARN_ON(!mutex_is_locked(_priv->sb_lock));
+   /*
+* Prevent the cpu from sleeping while we use this sideband, otherwise
+* the punit may cause a machine hang. The issue appears to be isolated
+* with changing the power state of the CPU package while changing
+* the power state via the punit, and we have only observed it
+* reliably on 4-core Baytail systems suggesting the issue is in the
+* power delivery mechanism and likely to be be board/function
+* specific. Hence we presume the workaround needs only be applied
+* to the Valleyview P-unit and not all sideband communications.
+*/
+   if (IS_VALLEYVIEW(dev_priv)) {
+   pm_qos_update_request(_priv->sb_qos, 0);
+   on_each_cpu(ping, NULL, 1);
+   }
+}
+
+static void __vlv_punit_put(struct drm_i915_private *dev_priv)
+{
+   if (IS_VALLEYVIEW(dev_priv))
+   pm_qos_update_request(_priv->sb_qos, PM_QOS_DEFAULT_VALUE);
 
+