Re: [Intel-gfx] [PATCH v2] drm/i915: Stop the machine as we install the wedged submit_request handler

2016-11-22 Thread Mika Kuoppala
Chris Wilson  writes:

> In order to prevent a race between the old callback submitting an
> incomplete request and i915_gem_set_wedged() installing its nop handler,
> we must ensure that the swap occurs when the machine is idle
> (stop_machine).
>
> v2: move context lost from out of BKL.
>
> Signed-off-by: Chris Wilson 

Reviewed-by: Mika Kuoppala 

> ---
>  drivers/gpu/drm/i915/i915_gem.c | 26 +++++++++++++++++++++-----
>  1 file changed, 21 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d0dcaf35b429..e80ad6906fb4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -38,6 +38,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/stop_machine.h>
>  #include 
>  #include 
>  #include 
> @@ -2770,6 +2771,12 @@ static void nop_submit_request(struct drm_i915_gem_request *request)
>  
>  static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
>  {
> + /* We need to be sure that no thread is running the old callback as
> +  * we install the nop handler (otherwise we would submit a request
> +  * to hardware that will never complete). In order to prevent this
> +  * race, we wait until the machine is idle before making the swap
> +  * (using stop_machine()).
> +  */
>   engine->submit_request = nop_submit_request;
>  
>   /* Mark all pending requests as complete so that any concurrent
> @@ -2800,20 +2807,29 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
>   }
>  }
>  
> -void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
> +static int __i915_gem_set_wedged_BKL(void *data)
>  {
> + struct drm_i915_private *i915 = data;
>   struct intel_engine_cs *engine;
>   enum intel_engine_id id;
>  
> + for_each_engine(engine, i915, id)
> + i915_gem_cleanup_engine(engine);
> +
> + return 0;
> +}
> +
> +void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
> +{
>   lockdep_assert_held(&dev_priv->drm.struct_mutex);
>   set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
>  
> - i915_gem_context_lost(dev_priv);
> - for_each_engine(engine, dev_priv, id)
> - i915_gem_cleanup_engine(engine);
> - mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
> + stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
>  
> + i915_gem_context_lost(dev_priv);
>   i915_gem_retire_requests(dev_priv);
> +
> + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
>  }
>  
>  static void
> -- 
> 2.10.2
>
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] drm/i915: Stop the machine as we install the wedged submit_request handler

2016-11-21 Thread Chris Wilson
In order to prevent a race between the old callback submitting an
incomplete request and i915_gem_set_wedged() installing its nop handler,
we must ensure that the swap occurs when the machine is idle
(stop_machine).

v2: move context lost from out of BKL.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d0dcaf35b429..e80ad6906fb4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include <linux/stop_machine.h>
 #include 
 #include 
 #include 
@@ -2770,6 +2771,12 @@ static void nop_submit_request(struct drm_i915_gem_request *request)
 
 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
 {
+   /* We need to be sure that no thread is running the old callback as
+* we install the nop handler (otherwise we would submit a request
+* to hardware that will never complete). In order to prevent this
+* race, we wait until the machine is idle before making the swap
+* (using stop_machine()).
+*/
engine->submit_request = nop_submit_request;
 
/* Mark all pending requests as complete so that any concurrent
@@ -2800,20 +2807,29 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
}
 }
 
-void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
+static int __i915_gem_set_wedged_BKL(void *data)
 {
+   struct drm_i915_private *i915 = data;
struct intel_engine_cs *engine;
enum intel_engine_id id;
 
+   for_each_engine(engine, i915, id)
+   i915_gem_cleanup_engine(engine);
+
+   return 0;
+}
+
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
+{
lockdep_assert_held(&dev_priv->drm.struct_mutex);
set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
-   i915_gem_context_lost(dev_priv);
-   for_each_engine(engine, dev_priv, id)
-   i915_gem_cleanup_engine(engine);
-   mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
+   stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
 
+   i915_gem_context_lost(dev_priv);
i915_gem_retire_requests(dev_priv);
+
+   mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 }
 
 static void
-- 
2.10.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] drm/i915: Stop the machine as we install the wedged submit_request handler

2016-11-21 Thread Chris Wilson
In order to prevent a race between the old callback submitting an
incomplete request and i915_gem_set_wedged() installing its nop handler,
we must ensure that the swap occurs when the machine is idle
(stop_machine).

v2: move context lost from out of BKL.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d0dcaf35b429..e80ad6906fb4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include <linux/stop_machine.h>
 #include 
 #include 
 #include 
@@ -2770,6 +2771,12 @@ static void nop_submit_request(struct drm_i915_gem_request *request)
 
 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
 {
+   /* We need to be sure that no thread is running the old callback as
+* we install the nop handler (otherwise we would submit a request
+* to hardware that will never complete). In order to prevent this
+* race, we wait until the machine is idle before making the swap
+* (using stop_machine()).
+*/
engine->submit_request = nop_submit_request;
 
/* Mark all pending requests as complete so that any concurrent
@@ -2800,20 +2807,29 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
}
 }
 
-void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
+static int __i915_gem_set_wedged_BKL(void *data)
 {
+   struct drm_i915_private *i915 = data;
struct intel_engine_cs *engine;
enum intel_engine_id id;
 
+   for_each_engine(engine, i915, id)
+   i915_gem_cleanup_engine(engine);
+
+   return 0;
+}
+
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
+{
lockdep_assert_held(&dev_priv->drm.struct_mutex);
set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
-   i915_gem_context_lost(dev_priv);
-   for_each_engine(engine, dev_priv, id)
-   i915_gem_cleanup_engine(engine);
-   mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
+   stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
 
+   i915_gem_context_lost(dev_priv);
i915_gem_retire_requests(dev_priv);
+
+   mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 }
 
 static void
-- 
2.10.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx