Re: [Intel-gfx] [PATCH v2] drm/i915: Stop the machine as we install the wedged submit_request handler
Chris Wilson writes: > In order to prevent a race between the old callback submitting an > incomplete request and i915_gem_set_wedged() installing its nop handler, > we must ensure that the swap occurs when the machine is idle > (stop_machine). > > v2: move context lost from out of BKL. > > Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala > --- > drivers/gpu/drm/i915/i915_gem.c | 26 +- > 1 file changed, 21 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d0dcaf35b429..e80ad6906fb4 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -38,6 +38,7 @@ > #include > #include > #include > +#include <linux/stop_machine.h> > #include > #include > #include > @@ -2770,6 +2771,12 @@ static void nop_submit_request(struct > drm_i915_gem_request *request) > > static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) > { > + /* We need to be sure that no thread is running the old callback as > + * we install the nop handler (otherwise we would submit a request > + * to hardware that will never complete). In order to prevent this > + * race, we wait until the machine is idle before making the swap > + * (using stop_machine()). 
> + */ > engine->submit_request = nop_submit_request; > > /* Mark all pending requests as complete so that any concurrent > @@ -2800,20 +2807,29 @@ static void i915_gem_cleanup_engine(struct > intel_engine_cs *engine) > } > } > > -void i915_gem_set_wedged(struct drm_i915_private *dev_priv) > +static int __i915_gem_set_wedged_BKL(void *data) > { > + struct drm_i915_private *i915 = data; > struct intel_engine_cs *engine; > enum intel_engine_id id; > > + for_each_engine(engine, i915, id) > + i915_gem_cleanup_engine(engine); > + > + return 0; > +} > + > +void i915_gem_set_wedged(struct drm_i915_private *dev_priv) > +{ > lockdep_assert_held(&dev_priv->drm.struct_mutex); > set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); > > - i915_gem_context_lost(dev_priv); > - for_each_engine(engine, dev_priv, id) > - i915_gem_cleanup_engine(engine); > - mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); > + stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); > > + i915_gem_context_lost(dev_priv); > i915_gem_retire_requests(dev_priv); > + > + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); > } > > static void > -- > 2.10.2 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2] drm/i915: Stop the machine as we install the wedged submit_request handler
In order to prevent a race between the old callback submitting an incomplete request and i915_gem_set_wedged() installing its nop handler, we must ensure that the swap occurs when the machine is idle (stop_machine). v2: move context lost from out of BKL. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 26 +- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d0dcaf35b429..e80ad6906fb4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,6 +38,7 @@ #include #include #include +#include <linux/stop_machine.h> #include #include #include @@ -2770,6 +2771,12 @@ static void nop_submit_request(struct drm_i915_gem_request *request) static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) { + /* We need to be sure that no thread is running the old callback as + * we install the nop handler (otherwise we would submit a request + * to hardware that will never complete). In order to prevent this + * race, we wait until the machine is idle before making the swap + * (using stop_machine()). 
+ */ engine->submit_request = nop_submit_request; /* Mark all pending requests as complete so that any concurrent @@ -2800,20 +2807,29 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) } } -void i915_gem_set_wedged(struct drm_i915_private *dev_priv) +static int __i915_gem_set_wedged_BKL(void *data) { + struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + for_each_engine(engine, i915, id) + i915_gem_cleanup_engine(engine); + + return 0; +} + +void i915_gem_set_wedged(struct drm_i915_private *dev_priv) +{ lockdep_assert_held(&dev_priv->drm.struct_mutex); set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); - i915_gem_context_lost(dev_priv); - for_each_engine(engine, dev_priv, id) - i915_gem_cleanup_engine(engine); - mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); + stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); + i915_gem_context_lost(dev_priv); i915_gem_retire_requests(dev_priv); + + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); } static void -- 2.10.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2] drm/i915: Stop the machine as we install the wedged submit_request handler
In order to prevent a race between the old callback submitting an incomplete request and i915_gem_set_wedged() installing its nop handler, we must ensure that the swap occurs when the machine is idle (stop_machine). v2: move context lost from out of BKL. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 26 +- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d0dcaf35b429..e80ad6906fb4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,6 +38,7 @@ #include #include #include +#include <linux/stop_machine.h> #include #include #include @@ -2770,6 +2771,12 @@ static void nop_submit_request(struct drm_i915_gem_request *request) static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) { + /* We need to be sure that no thread is running the old callback as + * we install the nop handler (otherwise we would submit a request + * to hardware that will never complete). In order to prevent this + * race, we wait until the machine is idle before making the swap + * (using stop_machine()). 
+ */ engine->submit_request = nop_submit_request; /* Mark all pending requests as complete so that any concurrent @@ -2800,20 +2807,29 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) } } -void i915_gem_set_wedged(struct drm_i915_private *dev_priv) +static int __i915_gem_set_wedged_BKL(void *data) { + struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + for_each_engine(engine, i915, id) + i915_gem_cleanup_engine(engine); + + return 0; +} + +void i915_gem_set_wedged(struct drm_i915_private *dev_priv) +{ lockdep_assert_held(&dev_priv->drm.struct_mutex); set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); - i915_gem_context_lost(dev_priv); - for_each_engine(engine, dev_priv, id) - i915_gem_cleanup_engine(engine); - mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); + stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); + i915_gem_context_lost(dev_priv); i915_gem_retire_requests(dev_priv); + + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); } static void -- 2.10.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx