Chris Wilson <ch...@chris-wilson.co.uk> writes:

> As we make preparations to reset the GPU state, we assume that the GPU
> is hung and will not advance. Make this assumption more explicit by
> setting the STOP_RING bit on the engines as part of our early reset
> preparations.
>
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
> Cc: Michel Thierry <michel.thie...@intel.com>
> ---
> See 
> https://intel-gfx-ci.01.org/tree/drm-tip/kasan_15/fi-bdw-5557u/pstore22-1519879816_Panic_3.log
> for a bizarre error that kasan-farm keeps on tripping over. Maybe related
> to this?
> ---
>  drivers/gpu/drm/i915/i915_drv.c     |  3 +++
>  drivers/gpu/drm/i915/i915_drv.h     | 10 ++++++++--
>  drivers/gpu/drm/i915/intel_uncore.c | 33 +++++++++++++++++++++++++++++++++
>  3 files changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index aaa861b51024..925f5722d077 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1908,6 +1908,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
>       error->reset_count++;
>  
>       disable_irq(i915->drm.irq);
> +     intel_gpu_reset_prepare(i915, ALL_ENGINES);
> +
>       ret = i915_gem_reset_prepare(i915);
>       if (ret) {
>               dev_err(i915->drm.dev, "GPU recovery failed\n");
> @@ -1969,6 +1971,7 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
>  
>  finish:
>       i915_gem_reset_finish(i915);
> +     intel_gpu_reset_finish(i915, ALL_ENGINES);
>       enable_irq(i915->drm.irq);
>  
>  wakeup:
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 10c9e5e619ab..b95e675e0834 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2957,8 +2957,14 @@ extern const struct dev_pm_ops i915_pm_ops;
>  extern int i915_driver_load(struct pci_dev *pdev,
>                           const struct pci_device_id *ent);
>  extern void i915_driver_unload(struct drm_device *dev);
> -extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
> -extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
> +
> +bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
> +
> +void intel_gpu_reset_prepare(struct drm_i915_private *dev_priv,
> +                          unsigned engine_mask);
> +int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
> +void intel_gpu_reset_finish(struct drm_i915_private *dev_priv,
> +                         unsigned engine_mask);
>  
>  #define I915_RESET_QUIET BIT(0)
>  extern void i915_reset(struct drm_i915_private *i915, unsigned int flags);
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 5ae9a62712ca..7186fe4d2ba9 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1899,6 +1899,29 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
>               return NULL;
>  }
>  
> +static void i915_engines_set_mode(struct drm_i915_private *dev_priv,
> +                               unsigned engine_mask,
> +                               u32 mode)
> +{
> +     struct intel_engine_cs *engine;
> +     enum intel_engine_id id;
> +
> +     if (INTEL_GEN(dev_priv) < 3)
> +             return;
> +
> +     for_each_engine_masked(engine, dev_priv, engine_mask, id)
> +             I915_WRITE_FW(RING_MI_MODE(engine->mmio_base), mode);

Is there a reason not to use gen3_stop_engine at this level?

-Mika

> +}
> +
> +void intel_gpu_reset_prepare(struct drm_i915_private *dev_priv,
> +                          unsigned engine_mask)
> +{
> +     intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +     i915_engines_set_mode(dev_priv, engine_mask,
> +                           _MASKED_BIT_ENABLE(STOP_RING));
> +}
> +
>  int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
>  {
>       reset_func reset = intel_get_gpu_reset(dev_priv);
> @@ -1939,6 +1962,16 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
>       return ret;
>  }
>  
> +void intel_gpu_reset_finish(struct drm_i915_private *dev_priv,
> +                         unsigned engine_mask)
> +{
> +     /* Clear the STOP_RING bit as the reset may not have occurred */
> +     i915_engines_set_mode(dev_priv, engine_mask,
> +                           _MASKED_BIT_DISABLE(STOP_RING));
> +
> +     intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
>  bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
>  {
>       return intel_get_gpu_reset(dev_priv) != NULL;
> -- 
> 2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to