Arun Siluvery <[email protected]> writes:

> [ text/plain ]
> In preparation for engine reset, the wedged argument of i915_handle_error()
> is extended into a mask of the engines that are hung. The mask is also
> passed down to the error state capture functions, which are updated
> accordingly.
>
> The engine reset recovery mechanism uses this mask to schedule recovery
> work for those particular engines.
>
> Cc: Chris Wilson <[email protected]>
> Cc: Mika Kuoppala <[email protected]>
> Signed-off-by: Tomas Elf <[email protected]>
> Signed-off-by: Arun Siluvery <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  4 ++--
>  drivers/gpu/drm/i915/i915_gpu_error.c |  8 ++++----
>  drivers/gpu/drm/i915/i915_irq.c       | 16 ++++++++--------
>  3 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 549a232..49ac065 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2735,7 +2735,7 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port 
> *port);
>  /* i915_irq.c */
>  void i915_queue_hangcheck(struct drm_device *dev);
>  __printf(3, 4)
> -void i915_handle_error(struct drm_device *dev, bool wedged,
> +void i915_handle_error(struct drm_device *dev, u32 engine_mask,
>                      const char *fmt, ...);
>  
>  extern void intel_irq_init(struct drm_i915_private *dev_priv);
> @@ -3321,7 +3321,7 @@ static inline void i915_error_state_buf_release(
>  {
>       kfree(eb->buf);
>  }
> -void i915_capture_error_state(struct drm_device *dev, bool wedge,
> +void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
>                             const char *error_msg);
>  void i915_error_state_get(struct drm_device *dev,
>                         struct i915_error_state_file_priv *error_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
> b/drivers/gpu/drm/i915/i915_gpu_error.c
> index db8600a..1f8ff06 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1301,7 +1301,7 @@ static void i915_capture_reg_state(struct 
> drm_i915_private *dev_priv,
>  
>  static void i915_error_capture_msg(struct drm_device *dev,
>                                  struct drm_i915_error_state *error,
> -                                bool wedged,
> +                                u32 engine_mask,
>                                  const char *error_msg)
>  {
>       struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -1324,7 +1324,7 @@ static void i915_error_capture_msg(struct drm_device 
> *dev,
>       scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
>                 ", reason: %s, action: %s",
>                 error_msg,
> -               wedged ? "reset" : "continue");
> +               engine_mask ? "reset" : "continue");
>  }
>  
>  static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
> @@ -1347,7 +1347,7 @@ static void i915_capture_gen_state(struct 
> drm_i915_private *dev_priv,
>   * out a structure which becomes available in debugfs for user level tools
>   * to pick up.
>   */
> -void i915_capture_error_state(struct drm_device *dev, bool wedged,
> +void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
>                             const char *error_msg)
>  {
>       static bool warned;
> @@ -1375,7 +1375,7 @@ void i915_capture_error_state(struct drm_device *dev, 
> bool wedged,
>       error->overlay = intel_overlay_capture_error_state(dev);
>       error->display = intel_display_capture_error_state(dev);
>  
> -     i915_error_capture_msg(dev, error, wedged, error_msg);
> +     i915_error_capture_msg(dev, error, engine_mask, error_msg);
>       DRM_INFO("%s\n", error->error_msg);
>  
>       spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 8f3e330..a55a7cc 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2653,14 +2653,14 @@ static void i915_report_and_clear_eir(struct 
> drm_device *dev)
>  /**
>   * i915_handle_error - handle a gpu error
>   * @dev: drm device
> - *
> + * @engine_mask: mask representing engines that are hung
>   * Do some basic checking of register state at error time and
>   * dump it to the syslog.  Also call i915_capture_error_state() to make
>   * sure we get a record and make it available in debugfs.  Fire a uevent
>   * so userspace knows something bad happened (should trigger collection
>   * of a ring dump etc.).
>   */
> -void i915_handle_error(struct drm_device *dev, bool wedged,
> +void i915_handle_error(struct drm_device *dev, u32 engine_mask,
>                      const char *fmt, ...)
>  {
>       struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2671,10 +2671,10 @@ void i915_handle_error(struct drm_device *dev, bool 
> wedged,
>       vscnprintf(error_msg, sizeof(error_msg), fmt, args);
>       va_end(args);
>  
> -     i915_capture_error_state(dev, wedged, error_msg);
> +     i915_capture_error_state(dev, engine_mask, error_msg);
>       i915_report_and_clear_eir(dev);
>  
> -     if (wedged) {
> +     if (engine_mask) {
>               atomic_or(I915_RESET_IN_PROGRESS_FLAG,
>                               &dev_priv->gpu_error.reset_counter);
>  
> @@ -3033,7 +3033,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd)
>        */
>       tmp = I915_READ_CTL(engine);
>       if (tmp & RING_WAIT) {
> -             i915_handle_error(dev, false,
> +             i915_handle_error(dev, 0,
>                                 "Kicking stuck wait on %s",
>                                 engine->name);
>               I915_WRITE_CTL(engine, tmp);
> @@ -3045,7 +3045,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd)
>               default:
>                       return HANGCHECK_HUNG;
>               case 1:
> -                     i915_handle_error(dev, false,
> +                     i915_handle_error(dev, 0,
>                                         "Kicking stuck semaphore on %s",
>                                         engine->name);
>                       I915_WRITE_CTL(engine, tmp);
> @@ -3189,12 +3189,12 @@ static void i915_hangcheck_elapsed(struct work_struct 
> *work)
>                       DRM_INFO("%s on %s\n",
>                                stuck[i] ? "stuck" : "no progress",
>                                engine->name);
> -                     rings_hung++;
> +                     rings_hung |= intel_engine_flag(engine);

We can change the int to u32 when we rename rings_hung to engines_hung.

Reviewed-by: Mika Kuoppala <[email protected]>

>               }
>       }
>  
>       if (rings_hung) {
> -             i915_handle_error(dev, true, "Ring hung");
> +             i915_handle_error(dev, rings_hung, "Engine(s) hung");
>               goto out;
>       }
>  
> -- 
> 1.9.1
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to