Re: [Intel-gfx] [PATCH v1 3/3] drm/i915/gt: Timeout when waiting for idle in suspending

2023-08-09 Thread Teres Alexis, Alan Previn
Thanks Rodrigo for reviewing this.

On Mon, 2023-08-07 at 13:56 -0400, Vivi, Rodrigo wrote:
> On Wed, Aug 02, 2023 at 04:35:01PM -0700, Alan Previn wrote:
> > When suspending, add a timeout when calling
> > intel_gt_pm_wait_for_idle else if we have a lost
> > G2H event that holds a wakeref (which would be
> > indicating of a bug elsewhere in the driver), we
> > get to complete the suspend-resume cycle, albeit
> > without all the lower power hw counters hitting
> > its targets, instead of hanging in the kernel.
> > 
alan:snip

> > -int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
> > +int intel_wakeref_wait_for_idle(struct intel_wakeref *wf, int timeout_ms)
> >  {
> > -   int err;
> > +   int err = 0;
> >  
> > might_sleep();
> >  
> > -   err = wait_var_event_killable(>wakeref,
> > - !intel_wakeref_is_active(wf));
> > +   if (!timeout_ms)
> > +   err = wait_var_event_killable(>wakeref,
> > + !intel_wakeref_is_active(wf));
> > +   else if (wait_var_event_timeout(>wakeref,
> > +   !intel_wakeref_is_active(wf),
> > +   msecs_to_jiffies(timeout_ms)) < 1)
> > +   err = -ETIME;
> 
> it looks to me that -ETIMEDOUT would be a better error.
alan: okay - will do, thanks.



Re: [Intel-gfx] [PATCH v1 3/3] drm/i915/gt: Timeout when waiting for idle in suspending

2023-08-07 Thread Rodrigo Vivi
On Wed, Aug 02, 2023 at 04:35:01PM -0700, Alan Previn wrote:
> When suspending, add a timeout when calling
> intel_gt_pm_wait_for_idle else if we have a lost
> G2H event that holds a wakeref (which would be
> indicating of a bug elsewhere in the driver), we
> get to complete the suspend-resume cycle, albeit
> without all the lower power hw counters hitting
> its targets, instead of hanging in the kernel.
> 
> Signed-off-by: Alan Previn 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  2 +-
>  drivers/gpu/drm/i915/gt/intel_gt_pm.c |  7 ++-
>  drivers/gpu/drm/i915/gt/intel_gt_pm.h |  7 ++-
>  drivers/gpu/drm/i915/intel_wakeref.c  | 14 ++
>  drivers/gpu/drm/i915/intel_wakeref.h  |  5 +++--
>  5 files changed, 26 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index ee15486fed0d..090438eb8682 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -688,7 +688,7 @@ void intel_engines_release(struct intel_gt *gt)
>   if (!engine->release)
>   continue;
>  
> - intel_wakeref_wait_for_idle(>wakeref);
> + intel_wakeref_wait_for_idle(>wakeref, 0);
>   GEM_BUG_ON(intel_engine_pm_is_awake(engine));
>  
>   engine->release(engine);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index 3162d859ed68..dfe77eb3efd1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -289,6 +289,8 @@ int intel_gt_resume(struct intel_gt *gt)
>  
>  static void wait_for_suspend(struct intel_gt *gt)
>  {
> + int timeout_ms = CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT ? : 1;
> +
>   if (!intel_gt_pm_is_awake(gt)) {
>   intel_uc_suspend_prepare(>uc);
>   return;
> @@ -305,7 +307,10 @@ static void wait_for_suspend(struct intel_gt *gt)
>   intel_uc_suspend_prepare(>uc);
>   }
>  
> - intel_gt_pm_wait_for_idle(gt);
> + /* we are suspending, so we shouldn't be waiting forever */
> + if (intel_gt_pm_wait_timeout_for_idle(gt, timeout_ms) == -ETIME)
> + drm_warn(>i915->drm, "Bailing from %s after %d milisec 
> timeout\n",
> +  __func__, timeout_ms);
>  }
>  
>  void intel_gt_suspend_prepare(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> index 6c9a46452364..5358acc2b5b1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> @@ -68,7 +68,12 @@ static inline void intel_gt_pm_might_put(struct intel_gt 
> *gt)
>  
>  static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
>  {
> - return intel_wakeref_wait_for_idle(>wakeref);
> + return intel_wakeref_wait_for_idle(>wakeref, 0);
> +}
> +
> +static inline int intel_gt_pm_wait_timeout_for_idle(struct intel_gt *gt, int 
> timeout_ms)
> +{
> + return intel_wakeref_wait_for_idle(>wakeref, timeout_ms);
>  }
>  
>  void intel_gt_pm_init_early(struct intel_gt *gt);
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.c 
> b/drivers/gpu/drm/i915/intel_wakeref.c
> index 718f2f1b6174..7e01d4cc300c 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.c
> +++ b/drivers/gpu/drm/i915/intel_wakeref.c
> @@ -111,14 +111,20 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
>"wakeref.work", >work, 0);
>  }
>  
> -int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
> +int intel_wakeref_wait_for_idle(struct intel_wakeref *wf, int timeout_ms)
>  {
> - int err;
> + int err = 0;
>  
>   might_sleep();
>  
> - err = wait_var_event_killable(>wakeref,
> -   !intel_wakeref_is_active(wf));
> + if (!timeout_ms)
> + err = wait_var_event_killable(>wakeref,
> +   !intel_wakeref_is_active(wf));
> + else if (wait_var_event_timeout(>wakeref,
> + !intel_wakeref_is_active(wf),
> + msecs_to_jiffies(timeout_ms)) < 1)
> + err = -ETIME;

it looks to me that -ETIMEDOUT would be a better error.

> +
>   if (err)
>   return err;
>  
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h 
> b/drivers/gpu/drm/i915/intel_wakeref.h
> index ec881b097368..6fbb7a2fb6ea 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.h
> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
> @@ -251,15 +251,16 @@ __intel_wakeref_defer_park(struct intel_wakeref *wf)
>  /**
>   * intel_wakeref_wait_for_idle: Wait until the wakeref is idle
>   * @wf: the wakeref
> + * @timeout_ms: timeout to wait in milisecs, zero means forever
>   *
>   * Wait for the earlier asynchronous release of the wakeref. Note
>   * this will wait for any third party as well, so make sure you