On Thu, Sep 25, 2025 at 9:19 AM Christian König
<[email protected]> wrote:
>
> There have been multiple complaints that 10 seconds is usually too long.
>
> The original requirement for a longer timeout came from compute tests on
> AMDVLK. Since that is no longer a concern, reduce the timeout back to 2
> seconds for all queues.
>
> While at it, also remove any special handling for compute queues under
> SRIOV or passthrough.
>
> Signed-off-by: Christian König <[email protected]>

Reviewed-by: Alex Deucher <[email protected]>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 85 ++++++++++------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 21 ++----
>  2 files changed, 48 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index a77000c2e0bb..ceb3c616292c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4278,58 +4278,53 @@ static int 
> amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
>         long timeout;
>         int ret = 0;
>
> -       /*
> -        * By default timeout for jobs is 10 sec
> -        */
> -       adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000);
> -       adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
> +       /* By default timeout for all queues is 2 sec */
> +       adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
> +               adev->video_timeout = msecs_to_jiffies(2000);
>
> -       if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
> -               while ((timeout_setting = strsep(&input, ",")) &&
> -                               strnlen(timeout_setting, 
> AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
> -                       ret = kstrtol(timeout_setting, 0, &timeout);
> -                       if (ret)
> -                               return ret;
> +       if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
> +               return 0;
>
> -                       if (timeout == 0) {
> -                               index++;
> -                               continue;
> -                       } else if (timeout < 0) {
> -                               timeout = MAX_SCHEDULE_TIMEOUT;
> -                               dev_warn(adev->dev, "lockup timeout 
> disabled");
> -                               add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
> -                       } else {
> -                               timeout = msecs_to_jiffies(timeout);
> -                       }
> +       while ((timeout_setting = strsep(&input, ",")) &&
> +              strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
> +               ret = kstrtol(timeout_setting, 0, &timeout);
> +               if (ret)
> +                       return ret;
>
> -                       switch (index++) {
> -                       case 0:
> -                               adev->gfx_timeout = timeout;
> -                               break;
> -                       case 1:
> -                               adev->compute_timeout = timeout;
> -                               break;
> -                       case 2:
> -                               adev->sdma_timeout = timeout;
> -                               break;
> -                       case 3:
> -                               adev->video_timeout = timeout;
> -                               break;
> -                       default:
> -                               break;
> -                       }
> +               if (timeout == 0) {
> +                       index++;
> +                       continue;
> +               } else if (timeout < 0) {
> +                       timeout = MAX_SCHEDULE_TIMEOUT;
> +                       dev_warn(adev->dev, "lockup timeout disabled");
> +                       add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
> +               } else {
> +                       timeout = msecs_to_jiffies(timeout);
>                 }
> -               /*
> -                * There is only one value specified and
> -                * it should apply to all non-compute jobs.
> -                */
> -               if (index == 1) {
> -                       adev->sdma_timeout = adev->video_timeout = 
> adev->gfx_timeout;
> -                       if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
> -                               adev->compute_timeout = adev->gfx_timeout;
> +
> +               switch (index++) {
> +               case 0:
> +                       adev->gfx_timeout = timeout;
> +                       break;
> +               case 1:
> +                       adev->compute_timeout = timeout;
> +                       break;
> +               case 2:
> +                       adev->sdma_timeout = timeout;
> +                       break;
> +               case 3:
> +                       adev->video_timeout = timeout;
> +                       break;
> +               default:
> +                       break;
>                 }
>         }
>
> +       /* When only one value specified apply it to all queues. */
> +       if (index == 1)
> +               adev->gfx_timeout = adev->compute_timeout = 
> adev->sdma_timeout =
> +                       adev->video_timeout = timeout;
> +
>         return ret;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index ece251cbe8c3..fe45dd1d979e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -353,22 +353,17 @@ module_param_named(svm_default_granularity, 
> amdgpu_svm_default_granularity, uint
>   * DOC: lockup_timeout (string)
>   * Set GPU scheduler timeout value in ms.
>   *
> - * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is 
> there can be one or
> - * multiple values specified. 0 and negative values are invalidated. They 
> will be adjusted
> - * to the default timeout.
> + * The format can be [single value] for setting all timeouts at once or
> + * [GFX,Compute,SDMA,Video] to set individual timeouts.
> + * Negative values mean infinity.
>   *
> - * - With one value specified, the setting will apply to all non-compute 
> jobs.
> - * - With multiple values specified, the first one will be for GFX.
> - *   The second one is for Compute. The third and fourth ones are
> - *   for SDMA and Video.
> - *
> - * By default(with no lockup_timeout settings), the timeout for all jobs is 
> 10000.
> + * By default(with no lockup_timeout settings), the timeout for all queues 
> is 2000.
>   */
>  MODULE_PARM_DESC(lockup_timeout,
> -                "GPU lockup timeout in ms (default: 10000 for all jobs. "
> -                "0: keep default value. negative: infinity timeout), format: 
> for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
> -                "for passthrough or sriov [all jobs] or 
> [GFX,Compute,SDMA,Video].");
> -module_param_string(lockup_timeout, amdgpu_lockup_timeout, 
> sizeof(amdgpu_lockup_timeout), 0444);
> +                "GPU lockup timeout in ms (default: 2000 for all queues. "
> +                "0: keep default value. negative: infinity timeout), format: 
> [single value for all] or [GFX,Compute,SDMA,Video].");
> +module_param_string(lockup_timeout, amdgpu_lockup_timeout,
> +                   sizeof(amdgpu_lockup_timeout), 0444);
>
>  /**
>   * DOC: dpm (int)
> --
> 2.43.0
>

Reply via email to