Ping?
On 25.09.25 15:03, Christian König wrote:
> There have been multiple complaints that 10 seconds is usually too long.
>
> The original requirement for a longer timeout came from compute tests on
> AMDVLK. Since that is no longer a topic, reduce the timeout back to 2
> seconds for all queues.
>
> While at it, also remove any special handling for compute queues under
> SRIOV or passthrough.
>
> Signed-off-by: Christian König <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 85 ++++++++++------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 21 ++----
> 2 files changed, 48 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index a77000c2e0bb..ceb3c616292c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4278,58 +4278,53 @@ static int
> amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
> long timeout;
> int ret = 0;
>
> - /*
> - * By default timeout for jobs is 10 sec
> - */
> - adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000);
> - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
> + /* By default timeout for all queues is 2 sec */
> + adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
> + adev->video_timeout = msecs_to_jiffies(2000);
>
> - if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
> - while ((timeout_setting = strsep(&input, ",")) &&
> - strnlen(timeout_setting,
> AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
> - ret = kstrtol(timeout_setting, 0, &timeout);
> - if (ret)
> - return ret;
> + if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
> + return 0;
>
> - if (timeout == 0) {
> - index++;
> - continue;
> - } else if (timeout < 0) {
> - timeout = MAX_SCHEDULE_TIMEOUT;
> - dev_warn(adev->dev, "lockup timeout disabled");
> - add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
> - } else {
> - timeout = msecs_to_jiffies(timeout);
> - }
> + while ((timeout_setting = strsep(&input, ",")) &&
> + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
> + ret = kstrtol(timeout_setting, 0, &timeout);
> + if (ret)
> + return ret;
>
> - switch (index++) {
> - case 0:
> - adev->gfx_timeout = timeout;
> - break;
> - case 1:
> - adev->compute_timeout = timeout;
> - break;
> - case 2:
> - adev->sdma_timeout = timeout;
> - break;
> - case 3:
> - adev->video_timeout = timeout;
> - break;
> - default:
> - break;
> - }
> + if (timeout == 0) {
> + index++;
> + continue;
> + } else if (timeout < 0) {
> + timeout = MAX_SCHEDULE_TIMEOUT;
> + dev_warn(adev->dev, "lockup timeout disabled");
> + add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
> + } else {
> + timeout = msecs_to_jiffies(timeout);
> }
> - /*
> - * There is only one value specified and
> - * it should apply to all non-compute jobs.
> - */
> - if (index == 1) {
> - adev->sdma_timeout = adev->video_timeout =
> adev->gfx_timeout;
> - if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
> - adev->compute_timeout = adev->gfx_timeout;
> +
> + switch (index++) {
> + case 0:
> + adev->gfx_timeout = timeout;
> + break;
> + case 1:
> + adev->compute_timeout = timeout;
> + break;
> + case 2:
> + adev->sdma_timeout = timeout;
> + break;
> + case 3:
> + adev->video_timeout = timeout;
> + break;
> + default:
> + break;
> }
> }
>
> + /* When only one value is specified, apply it to all queues. */
> + if (index == 1)
> + adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
> + adev->video_timeout = timeout;
> +
> return ret;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index ece251cbe8c3..fe45dd1d979e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -353,22 +353,17 @@ module_param_named(svm_default_granularity,
> amdgpu_svm_default_granularity, uint
> * DOC: lockup_timeout (string)
> * Set GPU scheduler timeout value in ms.
> *
> - * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is
> there can be one or
> - * multiple values specified. 0 and negative values are invalidated. They
> will be adjusted
> - * to the default timeout.
> + * The format can be [single value] for setting all timeouts at once or
> + * [GFX,Compute,SDMA,Video] to set individual timeouts.
> + * Negative values mean infinity.
> *
> - * - With one value specified, the setting will apply to all non-compute
> jobs.
> - * - With multiple values specified, the first one will be for GFX.
> - * The second one is for Compute. The third and fourth ones are
> - * for SDMA and Video.
> - *
> - * By default(with no lockup_timeout settings), the timeout for all jobs is
> 10000.
> + * By default(with no lockup_timeout settings), the timeout for all queues
> is 2000.
> */
> MODULE_PARM_DESC(lockup_timeout,
> - "GPU lockup timeout in ms (default: 10000 for all jobs. "
> - "0: keep default value. negative: infinity timeout), format:
> for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
> - "for passthrough or sriov [all jobs] or
> [GFX,Compute,SDMA,Video].");
> -module_param_string(lockup_timeout, amdgpu_lockup_timeout,
> sizeof(amdgpu_lockup_timeout), 0444);
> + "GPU lockup timeout in ms (default: 2000 for all queues. "
> + "0: keep default value. negative: infinity timeout), format:
> [single value for all] or [GFX,Compute,SDMA,Video].");
> +module_param_string(lockup_timeout, amdgpu_lockup_timeout,
> + sizeof(amdgpu_lockup_timeout), 0444);
>
> /**
> * DOC: dpm (int)