KFD suspend and resume routines have been disabled since commit 5d3a2d95224da ("drm/amdgpu: skip kfd suspend/resume for S0ix") which made sense at that time. However there is a problem that if there is any compute work running there may still be active fences. Running suspend without draining them can cause the system to hang.
The same problem can also occur with user queues too. So run KFD and user queue suspend/resume routines even in s0ix. Reviewed-by: Yifan Zhang <yifan1.zh...@amd.com> Signed-off-by: Mario Limonciello <mario.limoncie...@amd.com> --- v2: * handle user queues as well (Alex) * Add tag (Yifan) --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0fdfde3dcb9f..85b58e5edc7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5220,10 +5220,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) { - amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); - amdgpu_userq_suspend(adev); - } + amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + amdgpu_userq_suspend(adev); r = amdgpu_device_evict_resources(adev); if (r) @@ -5318,15 +5316,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) goto exit; } - if (!adev->in_s0ix) { - r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); - if (r) - goto exit; + r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + if (r) + goto exit; - r = amdgpu_userq_resume(adev); - if (r) - goto exit; - } + r = amdgpu_userq_resume(adev); + if (r) + goto exit; r = amdgpu_device_ip_late_init(adev); if (r) -- 2.50.1