Need to restore any good queues even if the suspend_all
failed for some. Always run remove_queue as that will
schedule a GPU reset if removing the queue fails.
v2: move resume_all after remove
Fixes: eb067d65c33e ("drm/amdkfd: Update BadOpcode Interrupt handling with MES")
Signed-off-by: Alex Deucher <[email protected]>
---
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 20 ++++++-------------
1 file changed, 6 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c351f7cff0553..08006ce8126c9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -3258,32 +3258,24 @@ int kfd_dqm_suspend_bad_queue_mes(struct kfd_node
*knode, u32 pasid, u32 doorbel
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->doorbell_id == doorbell_id &&
q->properties.is_active) {
- ret = suspend_all_queues_mes(dqm);
- if (ret) {
- dev_err(dev, "Suspending all queues
failed");
- goto out;
- }
+ /* suspend all queues will save any good queues
and mark the rest as bad */
+ suspend_all_queues_mes(dqm);
q->properties.is_evicted = true;
q->properties.is_active = false;
decrement_queue_count(dqm, qpd, q);
+ /* this will remove the bad queue and sched a
GPU reset if needed */
ret = remove_queue_mes(dqm, q, qpd);
- if (ret) {
- dev_err(dev, "Removing bad queue
failed");
- goto out;
- }
-
- ret = resume_all_queues_mes(dqm);
if (ret)
- dev_err(dev, "Resuming all queues
failed");
-
+ dev_err(dev, "Removing bad queue
failed");
+ /* resume the good queues */
+ resume_all_queues_mes(dqm);
break;
}
}
}
-out:
dqm_unlock(dqm);
kfd_unref_process(p);
return ret;
--
2.54.0