A flood of GPU VM fault interrupts may monopolize the CPU and delay the
scheduled recovery work that handles the VM fault. To avoid this, schedule
the recovery work on another CPU within the same NUMA node.

Signed-off-by: Philip Yang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 13 +++++++++++--
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 13 +++++++++++--
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index cdd599a08125..3a47107737a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -435,12 +435,21 @@ static int vega10_ih_self_irq(struct amdgpu_device *adev,
                              struct amdgpu_irq_src *source,
                              struct amdgpu_iv_entry *entry)
 {
+       int cpu, new_cpu;
+
+       cpu = new_cpu = smp_processor_id();
+       do {
+               new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
+               if (cpu_to_node(new_cpu) == numa_node_id())
+                       break;
+       } while (cpu != new_cpu);
+
        switch (entry->ring_id) {
        case 1:
-               schedule_work(&adev->irq.ih1_work);
+               schedule_work_on(new_cpu, &adev->irq.ih1_work);
                break;
        case 2:
-               schedule_work(&adev->irq.ih2_work);
+               schedule_work_on(new_cpu, &adev->irq.ih2_work);
                break;
        default: break;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 3b4eb8285943..a9465f0d8fbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -487,12 +487,21 @@ static int vega20_ih_self_irq(struct amdgpu_device *adev,
                              struct amdgpu_irq_src *source,
                              struct amdgpu_iv_entry *entry)
 {
+       int cpu, new_cpu;
+
+       cpu = new_cpu = smp_processor_id();
+       do {
+               new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
+               if (cpu_to_node(new_cpu) == numa_node_id())
+                       break;
+       } while (cpu != new_cpu);
+
        switch (entry->ring_id) {
        case 1:
-               schedule_work(&adev->irq.ih1_work);
+               schedule_work_on(new_cpu, &adev->irq.ih1_work);
                break;
        case 2:
-               schedule_work(&adev->irq.ih2_work);
+               schedule_work_on(new_cpu, &adev->irq.ih2_work);
                break;
        default: break;
        }
-- 
2.35.1

Reply via email to