When poison is triggered multiple times, a race condition can occur.
Add a mutex lock to protect poison injection.

Signed-off-by: Ce Sun <cesun...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 185b9e538f98..31850a47a41f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3503,6 +3503,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
                if (kthread_should_stop())
                        break;
 
+               mutex_lock(&con->poison_lock);
                gpu_reset = 0;
 
                do {
@@ -3560,6 +3561,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
                        /* Wake up work to save bad pages to eeprom */
                        schedule_delayed_work(&con->page_retirement_dwork, 0);
                }
+               mutex_unlock(&con->poison_lock);
 #endif
        }
 
@@ -3657,6 +3659,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
        mutex_init(&con->page_rsv_lock);
        INIT_KFIFO(con->poison_fifo);
        mutex_init(&con->page_retirement_lock);
+       mutex_init(&con->poison_lock);
        init_waitqueue_head(&con->page_retirement_wq);
        atomic_set(&con->page_retirement_req_cnt, 0);
        atomic_set(&con->poison_creation_count, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 7f10a7402160..6265dac0e1c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -573,6 +573,9 @@ struct amdgpu_ras {
 
        pid_t init_task_pid;
        char init_task_comm[TASK_COMM_LEN];
+
+       /* Protect poison injection */
+       struct mutex poison_lock;
 };
 
 struct ras_fs_data {
-- 
2.34.1

Reply via email to