When poison is triggered multiple times, a race condition can occur. Add a mutex lock to protect poison injection.
Signed-off-by: Ce Sun <cesun...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 185b9e538f98..31850a47a41f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3503,6 +3503,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
 		if (kthread_should_stop())
 			break;
 
+		mutex_lock(&con->poison_lock);
 		gpu_reset = 0;
 
 		do {
@@ -3560,6 +3561,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
 			/* Wake up work to save bad pages to eeprom */
 			schedule_delayed_work(&con->page_retirement_dwork, 0);
 		}
+		mutex_unlock(&con->poison_lock);
 #endif
 	}
 
@@ -3657,6 +3659,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
 	mutex_init(&con->page_rsv_lock);
 	INIT_KFIFO(con->poison_fifo);
 	mutex_init(&con->page_retirement_lock);
+	mutex_init(&con->poison_lock);
 	init_waitqueue_head(&con->page_retirement_wq);
 	atomic_set(&con->page_retirement_req_cnt, 0);
 	atomic_set(&con->poison_creation_count, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 7f10a7402160..6265dac0e1c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -573,6 +573,9 @@ struct amdgpu_ras {
 
 	pid_t init_task_pid;
 	char init_task_comm[TASK_COMM_LEN];
+
+	/* Protect poison injection */
+	struct mutex poison_lock;
 };
 
 struct ras_fs_data {
-- 
2.34.1