For MCA poison, if unmapping the queue fails, only a GPU reset should be
triggered, without page retirement handling; the MCA notifier will take
care of page retirement.

Signed-off-by: Tao Zhou <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 27 +++++++++++++++----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 9494fa14db9a..dd1b1a612343 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -184,18 +184,23 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
                bool reset)
 {
        int ret;
-       struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-       struct ras_common_if head = {
-               .block = AMDGPU_RAS_BLOCK__UMC,
-       };
-       struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
 
-       ret =
-               amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, 
reset);
-
-       if (ret == AMDGPU_RAS_SUCCESS && obj) {
-               obj->err_data.ue_count += err_data->ue_count;
-               obj->err_data.ce_count += err_data->ce_count;
+       if (adev->gmc.xgmi.connected_to_cpu) {
+               ret = amdgpu_umc_poison_handler_mca(adev, ras_error_status, 
reset);
+       } else {
+               struct ras_err_data *err_data = (struct ras_err_data 
*)ras_error_status;
+               struct ras_common_if head = {
+                       .block = AMDGPU_RAS_BLOCK__UMC,
+               };
+               struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+
+               ret =
+                       amdgpu_umc_do_page_retirement(adev, ras_error_status, 
NULL, reset);
+
+               if (ret == AMDGPU_RAS_SUCCESS && obj) {
+                       obj->err_data.ue_count += err_data->ue_count;
+                       obj->err_data.ce_count += err_data->ce_count;
+               }
        }
 
        return ret;
-- 
2.35.1

Reply via email to