Add an error code check for guc_enable_communication() on the resume path.
When resume fails, we can no longer use the GPU, so mark the GPU as wedged.

Signed-off-by: Zhanjun Dong <zhanjun.d...@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_pm.c |  7 ++++++-
 drivers/gpu/drm/i915/gt/intel_reset.c | 19 ++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_reset.h |  1 +
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  9 +++++++--
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index e02cb90723ae..775ce511f810 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -373,8 +373,13 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
        intel_ggtt_restore_fences(gt->ggtt);
 
        ret = intel_uc_runtime_resume(&gt->uc);
-       if (ret)
+       if (ret && intel_uc_uses_guc_submission(&gt->uc)) {
+               /* Resume failed on GuC submission, we can no longer use the 
GPU, marking the GPU
+                * as wedged.
+                */
+               intel_gt_set_wedged_flag(gt);
                return ret;
+       }
 
        return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
b/drivers/gpu/drm/i915/gt/intel_reset.c
index 195ff72d7a14..05142761770a 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -962,6 +962,20 @@ static void nop_submit_request(struct i915_request 
*request)
        }
 }
 
+void intel_gt_set_wedged_flag(struct intel_gt *gt)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       if (test_bit(I915_WEDGED, &gt->reset.flags))
+               return;
+
+       for_each_engine(engine, gt, id)
+               engine->submit_request = nop_submit_request;
+
+       set_bit(I915_WEDGED, &gt->reset.flags);
+}
+
 static void __intel_gt_set_wedged(struct intel_gt *gt)
 {
        struct intel_engine_cs *engine;
@@ -984,8 +998,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
        if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
                __intel_gt_reset(gt, ALL_ENGINES);
 
-       for_each_engine(engine, gt, id)
-               engine->submit_request = nop_submit_request;
+
+       intel_gt_set_wedged_flag(gt);
 
        /*
         * Make sure no request can slip through without getting completed by
@@ -993,7 +1007,6 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
         * in nop_submit_request.
         */
        synchronize_rcu_expedited();
-       set_bit(I915_WEDGED, &gt->reset.flags);
 
        /* Mark all executing requests as skipped */
        local_bh_disable();
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h 
b/drivers/gpu/drm/i915/gt/intel_reset.h
index 25c975b6e8fc..3796b8d877b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -42,6 +42,7 @@ int __must_check intel_gt_reset_trylock(struct intel_gt *gt, 
int *srcu);
 int __must_check intel_gt_reset_lock_interruptible(struct intel_gt *gt, int 
*srcu);
 void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
 
+void intel_gt_set_wedged_flag(struct intel_gt *gt);
 void intel_gt_set_wedged(struct intel_gt *gt);
 bool intel_gt_unset_wedged(struct intel_gt *gt);
 int intel_gt_terminally_wedged(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4ccb4be4c9cb..62c5a953991c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -700,8 +700,13 @@ static int __uc_resume(struct intel_uc *uc, bool 
enable_communication)
        /* Make sure we enable communication if and only if it's disabled */
        GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct));
 
-       if (enable_communication)
-               guc_enable_communication(guc);
+       if (enable_communication) {
+               err = guc_enable_communication(guc);
+               if (err) {
+                       DRM_DEBUG_DRIVER("Failed to enable communication, %pe", 
ERR_PTR(err));
+                       return err;
+               }
+       }
 
        /* If we are only resuming GuC communication but not reloading
         * GuC, we need to ensure the ARAT timer interrupt is enabled
-- 
2.34.1

Reply via email to