From: "arun.siluv...@linux.intel.com" <arun.siluv...@linux.intel.com>

The driver maintains a count of how many times a given engine has been reset;
it is useful to capture this in the error state as well. It gives an idea of
how the engine was coping with the workloads it was executing before this
error state.

v2
  - rebase

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
Signed-off-by: Arun Siluvery <arun.siluv...@linux.intel.com>
Signed-off-by: Matthew Auld <matthew.a...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c       | 1 +
 drivers/gpu/drm/i915/i915_drv.h       | 9 +++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++
 3 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 8625207..0017ab5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1857,6 +1857,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
        intel_engine_reset_cancel(engine);
        intel_execlists_restart_submission(engine);
 
+       dev_priv->gpu_error.engine_reset_count[engine->id]++;
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
        return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3dcf3f6..722aea3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -774,6 +774,7 @@ struct drm_i915_error_state {
                enum intel_engine_hangcheck_action hangcheck_action;
                struct i915_address_space *vm;
                int num_requests;
+               u32 reset_count;
 
                /* our own tracking of ring head and tail */
                u32 cpu_ring_head;
@@ -1431,6 +1432,8 @@ struct i915_gpu_error {
 #define I915_RESET_IN_PROGRESS 0
 #define I915_WEDGED            (BITS_PER_LONG - 1)
 
+       unsigned long engine_reset_count[I915_NUM_ENGINES];
+
        /**
         * Waitqueue to signal when a hang is detected. Used to for waiters
         * to release the struct_mutex for the reset to procede.
@@ -3268,6 +3271,12 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
        return READ_ONCE(error->reset_count);
 }
 
+static inline u32 i915_engine_reset_count(struct i915_gpu_error *error,
+                                         struct intel_engine_cs *engine)
+{
+       return READ_ONCE(error->engine_reset_count[engine->id]);
+}
+
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 void i915_gem_reset_engine(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 334f15d..905b649 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -296,6 +296,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
        err_printf(m, "  hangcheck: %s [%d]\n",
                   hangcheck_action_to_str(ee->hangcheck_action),
                   ee->hangcheck_score);
+       err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 }
 
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -1056,6 +1057,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
 
        ee->hangcheck_score = engine->hangcheck.score;
        ee->hangcheck_action = engine->hangcheck.action;
+       ee->reset_count = i915_engine_reset_count(&dev_priv->gpu_error,
+                                                 engine);
 
        if (USES_PPGTT(dev_priv)) {
                int i;
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to