Once upon a time before we had automated GPU state capture upon hangs,
we had intel_gpu_dump. Now we come almost full circle and reinstate that
view of the current GPU queues and registers by using the error capture
facility to snapshot the GPU state when debugfs/.../i915_gpu_info is
opened - which should provided useful debugging to both the error
capture routines (without having to cause a hang and avoid the error
state being eaten by igt) and generally.

v2: Rename drm_i915_error_state to i915_gpu_state to alleviate some name
collisions between the error state dump and inspecting the gpu state.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuopp...@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuopp...@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c   | 113 ++++++++++++-----------
 drivers/gpu/drm/i915/i915_drv.c       |   2 +-
 drivers/gpu/drm/i915/i915_drv.h       |  46 ++++++----
 drivers/gpu/drm/i915/i915_gpu_error.c | 165 ++++++++++++++++++----------------
 drivers/gpu/drm/i915/i915_sysfs.c     |  26 +++---
 drivers/gpu/drm/i915/intel_display.c  |   2 +-
 6 files changed, 190 insertions(+), 164 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index cd023fda1a3b..0de0596d41ac 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -988,89 +988,93 @@ static int i915_gem_fence_regs_info(struct seq_file *m, 
void *data)
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-
-static ssize_t
-i915_error_state_write(struct file *filp,
-                      const char __user *ubuf,
-                      size_t cnt,
-                      loff_t *ppos)
+static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
+                             size_t count, loff_t *pos)
 {
-       struct i915_error_state_file_priv *error_priv = filp->private_data;
+       struct i915_gpu_state *error = file->private_data;
+       struct drm_i915_error_state_buf str;
+       ssize_t ret;
+       loff_t tmp;
 
-       DRM_DEBUG_DRIVER("Resetting error state\n");
-       i915_destroy_error_state(error_priv->i915);
-
-       return cnt;
-}
-
-static int i915_error_state_open(struct inode *inode, struct file *file)
-{
-       struct drm_i915_private *dev_priv = inode->i_private;
-       struct i915_error_state_file_priv *error_priv;
+       if (!error)
+               return 0;
 
-       error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
-       if (!error_priv)
-               return -ENOMEM;
+       ret = i915_error_state_buf_init(&str, error->i915, count, *pos);
+       if (ret)
+               return ret;
 
-       error_priv->i915 = dev_priv;
+       ret = i915_error_state_to_str(&str, error);
+       if (ret)
+               goto out;
 
-       i915_error_state_get(&dev_priv->drm, error_priv);
+       tmp = 0;
+       ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes);
+       if (ret < 0)
+               goto out;
 
-       file->private_data = error_priv;
+       *pos = str.start + ret;
+out:
+       i915_error_state_buf_release(&str);
+       return ret;
+}
 
+static int gpu_state_release(struct inode *inode, struct file *file)
+{
+       i915_gpu_state_put(file->private_data);
        return 0;
 }
 
-static int i915_error_state_release(struct inode *inode, struct file *file)
+static int i915_gpu_info_open(struct inode *inode, struct file *file)
 {
-       struct i915_error_state_file_priv *error_priv = file->private_data;
+       struct i915_gpu_state *gpu;
 
-       i915_error_state_put(error_priv);
-       kfree(error_priv);
+       gpu = i915_capture_gpu_state(inode->i_private);
+       if (!gpu)
+               return -ENOMEM;
 
+       file->private_data = gpu;
        return 0;
 }
 
-static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
-                                    size_t count, loff_t *pos)
+static const struct file_operations i915_gpu_info_fops = {
+       .owner = THIS_MODULE,
+       .open = i915_gpu_info_open,
+       .read = gpu_state_read,
+       .llseek = default_llseek,
+       .release = gpu_state_release,
+};
+
+static ssize_t
+i915_error_state_write(struct file *filp,
+                      const char __user *ubuf,
+                      size_t cnt,
+                      loff_t *ppos)
 {
-       struct i915_error_state_file_priv *error_priv = file->private_data;
-       struct drm_i915_error_state_buf error_str;
-       loff_t tmp_pos = 0;
-       ssize_t ret_count = 0;
-       int ret;
+       struct i915_gpu_state *error = filp->private_data;
 
-       ret = i915_error_state_buf_init(&error_str, error_priv->i915,
-                                       count, *pos);
-       if (ret)
-               return ret;
+       if (!error)
+               return 0;
 
-       ret = i915_error_state_to_str(&error_str, error_priv);
-       if (ret)
-               goto out;
+       DRM_DEBUG_DRIVER("Resetting error state\n");
+       i915_reset_error_state(error->i915);
 
-       ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos,
-                                           error_str.buf,
-                                           error_str.bytes);
+       return cnt;
+}
 
-       if (ret_count < 0)
-               ret = ret_count;
-       else
-               *pos = error_str.start + ret_count;
-out:
-       i915_error_state_buf_release(&error_str);
-       return ret ?: ret_count;
+static int i915_error_state_open(struct inode *inode, struct file *file)
+{
+       file->private_data = i915_first_error_state(inode->i_private);
+       return 0;
 }
 
 static const struct file_operations i915_error_state_fops = {
        .owner = THIS_MODULE,
        .open = i915_error_state_open,
-       .read = i915_error_state_read,
+       .read = gpu_state_read,
        .write = i915_error_state_write,
        .llseek = default_llseek,
-       .release = i915_error_state_release,
+       .release = gpu_state_release,
 };
-
 #endif
 
 static int
@@ -4811,6 +4815,7 @@ static const struct i915_debugfs_files {
        {"i915_gem_drop_caches", &i915_drop_caches_fops},
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
        {"i915_error_state", &i915_error_state_fops},
+       {"i915_gpu_info", &i915_gpu_info_fops},
 #endif
        {"i915_next_seqno", &i915_next_seqno_fops},
        {"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index b1457a896289..309c29c84c54 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1370,7 +1370,7 @@ void i915_driver_unload(struct drm_device *dev)
 
        /* Free error state after interrupts are fully disabled. */
        cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-       i915_destroy_error_state(dev_priv);
+       i915_reset_error_state(dev_priv);
 
        /* Flush any outstanding unpin_work. */
        drain_workqueue(dev_priv->wq);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 41d8e66ad8c8..918118a268b8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -897,7 +897,7 @@ struct intel_device_info {
 
 struct intel_display_error_state;
 
-struct drm_i915_error_state {
+struct i915_gpu_state {
        struct kref ref;
        struct timeval time;
        struct timeval boottime;
@@ -917,7 +917,7 @@ struct drm_i915_error_state {
        u32 eir;
        u32 pgtbl_er;
        u32 ier;
-       u32 gtier[4];
+       u32 gtier[4], ngtier;
        u32 ccid;
        u32 derrmr;
        u32 forcewake;
@@ -931,6 +931,7 @@ struct drm_i915_error_state {
        u32 gab_ctl;
        u32 gfx_mode;
 
+       u32 nfence;
        u64 fence[I915_MAX_NUM_FENCES];
        struct intel_overlay_error_state *overlay;
        struct intel_display_error_state *display;
@@ -1512,11 +1513,6 @@ struct drm_i915_error_state_buf {
        loff_t pos;
 };
 
-struct i915_error_state_file_priv {
-       struct drm_i915_private *i915;
-       struct drm_i915_error_state *error;
-};
-
 #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */
 #define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */
 
@@ -1533,7 +1529,7 @@ struct i915_gpu_error {
        /* For reset and error_state handling. */
        spinlock_t lock;
        /* Protected by the above dev->gpu_error.lock. */
-       struct drm_i915_error_state *first_error;
+       struct i915_gpu_state *first_error;
 
        unsigned long missed_irq_rings;
 
@@ -3580,7 +3576,7 @@ static inline void intel_display_crc_init(struct 
drm_i915_private *dev_priv) {}
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
-                           const struct i915_error_state_file_priv *error);
+                           const struct i915_gpu_state *gpu);
 int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
                              struct drm_i915_private *i915,
                              size_t count, loff_t pos);
@@ -3589,13 +3585,28 @@ static inline void i915_error_state_buf_release(
 {
        kfree(eb->buf);
 }
+
+struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
 void i915_capture_error_state(struct drm_i915_private *dev_priv,
                              u32 engine_mask,
                              const char *error_msg);
-void i915_error_state_get(struct drm_device *dev,
-                         struct i915_error_state_file_priv *error_priv);
-void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
-void i915_destroy_error_state(struct drm_i915_private *dev_priv);
+
+static inline struct i915_gpu_state *
+i915_gpu_state_get(struct i915_gpu_state *gpu)
+{
+       kref_get(&gpu->ref);
+       return gpu;
+}
+
+void __i915_gpu_state_free(struct kref *kref);
+static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
+{
+       if (gpu)
+               kref_put(&gpu->ref, __i915_gpu_state_free);
+}
+
+struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
+void i915_reset_error_state(struct drm_i915_private *i915);
 
 #else
 
@@ -3605,7 +3616,13 @@ static inline void i915_capture_error_state(struct 
drm_i915_private *dev_priv,
 {
 }
 
-static inline void i915_destroy_error_state(struct drm_i915_private *dev_priv)
+static inline struct i915_gpu_state *
+i915_first_error_state(struct drm_i915_private *i915)
+{
+       return NULL;
+}
+
+static inline void i915_reset_error_state(struct drm_i915_private *i915)
 {
 }
 
@@ -3761,7 +3778,6 @@ extern void intel_overlay_print_error_state(struct 
drm_i915_error_state_buf *e,
 extern struct intel_display_error_state *
 intel_display_capture_error_state(struct drm_i915_private *dev_priv);
 extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
-                                           struct drm_i915_private *dev_priv,
                                            struct intel_display_error_state 
*error);
 
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 
*val);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index c28ccfef84ab..3a3c7c3c4931 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -342,7 +342,7 @@ static void print_error_buffers(struct 
drm_i915_error_state_buf *m,
 }
 
 static void error_print_instdone(struct drm_i915_error_state_buf *m,
-                                struct drm_i915_error_engine *ee)
+                                const struct drm_i915_error_engine *ee)
 {
        int slice;
        int subslice;
@@ -372,7 +372,7 @@ static void error_print_instdone(struct 
drm_i915_error_state_buf *m,
 
 static void error_print_request(struct drm_i915_error_state_buf *m,
                                const char *prefix,
-                               struct drm_i915_error_request *erq)
+                               const struct drm_i915_error_request *erq)
 {
        if (!erq->seqno)
                return;
@@ -386,7 +386,7 @@ static void error_print_request(struct 
drm_i915_error_state_buf *m,
 
 static void error_print_context(struct drm_i915_error_state_buf *m,
                                const char *header,
-                               struct drm_i915_error_context *ctx)
+                               const struct drm_i915_error_context *ctx)
 {
        err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d 
active %d\n",
                   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
@@ -394,7 +394,7 @@ static void error_print_context(struct 
drm_i915_error_state_buf *m,
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
-                              struct drm_i915_error_engine *ee)
+                              const struct drm_i915_error_engine *ee)
 {
        err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
        err_printf(m, "  START: 0x%08x\n", ee->start);
@@ -569,21 +569,32 @@ static void err_print_params(struct 
drm_i915_error_state_buf *m,
 #undef PRINT
 }
 
+static void err_print_pciid(struct drm_i915_error_state_buf *m,
+                           struct drm_i915_private *i915)
+{
+       struct pci_dev *pdev = i915->drm.pdev;
+
+       err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
+       err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
+       err_printf(m, "PCI Subsystem: %04x:%04x\n",
+                  pdev->subsystem_vendor,
+                  pdev->subsystem_device);
+}
+
 int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
-                           const struct i915_error_state_file_priv *error_priv)
+                           const struct i915_gpu_state *error)
 {
-       struct drm_i915_private *dev_priv = error_priv->i915;
-       struct pci_dev *pdev = dev_priv->drm.pdev;
-       struct drm_i915_error_state *error = error_priv->error;
+       struct drm_i915_private *dev_priv = m->i915;
        struct drm_i915_error_object *obj;
        int i, j;
 
        if (!error) {
-               err_printf(m, "no error state collected\n");
-               goto out;
+               err_printf(m, "No error state collected\n");
+               return 0;
        }
 
-       err_printf(m, "%s\n", error->error_msg);
+       if (*error->error_msg)
+               err_printf(m, "%s\n", error->error_msg);
        err_printf(m, "Kernel: " UTS_RELEASE "\n");
        err_printf(m, "Time: %ld s %ld us\n",
                   error->time.tv_sec, error->time.tv_usec);
@@ -605,11 +616,7 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
        err_printf(m, "Reset count: %u\n", error->reset_count);
        err_printf(m, "Suspend count: %u\n", error->suspend_count);
        err_printf(m, "Platform: %s\n", 
intel_platform_name(error->device_info.platform));
-       err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
-       err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
-       err_printf(m, "PCI Subsystem: %04x:%04x\n",
-                  pdev->subsystem_vendor,
-                  pdev->subsystem_device);
+       err_print_pciid(m, error->i915);
 
        err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
 
@@ -625,19 +632,15 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
 
        err_printf(m, "EIR: 0x%08x\n", error->eir);
        err_printf(m, "IER: 0x%08x\n", error->ier);
-       if (INTEL_GEN(dev_priv) >= 8) {
-               for (i = 0; i < 4; i++)
-                       err_printf(m, "GTIER gt %d: 0x%08x\n", i,
-                                  error->gtier[i]);
-       } else if (HAS_PCH_SPLIT(dev_priv) || IS_VALLEYVIEW(dev_priv))
-               err_printf(m, "GTIER: 0x%08x\n", error->gtier[0]);
+       for (i = 0; i < error->ngtier; i++)
+               err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
        err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
        err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
        err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
        err_printf(m, "CCID: 0x%08x\n", error->ccid);
        err_printf(m, "Missed interrupts: 0x%08lx\n", 
dev_priv->gpu_error.missed_irq_rings);
 
-       for (i = 0; i < dev_priv->num_fence_regs; i++)
+       for (i = 0; i < error->nfence; i++)
                err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
 
        if (INTEL_GEN(dev_priv) >= 6) {
@@ -686,7 +689,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf 
*m,
                            error->pinned_bo_count);
 
        for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
-               struct drm_i915_error_engine *ee = &error->engine[i];
+               const struct drm_i915_error_engine *ee = &error->engine[i];
 
                obj = ee->batchbuffer;
                if (obj) {
@@ -751,12 +754,11 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
                intel_overlay_print_error_state(m, error->overlay);
 
        if (error->display)
-               intel_display_print_error_state(m, dev_priv, error->display);
+               intel_display_print_error_state(m, error->display);
 
        err_print_capabilities(m, &error->device_info);
        err_print_params(m, &error->params);
 
-out:
        if (m->bytes == 0 && m->err)
                return m->err;
 
@@ -808,10 +810,10 @@ static void i915_error_object_free(struct 
drm_i915_error_object *obj)
        kfree(obj);
 }
 
-static void i915_error_state_free(struct kref *error_ref)
+void __i915_gpu_state_free(struct kref *error_ref)
 {
-       struct drm_i915_error_state *error = container_of(error_ref,
-                                                         typeof(*error), ref);
+       struct i915_gpu_state *error =
+               container_of(error_ref, typeof(*error), ref);
        int i;
 
        for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
@@ -976,7 +978,7 @@ static u32 capture_error_bo(struct drm_i915_error_buffer 
*err,
  * It's only a small step better than a random number in its current form.
  */
 static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
-                                        struct drm_i915_error_state *error,
+                                        struct i915_gpu_state *error,
                                         int *engine_id)
 {
        uint32_t error_code = 0;
@@ -1001,20 +1003,21 @@ static uint32_t i915_error_generate_code(struct 
drm_i915_private *dev_priv,
 }
 
 static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
-                                  struct drm_i915_error_state *error)
+                                  struct i915_gpu_state *error)
 {
        int i;
 
-       if (IS_GEN3(dev_priv) || IS_GEN2(dev_priv)) {
+       if (INTEL_GEN(dev_priv) >= 6) {
                for (i = 0; i < dev_priv->num_fence_regs; i++)
-                       error->fence[i] = I915_READ(FENCE_REG(i));
-       } else if (IS_GEN5(dev_priv) || IS_GEN4(dev_priv)) {
+                       error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
+       } else if (INTEL_GEN(dev_priv) >= 4) {
                for (i = 0; i < dev_priv->num_fence_regs; i++)
                        error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
-       } else if (INTEL_GEN(dev_priv) >= 6) {
+       } else {
                for (i = 0; i < dev_priv->num_fence_regs; i++)
-                       error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
+                       error->fence[i] = I915_READ(FENCE_REG(i));
        }
+       error->nfence = i;
 }
 
 static inline u32
@@ -1038,7 +1041,7 @@ gen8_engine_sync_index(struct intel_engine_cs *engine,
        return idx;
 }
 
-static void gen8_record_semaphore_state(struct drm_i915_error_state *error,
+static void gen8_record_semaphore_state(struct i915_gpu_state *error,
                                        struct intel_engine_cs *engine,
                                        struct drm_i915_error_engine *ee)
 {
@@ -1131,7 +1134,7 @@ static void error_record_engine_waiters(struct 
intel_engine_cs *engine,
        spin_unlock_irq(&b->lock);
 }
 
-static void error_record_engine_registers(struct drm_i915_error_state *error,
+static void error_record_engine_registers(struct i915_gpu_state *error,
                                          struct intel_engine_cs *engine,
                                          struct drm_i915_error_engine *ee)
 {
@@ -1328,7 +1331,7 @@ static void record_context(struct drm_i915_error_context 
*e,
 }
 
 static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
-                                 struct drm_i915_error_state *error)
+                                 struct i915_gpu_state *error)
 {
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
        int i;
@@ -1404,7 +1407,7 @@ static void i915_gem_record_rings(struct drm_i915_private 
*dev_priv,
 }
 
 static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
-                               struct drm_i915_error_state *error,
+                               struct i915_gpu_state *error,
                                struct i915_address_space *vm,
                                int idx)
 {
@@ -1430,7 +1433,7 @@ static void i915_gem_capture_vm(struct drm_i915_private 
*dev_priv,
 }
 
 static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
-                                       struct drm_i915_error_state *error)
+                                       struct i915_gpu_state *error)
 {
        int cnt = 0, i, j;
 
@@ -1455,7 +1458,7 @@ static void i915_capture_active_buffers(struct 
drm_i915_private *dev_priv,
 }
 
 static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
-                                       struct drm_i915_error_state *error)
+                                       struct i915_gpu_state *error)
 {
        struct i915_address_space *vm = &dev_priv->ggtt.base;
        struct drm_i915_error_buffer *bo;
@@ -1486,7 +1489,7 @@ static void i915_capture_pinned_buffers(struct 
drm_i915_private *dev_priv,
 }
 
 static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
-                                           struct drm_i915_error_state *error)
+                                           struct i915_gpu_state *error)
 {
        /* Capturing log buf contents won't be useful if logging was disabled */
        if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0))
@@ -1498,7 +1501,7 @@ static void i915_gem_capture_guc_log_buffer(struct 
drm_i915_private *dev_priv,
 
 /* Capture all registers which don't fit into another category. */
 static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
-                                  struct drm_i915_error_state *error)
+                                  struct i915_gpu_state *error)
 {
        int i;
 
@@ -1555,9 +1558,11 @@ static void i915_capture_reg_state(struct 
drm_i915_private *dev_priv,
                error->ier = I915_READ(GEN8_DE_MISC_IER);
                for (i = 0; i < 4; i++)
                        error->gtier[i] = I915_READ(GEN8_GT_IER(i));
+               error->ngtier = 4;
        } else if (HAS_PCH_SPLIT(dev_priv)) {
                error->ier = I915_READ(DEIER);
                error->gtier[0] = I915_READ(GTIER);
+               error->ngtier = 1;
        } else if (IS_GEN2(dev_priv)) {
                error->ier = I915_READ16(IER);
        } else if (!IS_VALLEYVIEW(dev_priv)) {
@@ -1568,7 +1573,7 @@ static void i915_capture_reg_state(struct 
drm_i915_private *dev_priv,
 }
 
 static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
-                                  struct drm_i915_error_state *error,
+                                  struct i915_gpu_state *error,
                                   u32 engine_mask,
                                   const char *error_msg)
 {
@@ -1595,7 +1600,7 @@ static void i915_error_capture_msg(struct 
drm_i915_private *dev_priv,
 }
 
 static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
-                                  struct drm_i915_error_state *error)
+                                  struct i915_gpu_state *error)
 {
        error->iommu = -1;
 #ifdef CONFIG_INTEL_IOMMU
@@ -1611,7 +1616,7 @@ static void i915_capture_gen_state(struct 
drm_i915_private *dev_priv,
 
 static int capture(void *data)
 {
-       struct drm_i915_error_state *error = data;
+       struct i915_gpu_state *error = data;
 
        do_gettimeofday(&error->time);
        error->boottime = ktime_to_timeval(ktime_get_boottime());
@@ -1637,6 +1642,23 @@ static int capture(void *data)
 
 #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
 
+struct i915_gpu_state *
+i915_capture_gpu_state(struct drm_i915_private *i915)
+{
+       struct i915_gpu_state *error;
+
+       error = kzalloc(sizeof(*error), GFP_ATOMIC);
+       if (!error)
+               return NULL;
+
+       kref_init(&error->ref);
+       error->i915 = i915;
+
+       stop_machine(capture, error, NULL);
+
+       return error;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -1651,7 +1673,7 @@ void i915_capture_error_state(struct drm_i915_private 
*dev_priv,
                              const char *error_msg)
 {
        static bool warned;
-       struct drm_i915_error_state *error;
+       struct i915_gpu_state *error;
        unsigned long flags;
 
        if (!i915.error_capture)
@@ -1660,18 +1682,12 @@ void i915_capture_error_state(struct drm_i915_private 
*dev_priv,
        if (READ_ONCE(dev_priv->gpu_error.first_error))
                return;
 
-       /* Account for pipe specific data like PIPE*STAT */
-       error = kzalloc(sizeof(*error), GFP_ATOMIC);
+       error = i915_capture_gpu_state(dev_priv);
        if (!error) {
                DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
                return;
        }
 
-       kref_init(&error->ref);
-       error->i915 = dev_priv;
-
-       stop_machine(capture, error, NULL);
-
        i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
        DRM_INFO("%s\n", error->error_msg);
 
@@ -1685,7 +1701,7 @@ void i915_capture_error_state(struct drm_i915_private 
*dev_priv,
        }
 
        if (error) {
-               i915_error_state_free(&error->ref);
+               __i915_gpu_state_free(&error->ref);
                return;
        }
 
@@ -1701,33 +1717,28 @@ void i915_capture_error_state(struct drm_i915_private 
*dev_priv,
        }
 }
 
-void i915_error_state_get(struct drm_device *dev,
-                         struct i915_error_state_file_priv *error_priv)
+struct i915_gpu_state *
+i915_first_error_state(struct drm_i915_private *i915)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct i915_gpu_state *error;
 
-       spin_lock_irq(&dev_priv->gpu_error.lock);
-       error_priv->error = dev_priv->gpu_error.first_error;
-       if (error_priv->error)
-               kref_get(&error_priv->error->ref);
-       spin_unlock_irq(&dev_priv->gpu_error.lock);
-}
+       spin_lock_irq(&i915->gpu_error.lock);
+       error = i915->gpu_error.first_error;
+       if (error)
+               i915_gpu_state_get(error);
+       spin_unlock_irq(&i915->gpu_error.lock);
 
-void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
-{
-       if (error_priv->error)
-               kref_put(&error_priv->error->ref, i915_error_state_free);
+       return error;
 }
 
-void i915_destroy_error_state(struct drm_i915_private *dev_priv)
+void i915_reset_error_state(struct drm_i915_private *i915)
 {
-       struct drm_i915_error_state *error;
+       struct i915_gpu_state *error;
 
-       spin_lock_irq(&dev_priv->gpu_error.lock);
-       error = dev_priv->gpu_error.first_error;
-       dev_priv->gpu_error.first_error = NULL;
-       spin_unlock_irq(&dev_priv->gpu_error.lock);
+       spin_lock_irq(&i915->gpu_error.lock);
+       error = i915->gpu_error.first_error;
+       i915->gpu_error.first_error = NULL;
+       spin_unlock_irq(&i915->gpu_error.lock);
 
-       if (error)
-               kref_put(&error->ref, i915_error_state_free);
+       i915_gpu_state_put(error);
 }
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c 
b/drivers/gpu/drm/i915/i915_sysfs.c
index a721ff116101..af0ac9f261fd 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -522,33 +522,27 @@ static ssize_t error_state_read(struct file *filp, struct 
kobject *kobj,
 
        struct device *kdev = kobj_to_dev(kobj);
        struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-       struct drm_device *dev = &dev_priv->drm;
-       struct i915_error_state_file_priv error_priv;
        struct drm_i915_error_state_buf error_str;
-       ssize_t ret_count = 0;
-       int ret;
-
-       memset(&error_priv, 0, sizeof(error_priv));
+       struct i915_gpu_state *gpu;
+       ssize_t ret;
 
-       ret = i915_error_state_buf_init(&error_str, to_i915(dev), count, off);
+       ret = i915_error_state_buf_init(&error_str, dev_priv, count, off);
        if (ret)
                return ret;
 
-       error_priv.i915 = dev_priv;
-       i915_error_state_get(dev, &error_priv);
-
-       ret = i915_error_state_to_str(&error_str, &error_priv);
+       gpu = i915_first_error_state(dev_priv);
+       ret = i915_error_state_to_str(&error_str, gpu);
        if (ret)
                goto out;
 
-       ret_count = count < error_str.bytes ? count : error_str.bytes;
+       ret = count < error_str.bytes ? count : error_str.bytes;
+       memcpy(buf, error_str.buf, ret);
 
-       memcpy(buf, error_str.buf, ret_count);
 out:
-       i915_error_state_put(&error_priv);
+       i915_gpu_state_put(gpu);
        i915_error_state_buf_release(&error_str);
 
-       return ret ?: ret_count;
+       return ret;
 }
 
 static ssize_t error_state_write(struct file *file, struct kobject *kobj,
@@ -559,7 +553,7 @@ static ssize_t error_state_write(struct file *file, struct 
kobject *kobj,
        struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 
        DRM_DEBUG_DRIVER("Resetting error state\n");
-       i915_destroy_error_state(dev_priv);
+       i915_reset_error_state(dev_priv);
 
        return count;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 02e6bb2d614c..660581fb329a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -15838,9 +15838,9 @@ intel_display_capture_error_state(struct 
drm_i915_private *dev_priv)
 
 void
 intel_display_print_error_state(struct drm_i915_error_state_buf *m,
-                               struct drm_i915_private *dev_priv,
                                struct intel_display_error_state *error)
 {
+       struct drm_i915_private *dev_priv = m->i915;
        int i;
 
        if (!error)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to