Re: [Intel-gfx] [PATCH v3] drm/i915/guc: capture GuC logs if FW fails to load

2017-05-10 Thread Daniele Ceraolo Spurio



On 05/05/17 16:23, Daniele Ceraolo Spurio wrote:

We're currently deleting the GuC logs if the FW fails to load, but those
are still useful to understand why the loading failed. Keeping the
object around allows us to access them after driver load is completed.

v2: keep the object around instead of using kernel memory (Chris)
    don't store the logs in the gpu_error struct (Chris)
    add a check on guc_log_level to avoid snapshotting empty logs

v3: use separate debugfs for error log (Chris)

Cc: Chris Wilson 
Cc: Oscar Mateo 
Cc: Michal Wajdeczko 
Signed-off-by: Daniele Ceraolo Spurio 
---


Chris r-b'ed this patch on IRC. I'm going to wait for Oscar's guc stage 
pool dump patch to get merged to avoid conflicts in i915_debugfs.c then 
rebase and re-send.


Daniele


 drivers/gpu/drm/i915/i915_debugfs.c  | 35 ++-
 drivers/gpu/drm/i915/i915_drv.c  |  3 +++
 drivers/gpu/drm/i915/intel_guc_log.c | 17 +
 drivers/gpu/drm/i915/intel_uc.c  |  7 +--
 drivers/gpu/drm/i915/intel_uc.h  |  5 +
 5 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 870c470..4d39e08d3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2542,27 +2542,35 @@ static int i915_guc_info(struct seq_file *m, void *data)

 static int i915_guc_log_dump(struct seq_file *m, void *data)
 {
-   struct drm_i915_private *dev_priv = node_to_i915(m->private);
+   struct drm_info_node *node = m->private;
+   struct drm_i915_private *dev_priv = node_to_i915(node);
+   bool dump_err_log = !!node->info_ent->data;
struct drm_i915_gem_object *obj;
-   int i = 0, pg;
+   u32 *log;
+   int i = 0;

-   if (!dev_priv->guc.log.vma)
+   if (!dump_err_log && dev_priv->guc.log.vma)
+   obj = dev_priv->guc.log.vma->obj;
+   else if (dump_err_log && dev_priv->guc.err_load_log)
+   obj = dev_priv->guc.err_load_log;
+   else
return 0;

-   obj = dev_priv->guc.log.vma->obj;
-   for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
-   u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
-
-   for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
-   seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
-  *(log + i), *(log + i + 1),
-  *(log + i + 2), *(log + i + 3));
-
-   kunmap_atomic(log);
+   log = i915_gem_object_pin_map(obj, I915_MAP_WC);
+   if (IS_ERR(log)) {
+   DRM_ERROR("Failed to pin guc_log object\n");
+   return PTR_ERR(log);
}

+   for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
+   seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+  *(log + i), *(log + i + 1),
+  *(log + i + 2), *(log + i + 3));
+
seq_putc(m, '\n');

+   i915_gem_object_unpin_map(obj);
+
return 0;
 }

@@ -4774,6 +4782,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, 
struct file *file)
{"i915_guc_info", i915_guc_info, 0},
{"i915_guc_load_status", i915_guc_load_status_info, 0},
{"i915_guc_log_dump", i915_guc_log_dump, 0},
+   {"i915_guc_err_load_log_dump", i915_guc_log_dump, 0, (void *)1},
{"i915_huc_load_status", i915_huc_load_status_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_hangcheck_info", i915_hangcheck_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 452c265..d8c82ac 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1354,6 +1354,9 @@ void i915_driver_unload(struct drm_device *dev)
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
i915_reset_error_state(dev_priv);

+   /* release GuC error log (if any) */
+   i915_guc_load_error_log_free(&dev_priv->guc);
+
/* Flush any outstanding unpin_work. */
drain_workqueue(dev_priv->wq);

diff --git a/drivers/gpu/drm/i915/intel_guc_log.c 
b/drivers/gpu/drm/i915/intel_guc_log.c
index 16d3b87..691da42 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private 
*dev_priv)
guc_log_runtime_destroy(&dev_priv->guc);
mutex_unlock(&dev_priv->drm.struct_mutex);
 }
+
+void i915_guc_load_error_log_capture(struct intel_guc *guc)
+{
+   if (!guc->log.vma || i915.guc_log_level < 0)
+   return;
+
+   if (!guc->err_load_log)
+   guc->err_load_log = i915_gem_object_get(guc->log.vma->obj);
+
+   return;
+}
+
+void i915_guc_load_error_log_free(struct intel_guc 

[Intel-gfx] [PATCH v3] drm/i915/guc: capture GuC logs if FW fails to load

2017-05-05 Thread Daniele Ceraolo Spurio
We're currently deleting the GuC logs if the FW fails to load, but those
are still useful to understand why the loading failed. Keeping the
object around allows us to access them after driver load is completed.

v2: keep the object around instead of using kernel memory (Chris)
    don't store the logs in the gpu_error struct (Chris)
    add a check on guc_log_level to avoid snapshotting empty logs

v3: use separate debugfs for error log (Chris)

Cc: Chris Wilson 
Cc: Oscar Mateo 
Cc: Michal Wajdeczko 
Signed-off-by: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 35 ++-
 drivers/gpu/drm/i915/i915_drv.c  |  3 +++
 drivers/gpu/drm/i915/intel_guc_log.c | 17 +
 drivers/gpu/drm/i915/intel_uc.c  |  7 +--
 drivers/gpu/drm/i915/intel_uc.h  |  5 +
 5 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 870c470..4d39e08d3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2542,27 +2542,35 @@ static int i915_guc_info(struct seq_file *m, void *data)
 
 static int i915_guc_log_dump(struct seq_file *m, void *data)
 {
-   struct drm_i915_private *dev_priv = node_to_i915(m->private);
+   struct drm_info_node *node = m->private;
+   struct drm_i915_private *dev_priv = node_to_i915(node);
+   bool dump_err_log = !!node->info_ent->data;
struct drm_i915_gem_object *obj;
-   int i = 0, pg;
+   u32 *log;
+   int i = 0;
 
-   if (!dev_priv->guc.log.vma)
+   if (!dump_err_log && dev_priv->guc.log.vma)
+   obj = dev_priv->guc.log.vma->obj;
+   else if (dump_err_log && dev_priv->guc.err_load_log)
+   obj = dev_priv->guc.err_load_log;
+   else
return 0;
 
-   obj = dev_priv->guc.log.vma->obj;
-   for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
-   u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
-
-   for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
-   seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
-  *(log + i), *(log + i + 1),
-  *(log + i + 2), *(log + i + 3));
-
-   kunmap_atomic(log);
+   log = i915_gem_object_pin_map(obj, I915_MAP_WC);
+   if (IS_ERR(log)) {
+   DRM_ERROR("Failed to pin guc_log object\n");
+   return PTR_ERR(log);
}
 
+   for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
+   seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+  *(log + i), *(log + i + 1),
+  *(log + i + 2), *(log + i + 3));
+
seq_putc(m, '\n');
 
+   i915_gem_object_unpin_map(obj);
+
return 0;
 }
 
@@ -4774,6 +4782,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, 
struct file *file)
{"i915_guc_info", i915_guc_info, 0},
{"i915_guc_load_status", i915_guc_load_status_info, 0},
{"i915_guc_log_dump", i915_guc_log_dump, 0},
+   {"i915_guc_err_load_log_dump", i915_guc_log_dump, 0, (void *)1},
{"i915_huc_load_status", i915_huc_load_status_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_hangcheck_info", i915_hangcheck_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 452c265..d8c82ac 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1354,6 +1354,9 @@ void i915_driver_unload(struct drm_device *dev)
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
i915_reset_error_state(dev_priv);
 
+   /* release GuC error log (if any) */
+   i915_guc_load_error_log_free(&dev_priv->guc);
+
/* Flush any outstanding unpin_work. */
drain_workqueue(dev_priv->wq);
 
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c 
b/drivers/gpu/drm/i915/intel_guc_log.c
index 16d3b87..691da42 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private 
*dev_priv)
guc_log_runtime_destroy(&dev_priv->guc);
mutex_unlock(&dev_priv->drm.struct_mutex);
 }
+
+void i915_guc_load_error_log_capture(struct intel_guc *guc)
+{
+   if (!guc->log.vma || i915.guc_log_level < 0)
+   return;
+
+   if (!guc->err_load_log)
+   guc->err_load_log = i915_gem_object_get(guc->log.vma->obj);
+
+   return;
+}
+
+void i915_guc_load_error_log_free(struct intel_guc *guc)
+{
+   if (guc->err_load_log)
+   i915_gem_object_put(guc->err_load_log);
+}
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 7fd75ca..d66ffab 100644
---