[PATCH 09/13] drm/msm/gpu: Capture the GPU state on a GPU hang

2018-07-24 Thread Jordan Crouse
Capture the GPU state on a GPU hang and store it for later playback
via the devcoredump facility. Only one crash state is stored at a
time on the assumption that the first hang is usually the most
interesting. The existing crash state can be cleared after capturing
it and then a new one will be captured on the next hang.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/Kconfig |  1 +
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |  4 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 36 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  6 +-
 drivers/gpu/drm/msm/msm_debugfs.c   |  5 +-
 drivers/gpu/drm/msm/msm_gpu.c   | 83 -
 drivers/gpu/drm/msm/msm_gpu.h   | 38 ++-
 9 files changed, 154 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 38cbde971b48..843a9d40c05e 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -12,6 +12,7 @@ config DRM_MSM
select SHMEM
select TMPFS
select QCOM_SCM
+   select WANT_DEV_COREDUMP
select SND_SOC_HDMI_CODEC if SND_SOC
select SYNC_FILE
select PM_OPP
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 4cffec2b6adc..fc502e412132 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -454,7 +454,7 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = adreno_active_ring,
.irq = a3xx_irq,
.destroy = a3xx_destroy,
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = adreno_show,
 #endif
.gpu_state_get = a3xx_gpu_state_get,
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 95f08c22e8d7..8129cf037db1 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -540,7 +540,7 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = adreno_active_ring,
.irq = a4xx_irq,
.destroy = a4xx_destroy,
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = adreno_show,
 #endif
.gpu_state_get = a4xx_gpu_state_get,
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 5f1aab3c1cb1..16074fa6bf1e 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1243,8 +1243,10 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = a5xx_active_ring,
.irq = a5xx_irq,
.destroy = a5xx_destroy,
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = adreno_show,
+#endif
+#if defined(CONFIG_DEBUG_FS)
.debugfs_init = a5xx_debugfs_init,
 #endif
.gpu_busy = a5xx_gpu_busy,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 42828253ede5..c72e3afc43a8 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -378,6 +378,8 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu 
*gpu)
if (!state)
return ERR_PTR(-ENOMEM);
 
+   kref_init(>ref);
+
do_gettimeofday(>time);
 
for (i = 0; i < gpu->nr_rings; i++) {
@@ -414,18 +416,28 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu 
*gpu)
return state;
 }
 
-void adreno_gpu_state_put(struct msm_gpu_state *state)
+static void adreno_gpu_state_destroy(struct kref *kref)
 {
-   if (IS_ERR_OR_NULL(state))
-   return;
+   struct msm_gpu_state *state = container_of(kref,
+   struct msm_gpu_state, ref);
 
+   kfree(state->comm);
+   kfree(state->cmd);
kfree(state->registers);
kfree(state);
 }
 
-#ifdef CONFIG_DEBUG_FS
+int adreno_gpu_state_put(struct msm_gpu_state *state)
+{
+   if (IS_ERR_OR_NULL(state))
+   return 1;
+
+   return kref_put(>ref, adreno_gpu_state_destroy);
+}
+
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
 void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
-   struct seq_file *m)
+   struct drm_printer *p)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int i;
@@ -433,23 +445,23 @@ void adreno_show(struct msm_gpu *gpu, struct 
msm_gpu_state *state,
if (IS_ERR_OR_NULL(state))
return;
 
-   seq_printf(m, "status:   %08x\n", state->rbbm_status);
-   seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
+   drm_printf(p, "status:   %08x\n", state->rbbm_status);
+   drm_printf(p, 

[PATCH 09/13] drm/msm/gpu: Capture the GPU state on a GPU hang

2018-07-12 Thread Jordan Crouse
Capture the GPU state on a GPU hang and store it for later playback
via the devcoredump facility. Only one crash state is stored at a
time on the assumption that the first hang is usually the most
interesting. The existing crash state can be cleared after capturing
it and then a new one will be captured on the next hang.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/Kconfig |  1 +
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |  4 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 36 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  6 +-
 drivers/gpu/drm/msm/msm_debugfs.c   |  5 +-
 drivers/gpu/drm/msm/msm_gpu.c   | 83 -
 drivers/gpu/drm/msm/msm_gpu.h   | 38 ++-
 9 files changed, 154 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 38cbde971b48..843a9d40c05e 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -12,6 +12,7 @@ config DRM_MSM
select SHMEM
select TMPFS
select QCOM_SCM
+   select WANT_DEV_COREDUMP
select SND_SOC_HDMI_CODEC if SND_SOC
select SYNC_FILE
select PM_OPP
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 4cffec2b6adc..fc502e412132 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -454,7 +454,7 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = adreno_active_ring,
.irq = a3xx_irq,
.destroy = a3xx_destroy,
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = adreno_show,
 #endif
.gpu_state_get = a3xx_gpu_state_get,
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 95f08c22e8d7..8129cf037db1 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -540,7 +540,7 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = adreno_active_ring,
.irq = a4xx_irq,
.destroy = a4xx_destroy,
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = adreno_show,
 #endif
.gpu_state_get = a4xx_gpu_state_get,
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 5f1aab3c1cb1..16074fa6bf1e 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1243,8 +1243,10 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = a5xx_active_ring,
.irq = a5xx_irq,
.destroy = a5xx_destroy,
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = adreno_show,
+#endif
+#if defined(CONFIG_DEBUG_FS)
.debugfs_init = a5xx_debugfs_init,
 #endif
.gpu_busy = a5xx_gpu_busy,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 0e937eedcec5..163542487e2c 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -378,6 +378,8 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu 
*gpu)
if (!state)
return ERR_PTR(-ENOMEM);
 
+   kref_init(>ref);
+
do_gettimeofday(>time);
 
for (i = 0; i < gpu->nr_rings; i++) {
@@ -413,18 +415,28 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu 
*gpu)
return state;
 }
 
-void adreno_gpu_state_put(struct msm_gpu_state *state)
+static void adreno_gpu_state_destroy(struct kref *kref)
 {
-   if (IS_ERR_OR_NULL(state))
-   return;
+   struct msm_gpu_state *state = container_of(kref,
+   struct msm_gpu_state, ref);
 
+   kfree(state->comm);
+   kfree(state->cmd);
kfree(state->registers);
kfree(state);
 }
 
-#ifdef CONFIG_DEBUG_FS
+int adreno_gpu_state_put(struct msm_gpu_state *state)
+{
+   if (IS_ERR_OR_NULL(state))
+   return 1;
+
+   return kref_put(>ref, adreno_gpu_state_destroy);
+}
+
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
 void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
-   struct seq_file *m)
+   struct drm_printer *p)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int i;
@@ -432,23 +444,23 @@ void adreno_show(struct msm_gpu *gpu, struct 
msm_gpu_state *state,
if (IS_ERR_OR_NULL(state))
return;
 
-   seq_printf(m, "status:   %08x\n", state->rbbm_status);
-   seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
+   drm_printf(p, "status:   %08x\n", state->rbbm_status);
+   drm_printf(p, 

Re: [PATCH 09/13] drm/msm/gpu: Capture the GPU state on a GPU hang

2018-06-29 Thread kbuild test robot
Hi Jordan,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on robclark/msm-next]
[also build test ERROR on v4.18-rc2 next-20180629]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Jordan-Crouse/drm-msm-Capture-and-dump-the-GPU-crash-state/20180630-015229
base:   git://people.freedesktop.org/~robclark/linux msm-next
config: arm64-defconfig (attached as .config)
compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=arm64 

All errors (new ones prefixed by >>):

>> ERROR: "__drm_puts_coredump" [drivers/gpu/drm/msm/msm.ko] undefined!

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 09/13] drm/msm/gpu: Capture the GPU state on a GPU hang

2018-06-29 Thread Jordan Crouse
Capture the GPU state on a GPU hang and store it for later playback
via the devcoredump facility. Only one crash state is stored at a
time on the assumption that the first hang is usually the most
interesting. The existing crash state can be cleared after capturing
it and then a new one will be captured on the next hang.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/Kconfig |  1 +
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |  4 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 36 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  6 +-
 drivers/gpu/drm/msm/msm_debugfs.c   |  5 +-
 drivers/gpu/drm/msm/msm_gpu.c   | 83 -
 drivers/gpu/drm/msm/msm_gpu.h   | 38 ++-
 9 files changed, 154 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 38cbde971b48..843a9d40c05e 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -12,6 +12,7 @@ config DRM_MSM
select SHMEM
select TMPFS
select QCOM_SCM
+   select WANT_DEV_COREDUMP
select SND_SOC_HDMI_CODEC if SND_SOC
select SYNC_FILE
select PM_OPP
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 4cffec2b6adc..fc502e412132 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -454,7 +454,7 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = adreno_active_ring,
.irq = a3xx_irq,
.destroy = a3xx_destroy,
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = adreno_show,
 #endif
.gpu_state_get = a3xx_gpu_state_get,
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 95f08c22e8d7..8129cf037db1 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -540,7 +540,7 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = adreno_active_ring,
.irq = a4xx_irq,
.destroy = a4xx_destroy,
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = adreno_show,
 #endif
.gpu_state_get = a4xx_gpu_state_get,
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 5f1aab3c1cb1..16074fa6bf1e 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1243,8 +1243,10 @@ static const struct adreno_gpu_funcs funcs = {
.active_ring = a5xx_active_ring,
.irq = a5xx_irq,
.destroy = a5xx_destroy,
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = adreno_show,
+#endif
+#if defined(CONFIG_DEBUG_FS)
.debugfs_init = a5xx_debugfs_init,
 #endif
.gpu_busy = a5xx_gpu_busy,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 0e937eedcec5..163542487e2c 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -378,6 +378,8 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu 
*gpu)
if (!state)
return ERR_PTR(-ENOMEM);
 
+   kref_init(>ref);
+
do_gettimeofday(>time);
 
for (i = 0; i < gpu->nr_rings; i++) {
@@ -413,18 +415,28 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu 
*gpu)
return state;
 }
 
-void adreno_gpu_state_put(struct msm_gpu_state *state)
+static void adreno_gpu_state_destroy(struct kref *kref)
 {
-   if (IS_ERR_OR_NULL(state))
-   return;
+   struct msm_gpu_state *state = container_of(kref,
+   struct msm_gpu_state, ref);
 
+   kfree(state->comm);
+   kfree(state->cmd);
kfree(state->registers);
kfree(state);
 }
 
-#ifdef CONFIG_DEBUG_FS
+int adreno_gpu_state_put(struct msm_gpu_state *state)
+{
+   if (IS_ERR_OR_NULL(state))
+   return 1;
+
+   return kref_put(>ref, adreno_gpu_state_destroy);
+}
+
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
 void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
-   struct seq_file *m)
+   struct drm_printer *p)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int i;
@@ -432,23 +444,23 @@ void adreno_show(struct msm_gpu *gpu, struct 
msm_gpu_state *state,
if (IS_ERR_OR_NULL(state))
return;
 
-   seq_printf(m, "status:   %08x\n", state->rbbm_status);
-   seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
+   drm_printf(p, "status:   %08x\n", state->rbbm_status);
+   drm_printf(p,