[PATCH 09/13] drm/msm/gpu: Capture the GPU state on a GPU hang
Capture the GPU state on a GPU hang and store it for later playback via the devcoredump facility. Only one crash state is stored at a time on the assumption that the first hang is usually the most interesting. The existing crash state can be cleared after capturing it and then a new one will be captured on the next hang. Signed-off-by: Jordan Crouse --- drivers/gpu/drm/msm/Kconfig | 1 + drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 36 +++ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 6 +- drivers/gpu/drm/msm/msm_debugfs.c | 5 +- drivers/gpu/drm/msm/msm_gpu.c | 83 - drivers/gpu/drm/msm/msm_gpu.h | 38 ++- 9 files changed, 154 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 38cbde971b48..843a9d40c05e 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -12,6 +12,7 @@ config DRM_MSM select SHMEM select TMPFS select QCOM_SCM + select WANT_DEV_COREDUMP select SND_SOC_HDMI_CODEC if SND_SOC select SYNC_FILE select PM_OPP diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 4cffec2b6adc..fc502e412132 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -454,7 +454,7 @@ static const struct adreno_gpu_funcs funcs = { .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) .show = adreno_show, #endif .gpu_state_get = a3xx_gpu_state_get, diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 95f08c22e8d7..8129cf037db1 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -540,7 +540,7 @@ static const struct adreno_gpu_funcs funcs = { .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) .show = adreno_show, #endif .gpu_state_get = a4xx_gpu_state_get, diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 5f1aab3c1cb1..16074fa6bf1e 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1243,8 +1243,10 @@ static const struct adreno_gpu_funcs funcs = { .active_ring = a5xx_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) .show = adreno_show, +#endif +#if defined(CONFIG_DEBUG_FS) .debugfs_init = a5xx_debugfs_init, #endif .gpu_busy = a5xx_gpu_busy, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 42828253ede5..c72e3afc43a8 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -378,6 +378,8 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu) if (!state) return ERR_PTR(-ENOMEM); + kref_init(>ref); + do_gettimeofday(>time); for (i = 0; i < gpu->nr_rings; i++) { @@ -414,18 +416,28 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu) return state; } -void adreno_gpu_state_put(struct msm_gpu_state *state) +static void adreno_gpu_state_destroy(struct kref *kref) { - if (IS_ERR_OR_NULL(state)) - return; + struct msm_gpu_state *state = container_of(kref, + struct msm_gpu_state, ref); + kfree(state->comm); + kfree(state->cmd); kfree(state->registers); kfree(state); } -#ifdef CONFIG_DEBUG_FS +int adreno_gpu_state_put(struct msm_gpu_state *state) +{ + if (IS_ERR_OR_NULL(state)) + return 1; + + return kref_put(>ref, adreno_gpu_state_destroy); +} + +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, - struct seq_file *m) + struct drm_printer *p) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int i; @@ -433,23 +445,23 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, if (IS_ERR_OR_NULL(state)) return; - seq_printf(m, "status: %08x\n", state->rbbm_status); - seq_printf(m, "revision: %d (%d.%d.%d.%d)\n", + drm_printf(p, "status: %08x\n", state->rbbm_status); + drm_printf(p,
[PATCH 09/13] drm/msm/gpu: Capture the GPU state on a GPU hang
Capture the GPU state on a GPU hang and store it for later playback via the devcoredump facility. Only one crash state is stored at a time on the assumption that the first hang is usually the most interesting. The existing crash state can be cleared after capturing it and then a new one will be captured on the next hang. Signed-off-by: Jordan Crouse --- drivers/gpu/drm/msm/Kconfig | 1 + drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 36 +++ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 6 +- drivers/gpu/drm/msm/msm_debugfs.c | 5 +- drivers/gpu/drm/msm/msm_gpu.c | 83 - drivers/gpu/drm/msm/msm_gpu.h | 38 ++- 9 files changed, 154 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 38cbde971b48..843a9d40c05e 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -12,6 +12,7 @@ config DRM_MSM select SHMEM select TMPFS select QCOM_SCM + select WANT_DEV_COREDUMP select SND_SOC_HDMI_CODEC if SND_SOC select SYNC_FILE select PM_OPP diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 4cffec2b6adc..fc502e412132 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -454,7 +454,7 @@ static const struct adreno_gpu_funcs funcs = { .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) .show = adreno_show, #endif .gpu_state_get = a3xx_gpu_state_get, diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 95f08c22e8d7..8129cf037db1 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -540,7 +540,7 @@ static const struct adreno_gpu_funcs funcs = { .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) .show = adreno_show, #endif .gpu_state_get = a4xx_gpu_state_get, diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 5f1aab3c1cb1..16074fa6bf1e 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1243,8 +1243,10 @@ static const struct adreno_gpu_funcs funcs = { .active_ring = a5xx_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) .show = adreno_show, +#endif +#if defined(CONFIG_DEBUG_FS) .debugfs_init = a5xx_debugfs_init, #endif .gpu_busy = a5xx_gpu_busy, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 0e937eedcec5..163542487e2c 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -378,6 +378,8 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu) if (!state) return ERR_PTR(-ENOMEM); + kref_init(>ref); + do_gettimeofday(>time); for (i = 0; i < gpu->nr_rings; i++) { @@ -413,18 +415,28 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu) return state; } -void adreno_gpu_state_put(struct msm_gpu_state *state) +static void adreno_gpu_state_destroy(struct kref *kref) { - if (IS_ERR_OR_NULL(state)) - return; + struct msm_gpu_state *state = container_of(kref, + struct msm_gpu_state, ref); + kfree(state->comm); + kfree(state->cmd); kfree(state->registers); kfree(state); } -#ifdef CONFIG_DEBUG_FS +int adreno_gpu_state_put(struct msm_gpu_state *state) +{ + if (IS_ERR_OR_NULL(state)) + return 1; + + return kref_put(>ref, adreno_gpu_state_destroy); +} + +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, - struct seq_file *m) + struct drm_printer *p) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int i; @@ -432,23 +444,23 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, if (IS_ERR_OR_NULL(state)) return; - seq_printf(m, "status: %08x\n", state->rbbm_status); - seq_printf(m, "revision: %d (%d.%d.%d.%d)\n", + drm_printf(p, "status: %08x\n", state->rbbm_status); + drm_printf(p,
Re: [PATCH 09/13] drm/msm/gpu: Capture the GPU state on a GPU hang
Hi Jordan, Thank you for the patch! Yet something to improve: [auto build test ERROR on robclark/msm-next] [also build test ERROR on v4.18-rc2 next-20180629] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Jordan-Crouse/drm-msm-Capture-and-dump-the-GPU-crash-state/20180630-015229 base: git://people.freedesktop.org/~robclark/linux msm-next config: arm64-defconfig (attached as .config) compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree GCC_VERSION=7.2.0 make.cross ARCH=arm64 All errors (new ones prefixed by >>): >> ERROR: "__drm_puts_coredump" [drivers/gpu/drm/msm/msm.ko] undefined! --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH 09/13] drm/msm/gpu: Capture the GPU state on a GPU hang
Capture the GPU state on a GPU hang and store it for later playback via the devcoredump facility. Only one crash state is stored at a time on the assumption that the first hang is usually the most interesting. The existing crash state can be cleared after capturing it and then a new one will be captured on the next hang. Signed-off-by: Jordan Crouse --- drivers/gpu/drm/msm/Kconfig | 1 + drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 36 +++ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 6 +- drivers/gpu/drm/msm/msm_debugfs.c | 5 +- drivers/gpu/drm/msm/msm_gpu.c | 83 - drivers/gpu/drm/msm/msm_gpu.h | 38 ++- 9 files changed, 154 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 38cbde971b48..843a9d40c05e 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -12,6 +12,7 @@ config DRM_MSM select SHMEM select TMPFS select QCOM_SCM + select WANT_DEV_COREDUMP select SND_SOC_HDMI_CODEC if SND_SOC select SYNC_FILE select PM_OPP diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 4cffec2b6adc..fc502e412132 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -454,7 +454,7 @@ static const struct adreno_gpu_funcs funcs = { .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) .show = adreno_show, #endif .gpu_state_get = a3xx_gpu_state_get, diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 95f08c22e8d7..8129cf037db1 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -540,7 +540,7 @@ static const struct adreno_gpu_funcs funcs = { .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) .show = adreno_show, #endif .gpu_state_get = a4xx_gpu_state_get, diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 5f1aab3c1cb1..16074fa6bf1e 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1243,8 +1243,10 @@ static const struct adreno_gpu_funcs funcs = { .active_ring = a5xx_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) .show = adreno_show, +#endif +#if defined(CONFIG_DEBUG_FS) .debugfs_init = a5xx_debugfs_init, #endif .gpu_busy = a5xx_gpu_busy, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 0e937eedcec5..163542487e2c 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -378,6 +378,8 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu) if (!state) return ERR_PTR(-ENOMEM); + kref_init(>ref); + do_gettimeofday(>time); for (i = 0; i < gpu->nr_rings; i++) { @@ -413,18 +415,28 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu) return state; } -void adreno_gpu_state_put(struct msm_gpu_state *state) +static void adreno_gpu_state_destroy(struct kref *kref) { - if (IS_ERR_OR_NULL(state)) - return; + struct msm_gpu_state *state = container_of(kref, + struct msm_gpu_state, ref); + kfree(state->comm); + kfree(state->cmd); kfree(state->registers); kfree(state); } -#ifdef CONFIG_DEBUG_FS +int adreno_gpu_state_put(struct msm_gpu_state *state) +{ + if (IS_ERR_OR_NULL(state)) + return 1; + + return kref_put(>ref, adreno_gpu_state_destroy); +} + +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, - struct seq_file *m) + struct drm_printer *p) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int i; @@ -432,23 +444,23 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, if (IS_ERR_OR_NULL(state)) return; - seq_printf(m, "status: %08x\n", state->rbbm_status); - seq_printf(m, "revision: %d (%d.%d.%d.%d)\n", + drm_printf(p, "status: %08x\n", state->rbbm_status); + drm_printf(p,