Re: [PATCH] drm: mali-dp: Add debugfs file for reporting internal errors

2018-05-14 Thread Alexandru-Cosmin Gheorghe
On Mon, May 14, 2018 at 10:19:27AM +0100, Brian Starkey wrote:
> Hi Alex,
> 
> On Fri, May 11, 2018 at 06:56:03PM +0100, Alexandru Gheorghe wrote:
> >Status register contains a lot of bits for reporting internal errors
> >inside Mali DP. Currently, we just silently ignore all of the erorrs,
> 
> Being picky: s/erorrs/errors/
> 
> >that doesn't help when we are investigating different bugs, especially
> >on the FPGA models which have a lot of constrains, so we could easily
> 
> s/constrains/constraints/
> 
> >end up in AXI or underrun errors.
> >
> >Add a new file called debug that contains an agregate of the
> 
> s/agregate/aggregate/
> 
> >errors reported by the Mali DP hardware.
> >
> >E.g:
> >[root@alarm ~]# cat /sys/kernel/debug/dri/1/debug
> >[DE] num_errors : 167
> >[DE] last_error_status  : 0x0001
> >[DE] last_error_vblank : 385
> >[SE] num_errors : 3
> >[SE] last_error_status  : 0x00e23001
> >[SE] last_error_vblank : 201
> >
> >This a morphosis of the patch presented here [1], where the errors
> >where reported continuously via trace_printk.
> >
> >[1] 
> >https://lists.freedesktop.org/archives/dri-devel/2018-February/167042.html
> >
> >Signed-off-by: Alexandru Gheorghe 
> >---
> >drivers/gpu/drm/arm/malidp_drv.c  | 61 
> >+++
> >drivers/gpu/drm/arm/malidp_drv.h  | 15 ++
> >drivers/gpu/drm/arm/malidp_hw.c   | 46 -
> >drivers/gpu/drm/arm/malidp_hw.h   |  1 +
> >drivers/gpu/drm/arm/malidp_regs.h |  6 
> >5 files changed, 122 insertions(+), 7 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/arm/malidp_drv.c 
> >b/drivers/gpu/drm/arm/malidp_drv.c
> >index 8d20faa..70ce19a 100644
> >--- a/drivers/gpu/drm/arm/malidp_drv.c
> >+++ b/drivers/gpu/drm/arm/malidp_drv.c
> >@@ -17,6 +17,7 @@
> >#include 
> >#include 
> >#include 
> >+#include 
> >
> >#include 
> >#include 
> >@@ -29,6 +30,7 @@
> >#include 
> >#include 
> >#include 
> >+#include 
> >
> >#include "malidp_drv.h"
> >#include "malidp_regs.h"
> >@@ -327,6 +329,62 @@ static int malidp_dumb_create(struct drm_file 
> >*file_priv,
> > return drm_gem_cma_dumb_create_internal(file_priv, drm, args);
> >}
> >
> >+#ifdef CONFIG_DEBUG_FS
> >+
> >+static void malidp_error_stats_init(struct malidp_error_stats *error_stats)
> >+{
> >+atomic_set(_stats->num_errors, 0);
> >+atomic_set(_stats->last_error_status, 0);
> >+atomic64_set(_stats->last_error_vblank, -1);
> >+}
> >+
> >+void malidp_error(struct malidp_error_stats *error_stats, u32 status,
> >+  u64 vblank)
> >+{
> >+atomic_set(_stats->last_error_status, status);
> >+atomic64_set(_stats->last_error_vblank, vblank);
> >+atomic_inc(_stats->num_errors);
> >+}
> >+
> 
> Do we already have a lock we could use to make sure the status printed
> is consistent? I know this is "only debug", but it could be annoying
> if we can't trust that the last_error_status does actually match the
> last_error_vblank.
>

No, we don't have any lock. Yes that would be annoying, I will add
one.
 
> >+void malidp_error_stats_dump(const char *prefix,
> >+ struct malidp_error_stats *error_stats,
> >+ struct seq_file *m)
> >+{
> >+seq_printf(m, "[%s] num_errors : %d\n", prefix,
> >+   atomic_read(_stats->num_errors));
> >+seq_printf(m, "[%s] last_error_status  : 0x%08x\n", prefix,
> >+   atomic_read(_stats->last_error_status));
> >+seq_printf(m, "[%s] last_error_vblank : %ld\n", prefix,
> >+   atomic64_read(_stats->last_error_vblank));
> >+}
> >+
> >+static int malidp_show_stats(struct seq_file *m, void *arg)
> >+{
> >+struct drm_info_node *node = (struct drm_info_node *)m->private;
> >+struct drm_device *drm = node->minor->dev;
> >+struct malidp_drm *malidp = drm->dev_private;
> >+
> >+malidp_error_stats_dump("DE", >de_errors, m);
> >+malidp_error_stats_dump("SE", >se_errors, m);
> >+return 0;
> >+}
> >+
> >+static struct drm_info_list malidp_debugfs_list[] = {
> >+{ "debug", malidp_show_stats, 0 },
> >+};
> 
> I think it would be pretty useful to have a way to reset the counters.
> Maybe by just writing anything to the file?
> 

drm_debugfs_fops doesn't allow writing. I see no reason using our own
debugs_fops, but do you think worth the trouble?

> Thanks,
> -Brian
> 
> >+
> >+static int malidp_debugfs_init(struct drm_minor *minor)
> >+{
> >+struct malidp_drm *malidp = minor->dev->dev_private;
> >+
> >+malidp_error_stats_init(>de_errors);
> >+malidp_error_stats_init(>se_errors);
> >+return drm_debugfs_create_files(malidp_debugfs_list,
> >+ARRAY_SIZE(malidp_debugfs_list), minor->debugfs_root, minor);
> >+}
> >+
> >+#endif //CONFIG_DEBUG_FS
> >+
> >static struct drm_driver malidp_driver = {
> > .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC |
> >DRIVER_PRIME,
> >@@ -343,6 +401,9 @@ static 

Re: [PATCH] drm: mali-dp: Add debugfs file for reporting internal errors

2018-05-14 Thread Brian Starkey

Hi Alex,

On Fri, May 11, 2018 at 06:56:03PM +0100, Alexandru Gheorghe wrote:

Status register contains a lot of bits for reporting internal errors
inside Mali DP. Currently, we just silently ignore all of the erorrs,


Being picky: s/erorrs/errors/


that doesn't help when we are investigating different bugs, especially
on the FPGA models which have a lot of constrains, so we could easily


s/constrains/constraints/


end up in AXI or underrun errors.

Add a new file called debug that contains an agregate of the


s/agregate/aggregate/


errors reported by the Mali DP hardware.

E.g:
[root@alarm ~]# cat /sys/kernel/debug/dri/1/debug
[DE] num_errors : 167
[DE] last_error_status  : 0x0001
[DE] last_error_vblank : 385
[SE] num_errors : 3
[SE] last_error_status  : 0x00e23001
[SE] last_error_vblank : 201

This a morphosis of the patch presented here [1], where the errors
where reported continuously via trace_printk.

[1] https://lists.freedesktop.org/archives/dri-devel/2018-February/167042.html

Signed-off-by: Alexandru Gheorghe 
---
drivers/gpu/drm/arm/malidp_drv.c  | 61 +++
drivers/gpu/drm/arm/malidp_drv.h  | 15 ++
drivers/gpu/drm/arm/malidp_hw.c   | 46 -
drivers/gpu/drm/arm/malidp_hw.h   |  1 +
drivers/gpu/drm/arm/malidp_regs.h |  6 
5 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index 8d20faa..70ce19a 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -17,6 +17,7 @@
#include 
#include 
#include 
+#include 

#include 
#include 
@@ -29,6 +30,7 @@
#include 
#include 
#include 
+#include 

#include "malidp_drv.h"
#include "malidp_regs.h"
@@ -327,6 +329,62 @@ static int malidp_dumb_create(struct drm_file *file_priv,
return drm_gem_cma_dumb_create_internal(file_priv, drm, args);
}

+#ifdef CONFIG_DEBUG_FS
+
+static void malidp_error_stats_init(struct malidp_error_stats *error_stats)
+{
+   atomic_set(_stats->num_errors, 0);
+   atomic_set(_stats->last_error_status, 0);
+   atomic64_set(_stats->last_error_vblank, -1);
+}
+
+void malidp_error(struct malidp_error_stats *error_stats, u32 status,
+ u64 vblank)
+{
+   atomic_set(_stats->last_error_status, status);
+   atomic64_set(_stats->last_error_vblank, vblank);
+   atomic_inc(_stats->num_errors);
+}
+


Do we already have a lock we could use to make sure the status printed
is consistent? I know this is "only debug", but it could be annoying
if we can't trust that the last_error_status does actually match the
last_error_vblank.


+void malidp_error_stats_dump(const char *prefix,
+struct malidp_error_stats *error_stats,
+struct seq_file *m)
+{
+   seq_printf(m, "[%s] num_errors : %d\n", prefix,
+  atomic_read(_stats->num_errors));
+   seq_printf(m, "[%s] last_error_status  : 0x%08x\n", prefix,
+  atomic_read(_stats->last_error_status));
+   seq_printf(m, "[%s] last_error_vblank : %ld\n", prefix,
+  atomic64_read(_stats->last_error_vblank));
+}
+
+static int malidp_show_stats(struct seq_file *m, void *arg)
+{
+   struct drm_info_node *node = (struct drm_info_node *)m->private;
+   struct drm_device *drm = node->minor->dev;
+   struct malidp_drm *malidp = drm->dev_private;
+
+   malidp_error_stats_dump("DE", >de_errors, m);
+   malidp_error_stats_dump("SE", >se_errors, m);
+   return 0;
+}
+
+static struct drm_info_list malidp_debugfs_list[] = {
+   { "debug", malidp_show_stats, 0 },
+};


I think it would be pretty useful to have a way to reset the counters.
Maybe by just writing anything to the file?

Thanks,
-Brian


+
+static int malidp_debugfs_init(struct drm_minor *minor)
+{
+   struct malidp_drm *malidp = minor->dev->dev_private;
+
+   malidp_error_stats_init(>de_errors);
+   malidp_error_stats_init(>se_errors);
+   return drm_debugfs_create_files(malidp_debugfs_list,
+   ARRAY_SIZE(malidp_debugfs_list), minor->debugfs_root, minor);
+}
+
+#endif //CONFIG_DEBUG_FS
+
static struct drm_driver malidp_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC |
   DRIVER_PRIME,
@@ -343,6 +401,9 @@ static struct drm_driver malidp_driver = {
.gem_prime_vmap = drm_gem_cma_prime_vmap,
.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
.gem_prime_mmap = drm_gem_cma_prime_mmap,
+#ifdef CONFIG_DEBUG_FS
+   .debugfs_init = malidp_debugfs_init,
+#endif
.fops = ,
.name = "mali-dp",
.desc = "ARM Mali Display Processor driver",
diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h
index c70989b..c49056c 100644
--- a/drivers/gpu/drm/arm/malidp_drv.h
+++ b/drivers/gpu/drm/arm/malidp_drv.h
@@ -18,6 +18,12 @@

Re: [PATCH] drm: mali-dp: Add debugfs file for reporting internal errors

2018-05-14 Thread Liviu Dudau
On Fri, May 11, 2018 at 06:56:03PM +0100, Alexandru Gheorghe wrote:
> Status register contains a lot of bits for reporting internal errors
> inside Mali DP. Currently, we just silently ignore all of the erorrs,
> that doesn't help when we are investigating different bugs, especially
> on the FPGA models which have a lot of constrains, so we could easily
> end up in AXI or underrun errors.
> 
> Add a new file called debug that contains an agregate of the
> errors reported by the Mali DP hardware.
> 
> E.g:
> [root@alarm ~]# cat /sys/kernel/debug/dri/1/debug
> [DE] num_errors : 167
> [DE] last_error_status  : 0x0001
> [DE] last_error_vblank : 385
> [SE] num_errors : 3
> [SE] last_error_status  : 0x00e23001
> [SE] last_error_vblank : 201
> 
> This a morphosis of the patch presented here [1], where the errors
> where reported continuously via trace_printk.
> 
> [1] https://lists.freedesktop.org/archives/dri-devel/2018-February/167042.html
> 
> Signed-off-by: Alexandru Gheorghe 

Acked-by: Liviu Dudau 

> ---
>  drivers/gpu/drm/arm/malidp_drv.c  | 61 
> +++
>  drivers/gpu/drm/arm/malidp_drv.h  | 15 ++
>  drivers/gpu/drm/arm/malidp_hw.c   | 46 -
>  drivers/gpu/drm/arm/malidp_hw.h   |  1 +
>  drivers/gpu/drm/arm/malidp_regs.h |  6 
>  5 files changed, 122 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/arm/malidp_drv.c 
> b/drivers/gpu/drm/arm/malidp_drv.c
> index 8d20faa..70ce19a 100644
> --- a/drivers/gpu/drm/arm/malidp_drv.c
> +++ b/drivers/gpu/drm/arm/malidp_drv.c
> @@ -17,6 +17,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -29,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "malidp_drv.h"
>  #include "malidp_regs.h"
> @@ -327,6 +329,62 @@ static int malidp_dumb_create(struct drm_file *file_priv,
>   return drm_gem_cma_dumb_create_internal(file_priv, drm, args);
>  }
>  
> +#ifdef CONFIG_DEBUG_FS
> +
> +static void malidp_error_stats_init(struct malidp_error_stats *error_stats)
> +{
> + atomic_set(_stats->num_errors, 0);
> + atomic_set(_stats->last_error_status, 0);
> + atomic64_set(_stats->last_error_vblank, -1);
> +}
> +
> +void malidp_error(struct malidp_error_stats *error_stats, u32 status,
> +   u64 vblank)
> +{
> + atomic_set(_stats->last_error_status, status);
> + atomic64_set(_stats->last_error_vblank, vblank);
> + atomic_inc(_stats->num_errors);
> +}
> +
> +void malidp_error_stats_dump(const char *prefix,
> +  struct malidp_error_stats *error_stats,
> +  struct seq_file *m)
> +{
> + seq_printf(m, "[%s] num_errors : %d\n", prefix,
> +atomic_read(_stats->num_errors));
> + seq_printf(m, "[%s] last_error_status  : 0x%08x\n", prefix,
> +atomic_read(_stats->last_error_status));
> + seq_printf(m, "[%s] last_error_vblank : %ld\n", prefix,
> +atomic64_read(_stats->last_error_vblank));
> +}
> +
> +static int malidp_show_stats(struct seq_file *m, void *arg)
> +{
> + struct drm_info_node *node = (struct drm_info_node *)m->private;
> + struct drm_device *drm = node->minor->dev;
> + struct malidp_drm *malidp = drm->dev_private;
> +
> + malidp_error_stats_dump("DE", >de_errors, m);
> + malidp_error_stats_dump("SE", >se_errors, m);
> + return 0;
> +}
> +
> +static struct drm_info_list malidp_debugfs_list[] = {
> + { "debug", malidp_show_stats, 0 },
> +};
> +
> +static int malidp_debugfs_init(struct drm_minor *minor)
> +{
> + struct malidp_drm *malidp = minor->dev->dev_private;
> +
> + malidp_error_stats_init(>de_errors);
> + malidp_error_stats_init(>se_errors);
> + return drm_debugfs_create_files(malidp_debugfs_list,
> + ARRAY_SIZE(malidp_debugfs_list), minor->debugfs_root, minor);
> +}
> +
> +#endif //CONFIG_DEBUG_FS
> +
>  static struct drm_driver malidp_driver = {
>   .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC |
>  DRIVER_PRIME,
> @@ -343,6 +401,9 @@ static struct drm_driver malidp_driver = {
>   .gem_prime_vmap = drm_gem_cma_prime_vmap,
>   .gem_prime_vunmap = drm_gem_cma_prime_vunmap,
>   .gem_prime_mmap = drm_gem_cma_prime_mmap,
> +#ifdef CONFIG_DEBUG_FS
> + .debugfs_init = malidp_debugfs_init,
> +#endif
>   .fops = ,
>   .name = "mali-dp",
>   .desc = "ARM Mali Display Processor driver",
> diff --git a/drivers/gpu/drm/arm/malidp_drv.h 
> b/drivers/gpu/drm/arm/malidp_drv.h
> index c70989b..c49056c 100644
> --- a/drivers/gpu/drm/arm/malidp_drv.h
> +++ b/drivers/gpu/drm/arm/malidp_drv.h
> @@ -18,6 +18,12 @@
>  #include 
>  #include "malidp_hw.h"
>  
> +struct malidp_error_stats {
> + atomic_t num_errors;
> + atomic_t last_error_status;
> + atomic64_t last_error_vblank;
> +};
> +
>  struct malidp_drm 

[PATCH] drm: mali-dp: Add debugfs file for reporting internal errors

2018-05-11 Thread Alexandru Gheorghe
Status register contains a lot of bits for reporting internal errors
inside Mali DP. Currently, we just silently ignore all of the erorrs,
that doesn't help when we are investigating different bugs, especially
on the FPGA models which have a lot of constrains, so we could easily
end up in AXI or underrun errors.

Add a new file called debug that contains an agregate of the
errors reported by the Mali DP hardware.

E.g:
[root@alarm ~]# cat /sys/kernel/debug/dri/1/debug
[DE] num_errors : 167
[DE] last_error_status  : 0x0001
[DE] last_error_vblank : 385
[SE] num_errors : 3
[SE] last_error_status  : 0x00e23001
[SE] last_error_vblank : 201

This a morphosis of the patch presented here [1], where the errors
where reported continuously via trace_printk.

[1] https://lists.freedesktop.org/archives/dri-devel/2018-February/167042.html

Signed-off-by: Alexandru Gheorghe 
---
 drivers/gpu/drm/arm/malidp_drv.c  | 61 +++
 drivers/gpu/drm/arm/malidp_drv.h  | 15 ++
 drivers/gpu/drm/arm/malidp_hw.c   | 46 -
 drivers/gpu/drm/arm/malidp_hw.h   |  1 +
 drivers/gpu/drm/arm/malidp_regs.h |  6 
 5 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index 8d20faa..70ce19a 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -29,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "malidp_drv.h"
 #include "malidp_regs.h"
@@ -327,6 +329,62 @@ static int malidp_dumb_create(struct drm_file *file_priv,
return drm_gem_cma_dumb_create_internal(file_priv, drm, args);
 }
 
+#ifdef CONFIG_DEBUG_FS
+
+static void malidp_error_stats_init(struct malidp_error_stats *error_stats)
+{
+   atomic_set(_stats->num_errors, 0);
+   atomic_set(_stats->last_error_status, 0);
+   atomic64_set(_stats->last_error_vblank, -1);
+}
+
+void malidp_error(struct malidp_error_stats *error_stats, u32 status,
+ u64 vblank)
+{
+   atomic_set(_stats->last_error_status, status);
+   atomic64_set(_stats->last_error_vblank, vblank);
+   atomic_inc(_stats->num_errors);
+}
+
+void malidp_error_stats_dump(const char *prefix,
+struct malidp_error_stats *error_stats,
+struct seq_file *m)
+{
+   seq_printf(m, "[%s] num_errors : %d\n", prefix,
+  atomic_read(_stats->num_errors));
+   seq_printf(m, "[%s] last_error_status  : 0x%08x\n", prefix,
+  atomic_read(_stats->last_error_status));
+   seq_printf(m, "[%s] last_error_vblank : %ld\n", prefix,
+  atomic64_read(_stats->last_error_vblank));
+}
+
+static int malidp_show_stats(struct seq_file *m, void *arg)
+{
+   struct drm_info_node *node = (struct drm_info_node *)m->private;
+   struct drm_device *drm = node->minor->dev;
+   struct malidp_drm *malidp = drm->dev_private;
+
+   malidp_error_stats_dump("DE", >de_errors, m);
+   malidp_error_stats_dump("SE", >se_errors, m);
+   return 0;
+}
+
+static struct drm_info_list malidp_debugfs_list[] = {
+   { "debug", malidp_show_stats, 0 },
+};
+
+static int malidp_debugfs_init(struct drm_minor *minor)
+{
+   struct malidp_drm *malidp = minor->dev->dev_private;
+
+   malidp_error_stats_init(>de_errors);
+   malidp_error_stats_init(>se_errors);
+   return drm_debugfs_create_files(malidp_debugfs_list,
+   ARRAY_SIZE(malidp_debugfs_list), minor->debugfs_root, minor);
+}
+
+#endif //CONFIG_DEBUG_FS
+
 static struct drm_driver malidp_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC |
   DRIVER_PRIME,
@@ -343,6 +401,9 @@ static struct drm_driver malidp_driver = {
.gem_prime_vmap = drm_gem_cma_prime_vmap,
.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
.gem_prime_mmap = drm_gem_cma_prime_mmap,
+#ifdef CONFIG_DEBUG_FS
+   .debugfs_init = malidp_debugfs_init,
+#endif
.fops = ,
.name = "mali-dp",
.desc = "ARM Mali Display Processor driver",
diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h
index c70989b..c49056c 100644
--- a/drivers/gpu/drm/arm/malidp_drv.h
+++ b/drivers/gpu/drm/arm/malidp_drv.h
@@ -18,6 +18,12 @@
 #include 
 #include "malidp_hw.h"
 
+struct malidp_error_stats {
+   atomic_t num_errors;
+   atomic_t last_error_status;
+   atomic64_t last_error_vblank;
+};
+
 struct malidp_drm {
struct malidp_hw_device *dev;
struct drm_crtc crtc;
@@ -25,6 +31,10 @@ struct malidp_drm {
struct drm_pending_vblank_event *event;
atomic_t config_valid;
u32 core_id;
+#ifdef CONFIG_DEBUG_FS
+   struct malidp_error_stats de_errors;
+   struct malidp_error_stats se_errors;