Re: [Intel-gfx] [CI 1/5] drm/i915: Allow disabling error capture

2016-10-11 Thread Daniel Vetter
On Tue, Oct 11, 2016 at 12:56 PM, Jani Nikula
 wrote:
> Fair enough. Please copy-paste some of the elaboration to the commit
> message. Ack from me, but it wouldn't hurt to get an ack from Daniel as
> well.

Would be nice if we can trade in some of the #ifdefry with a
conditional compiliation of i915_gpu_error.c (and shuffle some parts
around, or maybe intel_display_error.c extraction to make that
feasible). Besides that wishlist item, ack.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [CI 1/5] drm/i915: Allow disabling error capture

2016-10-11 Thread Jani Nikula
On Tue, 11 Oct 2016, Chris Wilson  wrote:
> On Tue, Oct 11, 2016 at 01:16:42PM +0300, Jani Nikula wrote:
>> On Tue, 11 Oct 2016, Chris Wilson  wrote:
>> > On Tue, Oct 11, 2016 at 12:52:00PM +0300, Jani Nikula wrote:
>> >> On Tue, 11 Oct 2016, Chris Wilson  wrote:
>> >> > We currently capture the GPU state after we detect a hang. This is vital
>> >> > for us to both triage and debug hangs in the wild (post-mortem
>> >> > debugging). However, it comes at the cost of running some potentially
>> >> > dangerous code (since it has to make very few assumption about the state
>> >> > of the driver) that is quite resource intensive.
>> >> >
>> >> > Signed-off-by: Chris Wilson 
>> >> > Reviewed-by: Joonas Lahtinen 
>> >> > ---
>> >> >  drivers/gpu/drm/i915/Kconfig  | 10 ++
>> >> >  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++
>> >> >  drivers/gpu/drm/i915/i915_drv.h   | 16 
>> >> >  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++
>> >> >  drivers/gpu/drm/i915/i915_params.c|  9 +
>> >> >  drivers/gpu/drm/i915/i915_params.h|  1 +
>> >> >  drivers/gpu/drm/i915/i915_sysfs.c |  8 
>> >> >  drivers/gpu/drm/i915/intel_display.c  |  4 
>> >> >  drivers/gpu/drm/i915/intel_overlay.c  |  4 
>> >> >  9 files changed, 65 insertions(+)
>> >> >
>> >> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
>> >> > index 7769e469118f..10a6ac11b6a9 100644
>> >> > --- a/drivers/gpu/drm/i915/Kconfig
>> >> > +++ b/drivers/gpu/drm/i915/Kconfig
>> >> > @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
>> >> >  
>> >> >   If in doubt, say "N".
>> >> >  
>> >> > +config DRM_I915_CAPTURE_ERROR
>> >> > +   bool "Enable capturing GPU state following a hang"
>> >> > +   depends on DRM_I915
>> >> > +   default y
>> >> > +   help
>> >> > + This option enables capturing the GPU state when a hang is 
>> >> > detected.
>> >> > + This information is vital for triaging hangs and assists in 
>> >> > debugging.
>> >> > +
>> >> > + If in doubt, say "Y".
>> >> > +
>> >> >  config DRM_I915_USERPTR
>> >> > bool "Always enable userptr support"
>> >> > depends on DRM_I915
>> >> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>> >> > b/drivers/gpu/drm/i915/i915_debugfs.c
>> >> > index 20689f1cd719..e4b5ba771bea 100644
>> >> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> >> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> >> > @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void 
>> >> > *data)
>> >> > return 0;
>> >> >  }
>> >> >  
>> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >> > +
>> >> >  static ssize_t
>> >> >  i915_error_state_write(struct file *filp,
>> >> >const char __user *ubuf,
>> >> > @@ -1042,6 +1044,8 @@ static const struct file_operations 
>> >> > i915_error_state_fops = {
>> >> > .release = i915_error_state_release,
>> >> >  };
>> >> >  
>> >> > +#endif
>> >> > +
>> >> >  static int
>> >> >  i915_next_seqno_get(void *data, u64 *val)
>> >> >  {
>> >> > @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
>> >> > {"i915_ring_missed_irq", _ring_missed_irq_fops},
>> >> > {"i915_ring_test_irq", _ring_test_irq_fops},
>> >> > {"i915_gem_drop_caches", _drop_caches_fops},
>> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >> > {"i915_error_state", _error_state_fops},
>> >> > +#endif
>> >> > {"i915_next_seqno", _next_seqno_fops},
>> >> > {"i915_display_crc_ctl", _display_crc_ctl_fops},
>> >> > {"i915_pri_wm_latency", _pri_wm_latency_fops},
>> >> > diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> >> > b/drivers/gpu/drm/i915/i915_drv.h
>> >> > index 54d860e1c0fc..4570c4fa0287 100644
>> >> > --- a/drivers/gpu/drm/i915/i915_drv.h
>> >> > +++ b/drivers/gpu/drm/i915/i915_drv.h
>> >> > @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct 
>> >> > drm_i915_private *dev_priv) {}
>> >> >  #endif
>> >> >  
>> >> >  /* i915_gpu_error.c */
>> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >> > +
>> >> >  __printf(2, 3)
>> >> >  void i915_error_printf(struct drm_i915_error_state_buf *e, const char 
>> >> > *f, ...);
>> >> >  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
>> >> > @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
>> >> >  void i915_error_state_put(struct i915_error_state_file_priv 
>> >> > *error_priv);
>> >> >  void i915_destroy_error_state(struct drm_device *dev);
>> >> >  
>> >> > +#else
>> >> > +
>> >> > +static inline void i915_capture_error_state(struct drm_i915_private 
>> >> > *dev_priv,
>> >> > +   u32 engine_mask,
>> >> > +   const char *error_msg)
>> >> > +{
>> >> > +}
>> >> > +

Re: [Intel-gfx] [CI 1/5] drm/i915: Allow disabling error capture

2016-10-11 Thread Chris Wilson
On Tue, Oct 11, 2016 at 01:16:42PM +0300, Jani Nikula wrote:
> On Tue, 11 Oct 2016, Chris Wilson  wrote:
> > On Tue, Oct 11, 2016 at 12:52:00PM +0300, Jani Nikula wrote:
> >> On Tue, 11 Oct 2016, Chris Wilson  wrote:
> >> > We currently capture the GPU state after we detect a hang. This is vital
> >> > for us to both triage and debug hangs in the wild (post-mortem
> >> > debugging). However, it comes at the cost of running some potentially
> >> > dangerous code (since it has to make very few assumption about the state
> >> > of the driver) that is quite resource intensive.
> >> >
> >> > Signed-off-by: Chris Wilson 
> >> > Reviewed-by: Joonas Lahtinen 
> >> > ---
> >> >  drivers/gpu/drm/i915/Kconfig  | 10 ++
> >> >  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++
> >> >  drivers/gpu/drm/i915/i915_drv.h   | 16 
> >> >  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++
> >> >  drivers/gpu/drm/i915/i915_params.c|  9 +
> >> >  drivers/gpu/drm/i915/i915_params.h|  1 +
> >> >  drivers/gpu/drm/i915/i915_sysfs.c |  8 
> >> >  drivers/gpu/drm/i915/intel_display.c  |  4 
> >> >  drivers/gpu/drm/i915/intel_overlay.c  |  4 
> >> >  9 files changed, 65 insertions(+)
> >> >
> >> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> >> > index 7769e469118f..10a6ac11b6a9 100644
> >> > --- a/drivers/gpu/drm/i915/Kconfig
> >> > +++ b/drivers/gpu/drm/i915/Kconfig
> >> > @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
> >> >  
> >> >If in doubt, say "N".
> >> >  
> >> > +config DRM_I915_CAPTURE_ERROR
> >> > +bool "Enable capturing GPU state following a hang"
> >> > +depends on DRM_I915
> >> > +default y
> >> > +help
> >> > +  This option enables capturing the GPU state when a hang is 
> >> > detected.
> >> > +  This information is vital for triaging hangs and assists in 
> >> > debugging.
> >> > +
> >> > +  If in doubt, say "Y".
> >> > +
> >> >  config DRM_I915_USERPTR
> >> >  bool "Always enable userptr support"
> >> >  depends on DRM_I915
> >> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> >> > b/drivers/gpu/drm/i915/i915_debugfs.c
> >> > index 20689f1cd719..e4b5ba771bea 100644
> >> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> >> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >> > @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void 
> >> > *data)
> >> >  return 0;
> >> >  }
> >> >  
> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> >> > +
> >> >  static ssize_t
> >> >  i915_error_state_write(struct file *filp,
> >> > const char __user *ubuf,
> >> > @@ -1042,6 +1044,8 @@ static const struct file_operations 
> >> > i915_error_state_fops = {
> >> >  .release = i915_error_state_release,
> >> >  };
> >> >  
> >> > +#endif
> >> > +
> >> >  static int
> >> >  i915_next_seqno_get(void *data, u64 *val)
> >> >  {
> >> > @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
> >> >  {"i915_ring_missed_irq", _ring_missed_irq_fops},
> >> >  {"i915_ring_test_irq", _ring_test_irq_fops},
> >> >  {"i915_gem_drop_caches", _drop_caches_fops},
> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> >> >  {"i915_error_state", _error_state_fops},
> >> > +#endif
> >> >  {"i915_next_seqno", _next_seqno_fops},
> >> >  {"i915_display_crc_ctl", _display_crc_ctl_fops},
> >> >  {"i915_pri_wm_latency", _pri_wm_latency_fops},
> >> > diff --git a/drivers/gpu/drm/i915/i915_drv.h 
> >> > b/drivers/gpu/drm/i915/i915_drv.h
> >> > index 54d860e1c0fc..4570c4fa0287 100644
> >> > --- a/drivers/gpu/drm/i915/i915_drv.h
> >> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> > @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct 
> >> > drm_i915_private *dev_priv) {}
> >> >  #endif
> >> >  
> >> >  /* i915_gpu_error.c */
> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> >> > +
> >> >  __printf(2, 3)
> >> >  void i915_error_printf(struct drm_i915_error_state_buf *e, const char 
> >> > *f, ...);
> >> >  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
> >> > @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
> >> >  void i915_error_state_put(struct i915_error_state_file_priv 
> >> > *error_priv);
> >> >  void i915_destroy_error_state(struct drm_device *dev);
> >> >  
> >> > +#else
> >> > +
> >> > +static inline void i915_capture_error_state(struct drm_i915_private 
> >> > *dev_priv,
> >> > +u32 engine_mask,
> >> > +const char *error_msg)
> >> > +{
> >> > +}
> >> > +
> >> > +static inline void i915_destroy_error_state(struct drm_device *dev)
> >> > +{
> >> > +}
> >> > +
> >> > +#endif
> >> > +
> >> >  void 

Re: [Intel-gfx] [CI 1/5] drm/i915: Allow disabling error capture

2016-10-11 Thread Jani Nikula
On Tue, 11 Oct 2016, Chris Wilson  wrote:
> On Tue, Oct 11, 2016 at 12:52:00PM +0300, Jani Nikula wrote:
>> On Tue, 11 Oct 2016, Chris Wilson  wrote:
>> > We currently capture the GPU state after we detect a hang. This is vital
>> > for us to both triage and debug hangs in the wild (post-mortem
>> > debugging). However, it comes at the cost of running some potentially
>> > dangerous code (since it has to make very few assumption about the state
>> > of the driver) that is quite resource intensive.
>> >
>> > Signed-off-by: Chris Wilson 
>> > Reviewed-by: Joonas Lahtinen 
>> > ---
>> >  drivers/gpu/drm/i915/Kconfig  | 10 ++
>> >  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++
>> >  drivers/gpu/drm/i915/i915_drv.h   | 16 
>> >  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++
>> >  drivers/gpu/drm/i915/i915_params.c|  9 +
>> >  drivers/gpu/drm/i915/i915_params.h|  1 +
>> >  drivers/gpu/drm/i915/i915_sysfs.c |  8 
>> >  drivers/gpu/drm/i915/intel_display.c  |  4 
>> >  drivers/gpu/drm/i915/intel_overlay.c  |  4 
>> >  9 files changed, 65 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
>> > index 7769e469118f..10a6ac11b6a9 100644
>> > --- a/drivers/gpu/drm/i915/Kconfig
>> > +++ b/drivers/gpu/drm/i915/Kconfig
>> > @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
>> >  
>> >  If in doubt, say "N".
>> >  
>> > +config DRM_I915_CAPTURE_ERROR
>> > +  bool "Enable capturing GPU state following a hang"
>> > +  depends on DRM_I915
>> > +  default y
>> > +  help
>> > +This option enables capturing the GPU state when a hang is detected.
>> > +This information is vital for triaging hangs and assists in debugging.
>> > +
>> > +If in doubt, say "Y".
>> > +
>> >  config DRM_I915_USERPTR
>> >bool "Always enable userptr support"
>> >depends on DRM_I915
>> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>> > b/drivers/gpu/drm/i915/i915_debugfs.c
>> > index 20689f1cd719..e4b5ba771bea 100644
>> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> > @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void 
>> > *data)
>> >return 0;
>> >  }
>> >  
>> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> > +
>> >  static ssize_t
>> >  i915_error_state_write(struct file *filp,
>> >   const char __user *ubuf,
>> > @@ -1042,6 +1044,8 @@ static const struct file_operations 
>> > i915_error_state_fops = {
>> >.release = i915_error_state_release,
>> >  };
>> >  
>> > +#endif
>> > +
>> >  static int
>> >  i915_next_seqno_get(void *data, u64 *val)
>> >  {
>> > @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
>> >{"i915_ring_missed_irq", _ring_missed_irq_fops},
>> >{"i915_ring_test_irq", _ring_test_irq_fops},
>> >{"i915_gem_drop_caches", _drop_caches_fops},
>> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >{"i915_error_state", _error_state_fops},
>> > +#endif
>> >{"i915_next_seqno", _next_seqno_fops},
>> >{"i915_display_crc_ctl", _display_crc_ctl_fops},
>> >{"i915_pri_wm_latency", _pri_wm_latency_fops},
>> > diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> > b/drivers/gpu/drm/i915/i915_drv.h
>> > index 54d860e1c0fc..4570c4fa0287 100644
>> > --- a/drivers/gpu/drm/i915/i915_drv.h
>> > +++ b/drivers/gpu/drm/i915/i915_drv.h
>> > @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct 
>> > drm_i915_private *dev_priv) {}
>> >  #endif
>> >  
>> >  /* i915_gpu_error.c */
>> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> > +
>> >  __printf(2, 3)
>> >  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, 
>> > ...);
>> >  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
>> > @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
>> >  void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
>> >  void i915_destroy_error_state(struct drm_device *dev);
>> >  
>> > +#else
>> > +
>> > +static inline void i915_capture_error_state(struct drm_i915_private 
>> > *dev_priv,
>> > +  u32 engine_mask,
>> > +  const char *error_msg)
>> > +{
>> > +}
>> > +
>> > +static inline void i915_destroy_error_state(struct drm_device *dev)
>> > +{
>> > +}
>> > +
>> > +#endif
>> > +
>> >  void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
>> >  enum intel_engine_id engine_id,
>> >  struct intel_instdone *instdone);
>> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
>> > b/drivers/gpu/drm/i915/i915_gpu_error.c
>> > index b5b58692ac5a..9b395ffa3b6a 100644
>> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> > @@ -30,6 +30,8 @@
>> >  

Re: [Intel-gfx] [CI 1/5] drm/i915: Allow disabling error capture

2016-10-11 Thread Chris Wilson
On Tue, Oct 11, 2016 at 12:52:00PM +0300, Jani Nikula wrote:
> On Tue, 11 Oct 2016, Chris Wilson  wrote:
> > We currently capture the GPU state after we detect a hang. This is vital
> > for us to both triage and debug hangs in the wild (post-mortem
> > debugging). However, it comes at the cost of running some potentially
> > dangerous code (since it has to make very few assumption about the state
> > of the driver) that is quite resource intensive.
> >
> > Signed-off-by: Chris Wilson 
> > Reviewed-by: Joonas Lahtinen 
> > ---
> >  drivers/gpu/drm/i915/Kconfig  | 10 ++
> >  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++
> >  drivers/gpu/drm/i915/i915_drv.h   | 16 
> >  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++
> >  drivers/gpu/drm/i915/i915_params.c|  9 +
> >  drivers/gpu/drm/i915/i915_params.h|  1 +
> >  drivers/gpu/drm/i915/i915_sysfs.c |  8 
> >  drivers/gpu/drm/i915/intel_display.c  |  4 
> >  drivers/gpu/drm/i915/intel_overlay.c  |  4 
> >  9 files changed, 65 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> > index 7769e469118f..10a6ac11b6a9 100644
> > --- a/drivers/gpu/drm/i915/Kconfig
> > +++ b/drivers/gpu/drm/i915/Kconfig
> > @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
> >  
> >   If in doubt, say "N".
> >  
> > +config DRM_I915_CAPTURE_ERROR
> > +   bool "Enable capturing GPU state following a hang"
> > +   depends on DRM_I915
> > +   default y
> > +   help
> > + This option enables capturing the GPU state when a hang is detected.
> > + This information is vital for triaging hangs and assists in debugging.
> > +
> > + If in doubt, say "Y".
> > +
> >  config DRM_I915_USERPTR
> > bool "Always enable userptr support"
> > depends on DRM_I915
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> > b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 20689f1cd719..e4b5ba771bea 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
> > return 0;
> >  }
> >  
> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> > +
> >  static ssize_t
> >  i915_error_state_write(struct file *filp,
> >const char __user *ubuf,
> > @@ -1042,6 +1044,8 @@ static const struct file_operations 
> > i915_error_state_fops = {
> > .release = i915_error_state_release,
> >  };
> >  
> > +#endif
> > +
> >  static int
> >  i915_next_seqno_get(void *data, u64 *val)
> >  {
> > @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
> > {"i915_ring_missed_irq", _ring_missed_irq_fops},
> > {"i915_ring_test_irq", _ring_test_irq_fops},
> > {"i915_gem_drop_caches", _drop_caches_fops},
> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> > {"i915_error_state", _error_state_fops},
> > +#endif
> > {"i915_next_seqno", _next_seqno_fops},
> > {"i915_display_crc_ctl", _display_crc_ctl_fops},
> > {"i915_pri_wm_latency", _pri_wm_latency_fops},
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h 
> > b/drivers/gpu/drm/i915/i915_drv.h
> > index 54d860e1c0fc..4570c4fa0287 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct 
> > drm_i915_private *dev_priv) {}
> >  #endif
> >  
> >  /* i915_gpu_error.c */
> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> > +
> >  __printf(2, 3)
> >  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, 
> > ...);
> >  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
> > @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
> >  void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
> >  void i915_destroy_error_state(struct drm_device *dev);
> >  
> > +#else
> > +
> > +static inline void i915_capture_error_state(struct drm_i915_private 
> > *dev_priv,
> > +   u32 engine_mask,
> > +   const char *error_msg)
> > +{
> > +}
> > +
> > +static inline void i915_destroy_error_state(struct drm_device *dev)
> > +{
> > +}
> > +
> > +#endif
> > +
> >  void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
> >   enum intel_engine_id engine_id,
> >   struct intel_instdone *instdone);
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
> > b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index b5b58692ac5a..9b395ffa3b6a 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -30,6 +30,8 @@
> >  #include 
> >  #include "i915_drv.h"
> >  
> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> > +
> >  static const char *engine_str(int engine)
> >  {
> > switch (engine) {
> > 

Re: [Intel-gfx] [CI 1/5] drm/i915: Allow disabling error capture

2016-10-11 Thread Jani Nikula
On Tue, 11 Oct 2016, Chris Wilson  wrote:
> We currently capture the GPU state after we detect a hang. This is vital
> for us to both triage and debug hangs in the wild (post-mortem
> debugging). However, it comes at the cost of running some potentially
> dangerous code (since it has to make very few assumption about the state
> of the driver) that is quite resource intensive.
>
> Signed-off-by: Chris Wilson 
> Reviewed-by: Joonas Lahtinen 
> ---
>  drivers/gpu/drm/i915/Kconfig  | 10 ++
>  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++
>  drivers/gpu/drm/i915/i915_drv.h   | 16 
>  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++
>  drivers/gpu/drm/i915/i915_params.c|  9 +
>  drivers/gpu/drm/i915/i915_params.h|  1 +
>  drivers/gpu/drm/i915/i915_sysfs.c |  8 
>  drivers/gpu/drm/i915/intel_display.c  |  4 
>  drivers/gpu/drm/i915/intel_overlay.c  |  4 
>  9 files changed, 65 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 7769e469118f..10a6ac11b6a9 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
>  
> If in doubt, say "N".
>  
> +config DRM_I915_CAPTURE_ERROR
> + bool "Enable capturing GPU state following a hang"
> + depends on DRM_I915
> + default y
> + help
> +   This option enables capturing the GPU state when a hang is detected.
> +   This information is vital for triaging hangs and assists in debugging.
> +
> +   If in doubt, say "Y".
> +
>  config DRM_I915_USERPTR
>   bool "Always enable userptr support"
>   depends on DRM_I915
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 20689f1cd719..e4b5ba771bea 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
>   return 0;
>  }
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +
>  static ssize_t
>  i915_error_state_write(struct file *filp,
>  const char __user *ubuf,
> @@ -1042,6 +1044,8 @@ static const struct file_operations 
> i915_error_state_fops = {
>   .release = i915_error_state_release,
>  };
>  
> +#endif
> +
>  static int
>  i915_next_seqno_get(void *data, u64 *val)
>  {
> @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
>   {"i915_ring_missed_irq", _ring_missed_irq_fops},
>   {"i915_ring_test_irq", _ring_test_irq_fops},
>   {"i915_gem_drop_caches", _drop_caches_fops},
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>   {"i915_error_state", _error_state_fops},
> +#endif
>   {"i915_next_seqno", _next_seqno_fops},
>   {"i915_display_crc_ctl", _display_crc_ctl_fops},
>   {"i915_pri_wm_latency", _pri_wm_latency_fops},
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 54d860e1c0fc..4570c4fa0287 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct 
> drm_i915_private *dev_priv) {}
>  #endif
>  
>  /* i915_gpu_error.c */
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +
>  __printf(2, 3)
>  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, 
> ...);
>  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
> @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
>  void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
>  void i915_destroy_error_state(struct drm_device *dev);
>  
> +#else
> +
> +static inline void i915_capture_error_state(struct drm_i915_private 
> *dev_priv,
> + u32 engine_mask,
> + const char *error_msg)
> +{
> +}
> +
> +static inline void i915_destroy_error_state(struct drm_device *dev)
> +{
> +}
> +
> +#endif
> +
>  void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
> enum intel_engine_id engine_id,
> struct intel_instdone *instdone);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
> b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b5b58692ac5a..9b395ffa3b6a 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -30,6 +30,8 @@
>  #include 
>  #include "i915_drv.h"
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +
>  static const char *engine_str(int engine)
>  {
>   switch (engine) {
> @@ -1464,6 +1466,9 @@ void i915_capture_error_state(struct drm_i915_private 
> *dev_priv,
>   struct drm_i915_error_state *error;
>   unsigned long flags;
>  
> + if (!i915.error_capture)
> + return;
> +
>   if 

[Intel-gfx] [CI 1/5] drm/i915: Allow disabling error capture

2016-10-11 Thread Chris Wilson
We currently capture the GPU state after we detect a hang. This is vital
for us to both triage and debug hangs in the wild (post-mortem
debugging). However, it comes at the cost of running some potentially
dangerous code (since it has to make very few assumption about the state
of the driver) that is quite resource intensive.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/Kconfig  | 10 ++
 drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++
 drivers/gpu/drm/i915/i915_drv.h   | 16 
 drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++
 drivers/gpu/drm/i915/i915_params.c|  9 +
 drivers/gpu/drm/i915/i915_params.h|  1 +
 drivers/gpu/drm/i915/i915_sysfs.c |  8 
 drivers/gpu/drm/i915/intel_display.c  |  4 
 drivers/gpu/drm/i915/intel_overlay.c  |  4 
 9 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..10a6ac11b6a9 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
 
  If in doubt, say "N".
 
+config DRM_I915_CAPTURE_ERROR
+   bool "Enable capturing GPU state following a hang"
+   depends on DRM_I915
+   default y
+   help
+ This option enables capturing the GPU state when a hang is detected.
+ This information is vital for triaging hangs and assists in debugging.
+
+ If in doubt, say "Y".
+
 config DRM_I915_USERPTR
bool "Always enable userptr support"
depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 20689f1cd719..e4b5ba771bea 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
return 0;
 }
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 static ssize_t
 i915_error_state_write(struct file *filp,
   const char __user *ubuf,
@@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops 
= {
.release = i915_error_state_release,
 };
 
+#endif
+
 static int
 i915_next_seqno_get(void *data, u64 *val)
 {
@@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
{"i915_ring_missed_irq", _ring_missed_irq_fops},
{"i915_ring_test_irq", _ring_test_irq_fops},
{"i915_gem_drop_caches", _drop_caches_fops},
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
{"i915_error_state", _error_state_fops},
+#endif
{"i915_next_seqno", _next_seqno_fops},
{"i915_display_crc_ctl", _display_crc_ctl_fops},
{"i915_pri_wm_latency", _pri_wm_latency_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 54d860e1c0fc..4570c4fa0287 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct 
drm_i915_private *dev_priv) {}
 #endif
 
 /* i915_gpu_error.c */
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
@@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
 void i915_destroy_error_state(struct drm_device *dev);
 
+#else
+
+static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
+   u32 engine_mask,
+   const char *error_msg)
+{
+}
+
+static inline void i915_destroy_error_state(struct drm_device *dev)
+{
+}
+
+#endif
+
 void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
  enum intel_engine_id engine_id,
  struct intel_instdone *instdone);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index b5b58692ac5a..9b395ffa3b6a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,6 +30,8 @@
 #include 
 #include "i915_drv.h"
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 static const char *engine_str(int engine)
 {
switch (engine) {
@@ -1464,6 +1466,9 @@ void i915_capture_error_state(struct drm_i915_private 
*dev_priv,
struct drm_i915_error_state *error;
unsigned long flags;
 
+   if (!i915.error_capture)
+   return;
+
if (READ_ONCE(dev_priv->gpu_error.first_error))
return;
 
@@ -1549,6 +1554,8 @@ void i915_destroy_error_state(struct drm_device *dev)
kref_put(>ref, i915_error_state_free);
 }
 
+#endif
+
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 {
switch