[PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-11-07 Thread sourab gupta
On Fri, 2016-11-04 at 06:19 -0700, Robert Bragg wrote:
> 
> 
> On Fri, Nov 4, 2016 at 8:59 AM, sourab gupta 
> wrote:
> On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
> > Adds base i915 perf infrastructure for Gen performance
> metrics.
> >
> > This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an
> array of uint64
> > properties to configure a stream of metrics and returns a
> new fd usable
> > with standard VFS system calls including read() to read
> typed and sized
> > records; ioctl() to enable or disable capture and poll() to
> wait for
> > data.
> >
> > A stream is opened something like:
> >
> >   uint64_t properties[] = {
> >   /* Single context sampling */
> >   DRM_I915_PERF_PROP_CTX_HANDLE,ctx_handle,
> >
> >   /* Include OA reports in samples */
> >   DRM_I915_PERF_PROP_SAMPLE_OA, true,
> >
> >   /* OA unit configuration */
> >   DRM_I915_PERF_PROP_OA_METRICS_SET,metrics_set_id,
> >   DRM_I915_PERF_PROP_OA_FORMAT, report_format,
> >   DRM_I915_PERF_PROP_OA_EXPONENT,   period_exponent,
> >};
> >struct drm_i915_perf_open_param parm = {
> >   .flags = I915_PERF_FLAG_FD_CLOEXEC |
> >I915_PERF_FLAG_FD_NONBLOCK |
> >I915_PERF_FLAG_DISABLED,
> >   .properties_ptr = (uint64_t)properties,
> >   .num_properties = sizeof(properties) / 16,
> >};
> >int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN,
> );
> >
> > Records read all start with a common { type, size } header
> with
> > DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample
> records
> > contain an extensible number of fields and it's the
> > DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening
> that
> > determine what's included in every sample.
> >
> > No specific streams are supported yet so any attempt to open
> a stream
> > will return an error.
> >
> > v2:
> > use i915_gem_context_get() - Chris Wilson
> > v3:
> > update read() interface to avoid passing state struct -
> Chris Wilson
> > fix some rebase fallout, with i915-perf init/deinit
> > v4:
> > s/DRM_IORW/DRM_IOW/ - Emil Velikov
> >
> > Signed-off-by: Robert Bragg 
> > ---
> >  drivers/gpu/drm/i915/Makefile|   3 +
> >  drivers/gpu/drm/i915/i915_drv.c  |   4 +
> >  drivers/gpu/drm/i915/i915_drv.h  |  91 
> >  drivers/gpu/drm/i915/i915_perf.c | 443
> +++
> >  include/uapi/drm/i915_drm.h  |  67 ++
> >  5 files changed, 608 insertions(+)
> >  create mode 100644 drivers/gpu/drm/i915/i915_perf.c
> >
> > diff --git a/drivers/gpu/drm/i915/Makefile
> b/drivers/gpu/drm/i915/Makefile
> > index 6123400..8d4e25f 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) +=
> i915_gpu_error.o
> >  # virtual gpu code
> >  i915-y += i915_vgpu.o
> >
> > +# perf code
> > +i915-y += i915_perf.o
> > +
> >  ifeq ($(CONFIG_DRM_I915_GVT),y)
> >  i915-y += intel_gvt.o
> >  include $(src)/gvt/Makefile
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c
> > index af3559d..685c96e 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -836,6 +836,8 @@ static int i915_driver_init_early(struct
> drm_i915_private *dev_priv,
> >
> >   intel_detect_preproduction_hw(dev_priv);
> >
> > + i915_perf_init(dev_priv);
> > +
> >   return 0;
> >
> >  err_workqueues:
> > @@ -849,6 +851,7 @@ static int i915_driver_init_early(struct
> drm_i915_private *dev_priv,
> >   */
> >  static void i915_driver_cleanup_early(struct
> drm_i915_private *dev_priv)
> >  {
> > + i915_perf_fini(dev_priv);
> >   i915_gem_load_cleanup(_priv->drm);
> >   i915_workqueues_cleanup(dev_priv);
> >  }
> > @@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc
> i915_ioctls[] = {
> >   DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR,
> i915_gem_userptr_ioctl, DRM_RENDER_ALLOW),
> >   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM,
> 

[PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-11-04 Thread sourab gupta
On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
> Adds base i915 perf infrastructure for Gen performance metrics.
> 
> This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
> properties to configure a stream of metrics and returns a new fd usable
> with standard VFS system calls including read() to read typed and sized
> records; ioctl() to enable or disable capture and poll() to wait for
> data.
> 
> A stream is opened something like:
> 
>   uint64_t properties[] = {
>   /* Single context sampling */
>   DRM_I915_PERF_PROP_CTX_HANDLE,ctx_handle,
> 
>   /* Include OA reports in samples */
>   DRM_I915_PERF_PROP_SAMPLE_OA, true,
> 
>   /* OA unit configuration */
>   DRM_I915_PERF_PROP_OA_METRICS_SET,metrics_set_id,
>   DRM_I915_PERF_PROP_OA_FORMAT, report_format,
>   DRM_I915_PERF_PROP_OA_EXPONENT,   period_exponent,
>};
>struct drm_i915_perf_open_param parm = {
>   .flags = I915_PERF_FLAG_FD_CLOEXEC |
>I915_PERF_FLAG_FD_NONBLOCK |
>I915_PERF_FLAG_DISABLED,
>   .properties_ptr = (uint64_t)properties,
>   .num_properties = sizeof(properties) / 16,
>};
>int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, );
> 
> Records read all start with a common { type, size } header with
> DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
> contain an extensible number of fields and it's the
> DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
> determine what's included in every sample.
> 
> No specific streams are supported yet so any attempt to open a stream
> will return an error.
> 
> v2:
> use i915_gem_context_get() - Chris Wilson
> v3:
> update read() interface to avoid passing state struct - Chris Wilson
> fix some rebase fallout, with i915-perf init/deinit
> v4:
> s/DRM_IORW/DRM_IOW/ - Emil Velikov
> 
> Signed-off-by: Robert Bragg 
> ---
>  drivers/gpu/drm/i915/Makefile|   3 +
>  drivers/gpu/drm/i915/i915_drv.c  |   4 +
>  drivers/gpu/drm/i915/i915_drv.h  |  91 
>  drivers/gpu/drm/i915/i915_perf.c | 443 
> +++
>  include/uapi/drm/i915_drm.h  |  67 ++
>  5 files changed, 608 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/i915_perf.c
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 6123400..8d4e25f 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
>  # virtual gpu code
>  i915-y += i915_vgpu.o
>  
> +# perf code
> +i915-y += i915_perf.o
> +
>  ifeq ($(CONFIG_DRM_I915_GVT),y)
>  i915-y += intel_gvt.o
>  include $(src)/gvt/Makefile
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index af3559d..685c96e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -836,6 +836,8 @@ static int i915_driver_init_early(struct drm_i915_private 
> *dev_priv,
>  
>   intel_detect_preproduction_hw(dev_priv);
>  
> + i915_perf_init(dev_priv);
> +
>   return 0;
>  
>  err_workqueues:
> @@ -849,6 +851,7 @@ static int i915_driver_init_early(struct drm_i915_private 
> *dev_priv,
>   */
>  static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
>  {
> + i915_perf_fini(dev_priv);
>   i915_gem_load_cleanup(_priv->drm);
>   i915_workqueues_cleanup(dev_priv);
>  }
> @@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
>   DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, 
> DRM_RENDER_ALLOW),
>   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, 
> i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
>   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, 
> i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, 
> DRM_RENDER_ALLOW),
>  };
>  
>  static struct drm_driver driver = {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5a260db..7a65c0b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1767,6 +1767,84 @@ struct intel_wm_config {
>   bool sprites_scaled;
>  };
>  
> +struct i915_perf_stream;
> +
> +struct i915_perf_stream_ops {
> + /* Enables the collection of HW samples, either in response to
> +  * I915_PERF_IOCTL_ENABLE or implicitly called when stream is
> +  * opened without I915_PERF_FLAG_DISABLED.
> +  */
> + void (*enable)(struct i915_perf_stream *stream);
> +
> + /* Disables the collection of HW samples, either in response to
> +  * I915_PERF_IOCTL_DISABLE or implicitly called before
> +  * destroying the stream.
> +  */
> + void (*disable)(struct i915_perf_stream *stream);
> +
> + /* Return: true if any i915 perf records are ready to read()
> +  * for this stream.
> +  */
> + 

[PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-11-04 Thread Robert Bragg
On Fri, Nov 4, 2016 at 8:59 AM, sourab gupta  wrote:

> On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
> > Adds base i915 perf infrastructure for Gen performance metrics.
> >
> > This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
> > properties to configure a stream of metrics and returns a new fd usable
> > with standard VFS system calls including read() to read typed and sized
> > records; ioctl() to enable or disable capture and poll() to wait for
> > data.
> >
> > A stream is opened something like:
> >
> >   uint64_t properties[] = {
> >   /* Single context sampling */
> >   DRM_I915_PERF_PROP_CTX_HANDLE,ctx_handle,
> >
> >   /* Include OA reports in samples */
> >   DRM_I915_PERF_PROP_SAMPLE_OA, true,
> >
> >   /* OA unit configuration */
> >   DRM_I915_PERF_PROP_OA_METRICS_SET,metrics_set_id,
> >   DRM_I915_PERF_PROP_OA_FORMAT, report_format,
> >   DRM_I915_PERF_PROP_OA_EXPONENT,   period_exponent,
> >};
> >struct drm_i915_perf_open_param parm = {
> >   .flags = I915_PERF_FLAG_FD_CLOEXEC |
> >I915_PERF_FLAG_FD_NONBLOCK |
> >I915_PERF_FLAG_DISABLED,
> >   .properties_ptr = (uint64_t)properties,
> >   .num_properties = sizeof(properties) / 16,
> >};
> >int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, );
> >
> > Records read all start with a common { type, size } header with
> > DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
> > contain an extensible number of fields and it's the
> > DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
> > determine what's included in every sample.
> >
> > No specific streams are supported yet so any attempt to open a stream
> > will return an error.
> >
> > v2:
> > use i915_gem_context_get() - Chris Wilson
> > v3:
> > update read() interface to avoid passing state struct - Chris Wilson
> > fix some rebase fallout, with i915-perf init/deinit
> > v4:
> > s/DRM_IORW/DRM_IOW/ - Emil Velikov
> >
> > Signed-off-by: Robert Bragg 
> > ---
> >  drivers/gpu/drm/i915/Makefile|   3 +
> >  drivers/gpu/drm/i915/i915_drv.c  |   4 +
> >  drivers/gpu/drm/i915/i915_drv.h  |  91 
> >  drivers/gpu/drm/i915/i915_perf.c | 443 ++
> +
> >  include/uapi/drm/i915_drm.h  |  67 ++
> >  5 files changed, 608 insertions(+)
> >  create mode 100644 drivers/gpu/drm/i915/i915_perf.c
> >
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/
> Makefile
> > index 6123400..8d4e25f 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) +=
> i915_gpu_error.o
> >  # virtual gpu code
> >  i915-y += i915_vgpu.o
> >
> > +# perf code
> > +i915-y += i915_perf.o
> > +
> >  ifeq ($(CONFIG_DRM_I915_GVT),y)
> >  i915-y += intel_gvt.o
> >  include $(src)/gvt/Makefile
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c
> > index af3559d..685c96e 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -836,6 +836,8 @@ static int i915_driver_init_early(struct
> drm_i915_private *dev_priv,
> >
> >   intel_detect_preproduction_hw(dev_priv);
> >
> > + i915_perf_init(dev_priv);
> > +
> >   return 0;
> >
> >  err_workqueues:
> > @@ -849,6 +851,7 @@ static int i915_driver_init_early(struct
> drm_i915_private *dev_priv,
> >   */
> >  static void i915_driver_cleanup_early(struct drm_i915_private
> *dev_priv)
> >  {
> > + i915_perf_fini(dev_priv);
> >   i915_gem_load_cleanup(_priv->drm);
> >   i915_workqueues_cleanup(dev_priv);
> >  }
> > @@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc i915_ioctls[] =
> {
> >   DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl,
> DRM_RENDER_ALLOW),
> >   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM,
> i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
> >   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM,
> i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl,
> DRM_RENDER_ALLOW),
> >  };
> >
> >  static struct drm_driver driver = {
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> > index 5a260db..7a65c0b 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1767,6 +1767,84 @@ struct intel_wm_config {
> >   bool sprites_scaled;
> >  };
> >
> > +struct i915_perf_stream;
> > +
> > +struct i915_perf_stream_ops {
> > + /* Enables the collection of HW samples, either in response to
> > +  * I915_PERF_IOCTL_ENABLE or implicitly called when stream is
> > +  * opened without I915_PERF_FLAG_DISABLED.
> > +  */
> > + void (*enable)(struct i915_perf_stream *stream);
> > +
> > + /* Disables the collection of HW samples, either in response to
> > +

[PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-10-31 Thread Robert Bragg
On Mon, Oct 31, 2016 at 5:13 PM, Matthew Auld <
matthew.william.auld at gmail.com> wrote:

> On 31 October 2016 at 16:27, Robert Bragg  wrote:
> >
> >
> > On Fri, Oct 28, 2016 at 3:27 PM, Matthew Auld
> >  wrote:
> >>
> >> > +/* Note we copy the properties from userspace outside of the i915
> perf
> >> > + * mutex to avoid an awkward lockdep with mmap_sem.
> >> > + *
> >> > + * Note this function only validates properties in isolation it
> doesn't
> >> > + * validate that the combination of properties makes sense or that
> all
> >> > + * properties necessary for a particular kind of stream have been
> set.
> >> > + */
> >> > +static int read_properties_unlocked(struct drm_i915_private
> *dev_priv,
> >> > +   u64 __user *uprops,
> >> > +   u32 n_props,
> >> > +   struct perf_open_properties
> *props)
> >> > +{
> >> > +   u64 __user *uprop = uprops;
> >> > +   int i;
> >> > +
> >> > +   memset(props, 0, sizeof(struct perf_open_properties));
> >> > +
> >> > +   if (!n_props) {
> >> > +   DRM_ERROR("No i915 perf properties given");
> >> > +   return -EINVAL;
> >> > +   }
> >> > +
> >> > +   if (n_props > DRM_I915_PERF_PROP_MAX) {
> >> Ah but DRM_I915_PERF_PROP_MAX is not a property itself.
> >
> >
> > I'm not sure I follow what your implied concern is?
> >
> > This is just a sanity check for the number properties given by userspace,
> > based on the assumption that there's currently no reason for multiple
> values
> > with a particular property id.
> >
> All I meant was should it not be n_props >= DRM_I915_PERF_PROP_MAX ?


> So with that fixed, or if I'm completely mad:
> Reviewed-by: Matthew Auld 
>

Ah, I see. Actually tbh I think either is reasonable...

The check is mainly about ruling out the silly large values that could be
given, imposing a upper-bound to the number of properties expected from
userspace. It might help catch userspace giving garbage/undefined data, or
block attempts to get the kernel parsing huge amounts of property data
which should never be necessary for configuring a stream. It doesn't e.g.
stop userspace specifying duplicate property IDs even if they supply less
than the maximum allowed. So even if it allowed say 2x the number of
properties I think it would still pretty much do its job.

I could imagine in the future the same check might become much more fuzzy
if we have a case where userspace might need to legitimately specify the
same property ID multiple times (where the sequential order is relevant).

_PERF_PROP_MAX is the last in the enum whereby we can interpret it as an
upper bound on the number of properties while we don't currently expect to
see property IDs duplicated.

The detail here though is that ID 0 is reserved so _PERF_PROP_MAX is more
like ('the maximum number of properties' + 1) - and so this is what you're
essentially highlighting.

I can change this - maybe with a comment about ID 0 being reserved and
explaining the assumption that property ID duplicates aren't currently
expected

Thanks for the review!
-- next part --
An HTML attachment was scrubbed...
URL: 



[PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-10-31 Thread Matthew Auld
On 31 October 2016 at 16:27, Robert Bragg  wrote:
>
>
> On Fri, Oct 28, 2016 at 3:27 PM, Matthew Auld
>  wrote:
>>
>> > +/* Note we copy the properties from userspace outside of the i915 perf
>> > + * mutex to avoid an awkward lockdep with mmap_sem.
>> > + *
>> > + * Note this function only validates properties in isolation it doesn't
>> > + * validate that the combination of properties makes sense or that all
>> > + * properties necessary for a particular kind of stream have been set.
>> > + */
>> > +static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>> > +   u64 __user *uprops,
>> > +   u32 n_props,
>> > +   struct perf_open_properties *props)
>> > +{
>> > +   u64 __user *uprop = uprops;
>> > +   int i;
>> > +
>> > +   memset(props, 0, sizeof(struct perf_open_properties));
>> > +
>> > +   if (!n_props) {
>> > +   DRM_ERROR("No i915 perf properties given");
>> > +   return -EINVAL;
>> > +   }
>> > +
>> > +   if (n_props > DRM_I915_PERF_PROP_MAX) {
>> Ah but DRM_I915_PERF_PROP_MAX is not a property itself.
>
>
> I'm not sure I follow what your implied concern is?
>
> This is just a sanity check for the number properties given by userspace,
> based on the assumption that there's currently no reason for multiple values
> with a particular property id.
>
All I meant was should it not be n_props >= DRM_I915_PERF_PROP_MAX ?

So with that fixed, or if I'm completely mad:
Reviewed-by: Matthew Auld 


[PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-10-31 Thread Robert Bragg
On Fri, Oct 28, 2016 at 3:27 PM, Matthew Auld <
matthew.william.auld at gmail.com> wrote:

> > +/* Note we copy the properties from userspace outside of the i915 perf
> > + * mutex to avoid an awkward lockdep with mmap_sem.
> > + *
> > + * Note this function only validates properties in isolation it doesn't
> > + * validate that the combination of properties makes sense or that all
> > + * properties necessary for a particular kind of stream have been set.
> > + */
> > +static int read_properties_unlocked(struct drm_i915_private *dev_priv,
> > +   u64 __user *uprops,
> > +   u32 n_props,
> > +   struct perf_open_properties *props)
> > +{
> > +   u64 __user *uprop = uprops;
> > +   int i;
> > +
> > +   memset(props, 0, sizeof(struct perf_open_properties));
> > +
> > +   if (!n_props) {
> > +   DRM_ERROR("No i915 perf properties given");
> > +   return -EINVAL;
> > +   }
> > +
> > +   if (n_props > DRM_I915_PERF_PROP_MAX) {
> Ah but DRM_I915_PERF_PROP_MAX is not a property itself.
>

I'm not sure I follow what your implied concern is?

This is just a sanity check for the number properties given by userspace,
based on the assumption that there's currently no reason for multiple
values with a particular property id.
-- next part --
An HTML attachment was scrubbed...
URL: 



[PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-10-28 Thread Matthew Auld
> +/* Note we copy the properties from userspace outside of the i915 perf
> + * mutex to avoid an awkward lockdep with mmap_sem.
> + *
> + * Note this function only validates properties in isolation it doesn't
> + * validate that the combination of properties makes sense or that all
> + * properties necessary for a particular kind of stream have been set.
> + */
> +static int read_properties_unlocked(struct drm_i915_private *dev_priv,
> +   u64 __user *uprops,
> +   u32 n_props,
> +   struct perf_open_properties *props)
> +{
> +   u64 __user *uprop = uprops;
> +   int i;
> +
> +   memset(props, 0, sizeof(struct perf_open_properties));
> +
> +   if (!n_props) {
> +   DRM_ERROR("No i915 perf properties given");
> +   return -EINVAL;
> +   }
> +
> +   if (n_props > DRM_I915_PERF_PROP_MAX) {
Ah but DRM_I915_PERF_PROP_MAX is not a property itself.


[PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-10-28 Thread Robert Bragg
Adds base i915 perf infrastructure for Gen performance metrics.

This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.

A stream is opened something like:

  uint64_t properties[] = {
  /* Single context sampling */
  DRM_I915_PERF_PROP_CTX_HANDLE,ctx_handle,

  /* Include OA reports in samples */
  DRM_I915_PERF_PROP_SAMPLE_OA, true,

  /* OA unit configuration */
  DRM_I915_PERF_PROP_OA_METRICS_SET,metrics_set_id,
  DRM_I915_PERF_PROP_OA_FORMAT, report_format,
  DRM_I915_PERF_PROP_OA_EXPONENT,   period_exponent,
   };
   struct drm_i915_perf_open_param parm = {
  .flags = I915_PERF_FLAG_FD_CLOEXEC |
   I915_PERF_FLAG_FD_NONBLOCK |
   I915_PERF_FLAG_DISABLED,
  .properties_ptr = (uint64_t)properties,
  .num_properties = sizeof(properties) / 16,
   };
   int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, );

Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.

No specific streams are supported yet so any attempt to open a stream
will return an error.

v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov

Signed-off-by: Robert Bragg 
---
 drivers/gpu/drm/i915/Makefile|   3 +
 drivers/gpu/drm/i915/i915_drv.c  |   4 +
 drivers/gpu/drm/i915/i915_drv.h  |  91 
 drivers/gpu/drm/i915/i915_perf.c | 443 +++
 include/uapi/drm/i915_drm.h  |  67 ++
 5 files changed, 608 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_perf.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 6123400..8d4e25f 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
 # virtual gpu code
 i915-y += i915_vgpu.o

+# perf code
+i915-y += i915_perf.o
+
 ifeq ($(CONFIG_DRM_I915_GVT),y)
 i915-y += intel_gvt.o
 include $(src)/gvt/Makefile
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index af3559d..685c96e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -836,6 +836,8 @@ static int i915_driver_init_early(struct drm_i915_private 
*dev_priv,

intel_detect_preproduction_hw(dev_priv);

+   i915_perf_init(dev_priv);
+
return 0;

 err_workqueues:
@@ -849,6 +851,7 @@ static int i915_driver_init_early(struct drm_i915_private 
*dev_priv,
  */
 static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 {
+   i915_perf_fini(dev_priv);
i915_gem_load_cleanup(_priv->drm);
i915_workqueues_cleanup(dev_priv);
 }
@@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, 
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, 
i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, 
i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, 
DRM_RENDER_ALLOW),
 };

 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5a260db..7a65c0b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1767,6 +1767,84 @@ struct intel_wm_config {
bool sprites_scaled;
 };

+struct i915_perf_stream;
+
+struct i915_perf_stream_ops {
+   /* Enables the collection of HW samples, either in response to
+* I915_PERF_IOCTL_ENABLE or implicitly called when stream is
+* opened without I915_PERF_FLAG_DISABLED.
+*/
+   void (*enable)(struct i915_perf_stream *stream);
+
+   /* Disables the collection of HW samples, either in response to
+* I915_PERF_IOCTL_DISABLE or implicitly called before
+* destroying the stream.
+*/
+   void (*disable)(struct i915_perf_stream *stream);
+
+   /* Return: true if any i915 perf records are ready to read()
+* for this stream.
+*/
+   bool (*can_read)(struct i915_perf_stream *stream);
+
+   /* Call poll_wait, passing a wait queue that will be woken
+* once there is something ready to read() for the stream
+*/
+   void (*poll_wait)(struct i915_perf_stream *stream,
+