On Wed, Jul 15, 2015 at 02:21:46PM +0530, [email protected] wrote:
> From: Sourab Gupta <[email protected]>
> 
> This patch adds support for retrieving MMIO register values alongwith
> timestamps and forwarding them to userspace through perf.
> The userspace can request upto 8 MMIO register values to be dumped.
> The addresses of upto 8 MMIO registers can be passed through perf attr
> config. The registers are checked against a whitelist before passing them
> on. The commands to dump the values of these MMIO registers are then
> inserted into the ring alongwith commands to dump the timestamps.
> 
> Signed-off-by: Sourab Gupta <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  2 +
>  drivers/gpu/drm/i915/i915_oa_perf.c | 87 
> ++++++++++++++++++++++++++++++++++---
>  include/uapi/drm/i915_drm.h         | 10 ++++-
>  3 files changed, 92 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f2fe8d0..e114175 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2017,7 +2017,9 @@ struct drm_i915_private {
>  #define I915_GEN_PMU_SAMPLE_RING             (1<<0)
>  #define I915_GEN_PMU_SAMPLE_PID                      (1<<1)
>  #define I915_GEN_PMU_SAMPLE_TAG                      (1<<2)
> +#define I915_GEN_PMU_SAMPLE_MMIO             (1<<3)
>               int sample_info_flags;
> +             u32 mmio_list[8];
>       } gen_pmu;
>  
>       void (*insert_profile_cmd[I915_PROFILE_MAX])
> diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c 
> b/drivers/gpu/drm/i915/i915_oa_perf.c
> index 1cc16ef..a9d44e0 100644
> --- a/drivers/gpu/drm/i915/i915_oa_perf.c
> +++ b/drivers/gpu/drm/i915/i915_oa_perf.c
> @@ -113,8 +113,8 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer 
> *ringbuf, u32 ctx_id,
>       struct drm_i915_gem_object *obj = dev_priv->gen_pmu.buffer.obj;
>       struct i915_gen_pmu_node *entry;
>       unsigned long lock_flags;
> -     u32 addr = 0;
> -     int ret;
> +     u32 mmio_addr, addr = 0;
> +     int ret, i;
>  
>       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
>       if (entry == NULL) {
> @@ -150,6 +150,7 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer 
> *ringbuf, u32 ctx_id,
>       spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
>  
>       addr = i915_gem_obj_ggtt_offset(obj) + entry->offset;
> +     mmio_addr = addr + sizeof(struct drm_i915_ts_data);
>  
>       if (ring->id == RCS) {
>               ret = intel_ring_begin(ring, 6);
> @@ -177,6 +178,25 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer 
> *ringbuf, u32 ctx_id,
>               intel_ring_emit(ring, 0); /* imm high, must be zero */
>               intel_ring_advance(ring);
>       }
> +     for (i = 0; i < 8; i++) {
> +             if (0 == dev_priv->gen_pmu.mmio_list[i])
> +                     break;
> +
> +             addr = mmio_addr +
> +                     i * sizeof(dev_priv->gen_pmu.mmio_list[i]);
> +
> +             ret = intel_ring_begin(ring, 4);
> +             if (ret)
> +                     return;
> +
> +             intel_ring_emit(ring,
> +                             MI_STORE_REGISTER_MEM(1) |
> +                             MI_SRM_LRM_GLOBAL_GTT);
> +             intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]);
> +             intel_ring_emit(ring, addr);
> +             intel_ring_emit(ring, MI_NOOP);
> +             intel_ring_advance(ring);

Premature optimisation, but can we store the register array with a single 
command?

for (i = 0; i < 8; i++) {
        if (0 == dev_priv->gen_pmu.mmio_list[i])
                break;

ret = intel_ring_begin(ring, 2*i + 2);
if (ret)
        return;

intel_ring_emit(MI_STORE_REGISTER_MEM(i));
        for (i = 0; i < 8; i++) {
                if (0 == dev_priv->gen_pmu.mmio_list[i])
                        break;
                intel_ring_emit(dev_priv->gen_pmu.mmio_list[i]);
                intel_ring_emit(addr + i*4);
        }
intel_ring_emit(MI_NOOP);

You should also consider ensuring that we have a command barrier between
these and 3d operations. If only we had 
engine->emit_flush(I915_COMMAND_BARRIER).

> +/* Some embargoed entries missing from whitelist */
> +static const struct register_whitelist {
> +     uint64_t offset;
> +     uint32_t size;
> +     /* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
> +     uint32_t gen_bitmask;
> +} whitelist[] = {
> +     { GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) },
> +     { GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) },
> +};
> +
> +static int check_mmio_whitelist(struct drm_i915_private *dev_priv,
> +                             struct drm_i915_gen_pmu_attr *gen_attr)

Just what I came looking for.

> +{
> +     struct register_whitelist const *entry = whitelist;
> +     int i, count;
> +
> +     for (count = 0; count < 8; count++) {

hardcoded value, ARRAY_SIZE(gen_attr->mmio_list)

> +             if (!gen_attr->mmio_list[count])
> +                     break;
> +
> +             for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {

Interesting choice of continuation in the ABI. Seems a litte forced.
Move the whitelist into the function, no one else should think of
accessing it (right?), then just use whitelist[i].

> +                     if (entry->offset == gen_attr->mmio_list[count] &&
> +                         (1 << INTEL_INFO(dev_priv->dev)->gen &

dev_priv->dev->dev_priv->info, you have to be kidding me.

INTEL_INFO(dev_priv) is the natural form.

> +                                     entry->gen_bitmask))
> +                             break;
> +             }
> +
> +             if (i == ARRAY_SIZE(whitelist))
> +                     return -EINVAL;
> +     }
> +     return 0;
> +}
> +
>  static int i915_gen_event_init(struct perf_event *event)
>  {
>       struct drm_i915_private *dev_priv =
> @@ -1674,6 +1740,17 @@ static int i915_gen_event_init(struct perf_event 
> *event)
>       if (gen_attr.sample_tag)
>               dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_TAG;
>  
> +     if (gen_attr.sample_mmio) {
> +             ret = check_mmio_whitelist(dev_priv, &gen_attr);
> +             if (ret)
> +                     return ret;

Global hw state should only be inspectable by root (unless the system
has relaxed perf permissions).

> +             dev_priv->gen_pmu.sample_info_flags |=
> +                             I915_GEN_PMU_SAMPLE_MMIO;
> +             memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list,
> +                             sizeof(dev_priv->gen_pmu.mmio_list));
> +     }
> +
>       /* To avoid the complexity of having to accurately filter
>        * data and marshal to the appropriate client
>        * we currently only allow exclusive access */
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7ab4972..65bc39d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -81,7 +81,7 @@
>  
>  #define I915_OA_ATTR_SIZE_VER0               32  /* sizeof first published 
> struct */
>  
> -#define I915_GEN_PMU_ATTR_SIZE_VER0  8  /* sizeof first published struct */
> +#define I915_GEN_PMU_ATTR_SIZE_VER0  40  /* sizeof first published struct */
>  
>  typedef struct _drm_i915_oa_attr {
>       __u32 size;
> @@ -105,7 +105,9 @@ struct drm_i915_gen_pmu_attr {
>       __u32 sample_ring:1,
>               sample_pid:1,
>               sample_tag:1,
> -             __reserved_1:29;
> +             sample_mmio:1,
> +             __reserved_1:28;
> +     __u32 mmio_list[8];
>  };

seems like having a
BUILD_BUG_ON(sizeof(struct _drm_i915_oa_attr) != I915_GEN_PMU_ATTR_SIZE_VER0) 
is in order.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to