From: Sourab Gupta <[email protected]>

This patch adds support for retrieving MMIO register values alongwith
timestamps and forwarding them to userspace through perf.
The userspace can request upto 8 MMIO register values to be dumped.
The addresses of upto 8 MMIO registers can be passed through perf attr
config. The registers are checked against a whitelist before passing them
on. The commands to dump the values of these MMIO registers are then
inserted into the ring alongwith commands to dump the timestamps.

Signed-off-by: Sourab Gupta <[email protected]>
---
 drivers/gpu/drm/i915/i915_drv.h     |  2 +
 drivers/gpu/drm/i915/i915_oa_perf.c | 87 ++++++++++++++++++++++++++++++++++---
 include/uapi/drm/i915_drm.h         | 10 ++++-
 3 files changed, 92 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f2fe8d0..e114175 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2017,7 +2017,9 @@ struct drm_i915_private {
 #define I915_GEN_PMU_SAMPLE_RING               (1<<0)
 #define I915_GEN_PMU_SAMPLE_PID                        (1<<1)
 #define I915_GEN_PMU_SAMPLE_TAG                        (1<<2)
+#define I915_GEN_PMU_SAMPLE_MMIO               (1<<3)
                int sample_info_flags;
+               u32 mmio_list[8];
        } gen_pmu;
 
        void (*insert_profile_cmd[I915_PROFILE_MAX])
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c 
b/drivers/gpu/drm/i915/i915_oa_perf.c
index 1cc16ef..a9d44e0 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -113,8 +113,8 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer 
*ringbuf, u32 ctx_id,
        struct drm_i915_gem_object *obj = dev_priv->gen_pmu.buffer.obj;
        struct i915_gen_pmu_node *entry;
        unsigned long lock_flags;
-       u32 addr = 0;
-       int ret;
+       u32 mmio_addr, addr = 0;
+       int ret, i;
 
        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (entry == NULL) {
@@ -150,6 +150,7 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer 
*ringbuf, u32 ctx_id,
        spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
 
        addr = i915_gem_obj_ggtt_offset(obj) + entry->offset;
+       mmio_addr = addr + sizeof(struct drm_i915_ts_data);
 
        if (ring->id == RCS) {
                ret = intel_ring_begin(ring, 6);
@@ -177,6 +178,25 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer 
*ringbuf, u32 ctx_id,
                intel_ring_emit(ring, 0); /* imm high, must be zero */
                intel_ring_advance(ring);
        }
+       for (i = 0; i < 8; i++) {
+               if (0 == dev_priv->gen_pmu.mmio_list[i])
+                       break;
+
+               addr = mmio_addr +
+                       i * sizeof(dev_priv->gen_pmu.mmio_list[i]);
+
+               ret = intel_ring_begin(ring, 4);
+               if (ret)
+                       return;
+
+               intel_ring_emit(ring,
+                               MI_STORE_REGISTER_MEM(1) |
+                               MI_SRM_LRM_GLOBAL_GTT);
+               intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]);
+               intel_ring_emit(ring, addr);
+               intel_ring_emit(ring, MI_NOOP);
+               intel_ring_advance(ring);
+       }
 
        obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
        i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
@@ -556,7 +576,7 @@ static void forward_one_gen_pmu_sample(struct 
drm_i915_private *dev_priv,
 {
        struct perf_sample_data data;
        struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
-       int ts_size, snapshot_size;
+       int ts_size, mmio_size, snapshot_size;
        u8 *snapshot, *current_ptr;
        struct drm_i915_ts_node_ctx_id *ctx_info;
        struct drm_i915_ts_node_ring_id *ring_info;
@@ -565,10 +585,17 @@ static void forward_one_gen_pmu_sample(struct 
drm_i915_private *dev_priv,
        struct perf_raw_record raw;
 
        ts_size = sizeof(struct drm_i915_ts_data);
-       snapshot_size = ts_size + sizeof(*ctx_info);
+
+       if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_MMIO)
+               mmio_size = sizeof(struct drm_i915_mmio_data);
+       else
+               mmio_size = 0;
+
        snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
+       snapshot_size = ts_size + mmio_size + sizeof(*ctx_info);
 
-       ctx_info = (struct drm_i915_ts_node_ctx_id *)(snapshot + ts_size);
+       ctx_info = (struct drm_i915_ts_node_ctx_id *)
+                       (snapshot + mmio_size + ts_size);
        ctx_info->ctx_id = node->ctx_id;
        current_ptr = snapshot + snapshot_size;
 
@@ -1053,6 +1080,9 @@ static int init_gen_pmu_buffer(struct perf_event *event)
        if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG)
                node_size += sizeof(struct drm_i915_ts_node_tag);
 
+       if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_MMIO)
+               node_size += sizeof(struct drm_i915_mmio_data);
+
        /* size has to be aligned to 8 bytes (required by relevant gpu cmds) */
        node_size = ALIGN(node_size, 8);
        dev_priv->gen_pmu.buffer.node_size = node_size;
@@ -1648,6 +1678,42 @@ err_size:
        goto out;
 }
 
+#define GEN_RANGE(l, h) GENMASK(h, l)
+
+/* Some embargoed entries missing from whitelist */
+static const struct register_whitelist {
+       uint64_t offset;
+       uint32_t size;
+       /* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
+       uint32_t gen_bitmask;
+} whitelist[] = {
+       { GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) },
+       { GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) },
+};
+
+static int check_mmio_whitelist(struct drm_i915_private *dev_priv,
+                               struct drm_i915_gen_pmu_attr *gen_attr)
+{
+       struct register_whitelist const *entry = whitelist;
+       int i, count;
+
+       for (count = 0; count < 8; count++) {
+               if (!gen_attr->mmio_list[count])
+                       break;
+
+               for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
+                       if (entry->offset == gen_attr->mmio_list[count] &&
+                           (1 << INTEL_INFO(dev_priv->dev)->gen &
+                                       entry->gen_bitmask))
+                               break;
+               }
+
+               if (i == ARRAY_SIZE(whitelist))
+                       return -EINVAL;
+       }
+       return 0;
+}
+
 static int i915_gen_event_init(struct perf_event *event)
 {
        struct drm_i915_private *dev_priv =
@@ -1674,6 +1740,17 @@ static int i915_gen_event_init(struct perf_event *event)
        if (gen_attr.sample_tag)
                dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_TAG;
 
+       if (gen_attr.sample_mmio) {
+               ret = check_mmio_whitelist(dev_priv, &gen_attr);
+               if (ret)
+                       return ret;
+
+               dev_priv->gen_pmu.sample_info_flags |=
+                               I915_GEN_PMU_SAMPLE_MMIO;
+               memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list,
+                               sizeof(dev_priv->gen_pmu.mmio_list));
+       }
+
        /* To avoid the complexity of having to accurately filter
         * data and marshal to the appropriate client
         * we currently only allow exclusive access */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7ab4972..65bc39d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -81,7 +81,7 @@
 
 #define I915_OA_ATTR_SIZE_VER0         32  /* sizeof first published struct */
 
-#define I915_GEN_PMU_ATTR_SIZE_VER0    8  /* sizeof first published struct */
+#define I915_GEN_PMU_ATTR_SIZE_VER0    40  /* sizeof first published struct */
 
 typedef struct _drm_i915_oa_attr {
        __u32 size;
@@ -105,7 +105,9 @@ struct drm_i915_gen_pmu_attr {
        __u32 sample_ring:1,
                sample_pid:1,
                sample_tag:1,
-               __reserved_1:29;
+               sample_mmio:1,
+               __reserved_1:28;
+       __u32 mmio_list[8];
 };
 
 /* Header for PERF_RECORD_DEVICE type events */
@@ -155,6 +157,10 @@ struct drm_i915_ts_data {
        __u32 ts_high;
 };
 
+struct drm_i915_mmio_data {
+       __u32 mmio[8];
+};
+
 struct drm_i915_ts_node_ctx_id {
        __u32 ctx_id;
        __u32 pad;
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to