From: Sourab Gupta <[email protected]>

This patch adds support for retrieving MMIO register values through the Gen
Perf PMU interface. Through this interface, userspace can now request up to 8
MMIO register values to be dumped, along with the timestamp values which were
already being dumped across the batchbuffer boundaries.
Userspace can pass the addresses of up to 8 MMIO registers through the perf
attr config. The commands to dump the values of these MMIO registers are then
inserted into the ring along with the commands to dump the timestamps.

Signed-off-by: Sourab Gupta <[email protected]>
---
 drivers/gpu/drm/i915/i915_drv.h     |   4 +-
 drivers/gpu/drm/i915/i915_oa_perf.c | 119 ++++++++++++++++++++++++++++++++----
 include/uapi/drm/i915_drm.h         |   9 ++-
 3 files changed, 117 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a0e1d17..1f86358 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1718,9 +1718,10 @@ struct drm_i915_ts_node_info {
        struct drm_i915_gem_request *req;
 };
 
-struct drm_i915_ts_node {
+struct drm_i915_ts_mmio_node {
        /* ensure timestamp starts on a qword boundary */
        struct drm_i915_ts_data timestamp;
+       __u32 mmio[8];
        struct drm_i915_ts_node_info node_info;
 };
 #endif
@@ -2024,6 +2025,7 @@ struct drm_i915_private {
                struct work_struct work_timer;
                struct work_struct work_event_stop;
                struct completion complete;
+               u32 mmio_list[8];
        } gen_pmu;
 
        struct list_head profile_cmd;
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index ed0bdc9..465e823 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -113,10 +113,10 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
                        dev_priv->gen_pmu.buffer.addr;
        void *data_ptr = (u8 *)queue_hdr + queue_hdr->data_offset;
        int data_size = (queue_hdr->size_in_bytes - queue_hdr->data_offset);
-       u32 node_offset, timestamp_offset, addr = 0;
-       int ret;
+       u32 node_offset, timestamp_offset, mmio_offset, addr = 0;
+       int ret, i = 0;
 
-       struct drm_i915_ts_node *nodes = data_ptr;
+       struct drm_i915_ts_mmio_node *nodes = data_ptr;
        int num_nodes = 0;
        int index = 0;
 
@@ -124,12 +124,14 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
        index = queue_hdr->node_count % num_nodes;
 
        timestamp_offset = offsetof(struct drm_i915_ts_data, ts_low);
+       mmio_offset =
+               offsetof(struct drm_i915_ts_mmio_node, mmio);
 
        node_offset = i915_gem_obj_ggtt_offset(dev_priv->gen_pmu.buffer.obj) +
                        queue_hdr->data_offset +
-                       index * sizeof(struct drm_i915_ts_node);
+                       index * sizeof(struct drm_i915_ts_mmio_node);
        addr = node_offset +
-               offsetof(struct drm_i915_ts_node, timestamp) +
+               offsetof(struct drm_i915_ts_mmio_node, timestamp) +
                timestamp_offset;
 
        if (ring->id == RCS) {
@@ -158,6 +160,27 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
                intel_ring_emit(ring, 0); /* imm high, must be zero */
                intel_ring_advance(ring);
        }
+
+       for (i = 0; i < 8; i++) {
+               if (0 == dev_priv->gen_pmu.mmio_list[i])
+                       break;
+
+               addr = node_offset + mmio_offset +
+                       i * sizeof(dev_priv->gen_pmu.mmio_list[i]);
+
+               ret = intel_ring_begin(ring, 4);
+               if (ret)
+                       return;
+
+               intel_ring_emit(ring,
+                               MI_STORE_REGISTER_MEM(1) |
+                               MI_SRM_LRM_GLOBAL_GTT);
+               intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]);
+               intel_ring_emit(ring, addr);
+               intel_ring_emit(ring, MI_NOOP);
+               intel_ring_advance(ring);
+       }
+
        node_info = &nodes[index].node_info;
        i915_gem_request_assign(&node_info->req,
                                ring->outstanding_lazy_request);
@@ -314,11 +337,11 @@ static void init_gen_pmu_buf_queue(struct drm_i915_private *dev_priv)
 }
 
 static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
-                               struct drm_i915_ts_node *node)
+                               struct drm_i915_ts_mmio_node *node)
 {
        struct perf_sample_data data;
        struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
-       int snapshot_size = sizeof(struct drm_i915_ts_usernode);
+       int snapshot_size = sizeof(struct drm_i915_ts_mmio_usernode);
        struct perf_raw_record raw;
 
        perf_sample_data_init(&data, 0, event->hw.last_period);
@@ -338,11 +361,11 @@ void i915_gen_pmu_wait_gpu(struct drm_i915_private *dev_priv)
        struct drm_i915_ts_queue_header *hdr =
                (struct drm_i915_ts_queue_header *)
                dev_priv->gen_pmu.buffer.addr;
-       struct drm_i915_ts_node *first_node, *node;
+       struct drm_i915_ts_mmio_node *first_node, *node;
        int head, tail, num_nodes, ret;
        struct drm_i915_gem_request *req;
 
-       first_node = (struct drm_i915_ts_node *)
+       first_node = (struct drm_i915_ts_mmio_node *)
                        ((char *)hdr + hdr->data_offset);
        num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
                        sizeof(*node);
@@ -375,14 +398,14 @@ void forward_gen_pmu_snapshots_work(struct work_struct *__work)
        struct drm_i915_ts_queue_header *hdr =
                (struct drm_i915_ts_queue_header *)
                dev_priv->gen_pmu.buffer.addr;
-       struct drm_i915_ts_node *first_node, *node;
+       struct drm_i915_ts_mmio_node *first_node, *node;
        int head, tail, num_nodes, ret;
        struct drm_i915_gem_request *req;
 
        if (dev_priv->gen_pmu.event_active == false)
                return;
 
-       first_node = (struct drm_i915_ts_node *)
+       first_node = (struct drm_i915_ts_mmio_node *)
                        ((char *)hdr + hdr->data_offset);
        num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
                        sizeof(*node);
@@ -421,11 +444,11 @@ void i915_gen_pmu_stop_work_fn(struct work_struct *__work)
        struct drm_i915_ts_queue_header *hdr =
                (struct drm_i915_ts_queue_header *)
                dev_priv->gen_pmu.buffer.addr;
-       struct drm_i915_ts_node *first_node, *node;
+       struct drm_i915_ts_mmio_node *first_node, *node;
        int head, tail, num_nodes, ret;
        struct drm_i915_gem_request *req;
 
-       first_node = (struct drm_i915_ts_node *)
+       first_node = (struct drm_i915_ts_mmio_node *)
                        ((char *)hdr + hdr->data_offset);
        num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
                        sizeof(*node);
@@ -1467,15 +1490,85 @@ static int i915_oa_event_event_idx(struct perf_event *event)
        return 0;
 }
 
+static int i915_gen_pmu_copy_attr(struct drm_i915_gen_pmu_attr __user *uattr,
+                            struct drm_i915_gen_pmu_attr *attr)
+{
+       u32 size;
+       int ret;
+       /* Validate and copy the user attr at uattr into *attr; 0 or -errno. */
+       if (!access_ok(VERIFY_WRITE, uattr, I915_GEN_PMU_ATTR_SIZE_VER0))
+               return -EFAULT;
+
+       /*
+        * Zero the full struct so fields missing from a short copy read as 0.
+        */
+       memset(attr, 0, sizeof(*attr));
+
+       ret = get_user(size, &uattr->size);
+       if (ret)
+               return ret;
+
+       if (size > PAGE_SIZE)   /* silly large */
+               goto err_size;
+
+       if (size < I915_GEN_PMU_ATTR_SIZE_VER0)
+               goto err_size;
+
+       /*
+        * If we're handed a bigger struct than we know of,
+        * ensure all the unknown bits are 0 - i.e. new
+        * user-space does not rely on any kernel feature
+        * extensions we don't know about yet.
+        */
+       if (size > sizeof(*attr)) {
+               unsigned char __user *addr;
+               unsigned char __user *end;
+               unsigned char val;
+
+               addr = (void __user *)uattr + sizeof(*attr);
+               end  = (void __user *)uattr + size;
+
+               for (; addr < end; addr++) {
+                       ret = get_user(val, addr);
+                       if (ret)
+                               return ret;
+                       if (val)
+                               goto err_size;
+               }
+               size = sizeof(*attr);   /* copy only the part we know */
+       }
+
+       ret = copy_from_user(attr, uattr, size);
+       if (ret)
+               return -EFAULT;
+
+out:
+       return ret;
+
+err_size:
+       put_user(sizeof(*attr), &uattr->size);  /* report our struct size */
+       ret = -E2BIG;
+       goto out;
+}
+
 static int i915_gen_event_init(struct perf_event *event)
 {
        struct drm_i915_private *dev_priv =
                container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+       struct drm_i915_gen_pmu_attr gen_attr;
        int ret = 0;
 
        if (event->attr.type != event->pmu->type)
                return -ENOENT;
 
+       ret = i915_gen_pmu_copy_attr(to_user_ptr(event->attr.config),
+                               &gen_attr);
+       if (ret)
+               return ret;
+
+       memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list,
+                       sizeof(dev_priv->gen_pmu.mmio_list));
+
        /* To avoid the complexity of having to accurately filter
         * data and marshal to the appropriate client
         * we currently only allow exclusive access */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a7da421..8d4deec 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -80,6 +80,7 @@
 #define I915_OA_METRICS_SET_MAX                        I915_OA_METRICS_SET_SAMPLER_BALANCE
 
 #define I915_OA_ATTR_SIZE_VER0         32  /* sizeof first published struct */
+#define I915_GEN_PMU_ATTR_SIZE_VER0    36  /* sizeof first published struct */
 
 typedef struct _drm_i915_oa_attr {
        __u32 size;
@@ -97,6 +98,11 @@ typedef struct _drm_i915_oa_attr {
        __reserved_2:31;
 } drm_i915_oa_attr_t;
 
+struct drm_i915_gen_pmu_attr {
+       __u32 size;             /* struct size in bytes, set by userspace */
+       __u32 mmio_list[8];     /* registers to dump; first 0 ends the list */
+};
+
 /* Header for PERF_RECORD_DEVICE type events */
 typedef struct _drm_i915_oa_event_header {
        __u32 type;
@@ -143,9 +149,10 @@ struct drm_i915_ts_data {
        __u32 ts_high;
 };
 
-struct drm_i915_ts_usernode {
+struct drm_i915_ts_mmio_usernode {
        /* ensure timestamp starts on a qword boundary */
        struct drm_i915_ts_data timestamp;
+       __u32 mmio[8];
        struct drm_i915_ts_node_footer node_info;
 };
 
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to