From: Sourab Gupta <[email protected]>

This patch introduces the routines which insert commands for capturing OA
snapshots into the ringbuffer of the RCS engine.
The MI_REPORT_PERF_COUNT command can be used to capture snapshots of OA
counters. The routines introduced in this patch can be called to insert these
commands at appropriate points during workload submission.

Signed-off-by: Sourab Gupta <[email protected]>
---
 drivers/gpu/drm/i915/i915_dma.c     |  1 +
 drivers/gpu/drm/i915/i915_drv.h     |  3 ++
 drivers/gpu/drm/i915/i915_oa_perf.c | 86 +++++++++++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0553f20..f12feaa 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -821,6 +821,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long 
flags)
        /* Must at least be registered before trying to pin any context
         * otherwise i915_oa_context_pin_notify() will lock an un-initialized
         * spinlock, upsetting lockdep checks */
+       INIT_LIST_HEAD(&dev_priv->profile_cmd);
        i915_oa_pmu_register(dev);
 
        intel_pm_setup(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5453842..798da49 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1982,6 +1982,8 @@ struct drm_i915_private {
                struct work_struct work_event_stop;
                struct completion complete;
        } oa_pmu;
+
+       struct list_head profile_cmd;
 #endif
 
        /* Abstract the submission mechanism (legacy ringbuffer or execlists) 
away */
@@ -3162,6 +3164,7 @@ void i915_oa_context_pin_notify(struct drm_i915_private 
*dev_priv,
                                struct intel_context *context);
 void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
                                  struct intel_context *context);
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id);
 #else
 static inline void
 i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c 
b/drivers/gpu/drm/i915/i915_oa_perf.c
index 5d63dab..b02850c 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -25,6 +25,76 @@ static int hsw_perf_format_sizes[] = {
        64   /* C4_B8_HSW */
 };
 
+/*
+ * Entry on dev_priv->profile_cmd: a hook that emits profiling commands
+ * (e.g. MI_REPORT_PERF_COUNT) into a ringbuffer at workload submission
+ * time.  Registered by i915_oa_event_start() and removed again by
+ * i915_oa_async_stop_work_fn().
+ */
+struct drm_i915_insert_cmd {
+       struct list_head list;
+       void (*insert_cmd)(struct intel_ringbuffer *ringbuf, u32 ctx_id);
+};
+
+/*
+ * i915_insert_profiling_cmd - run all registered profiling-command hooks
+ * @ringbuf: ringbuffer the commands are emitted into
+ * @ctx_id: id of the context being submitted
+ *
+ * Walks dev_priv->profile_cmd and invokes each registered insert_cmd
+ * callback for this submission.
+ *
+ * NOTE(review): the list is traversed with no visible locking while
+ * i915_oa_event_start()/i915_oa_async_stop_work_fn() add and remove
+ * entries -- confirm callers serialize against those paths (e.g. via
+ * struct_mutex), otherwise this can race with list_del().
+ */
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+{
+       struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_i915_private *dev_priv = ring->dev->dev_private;
+       struct drm_i915_insert_cmd *entry;
+
+       list_for_each_entry(entry, &dev_priv->profile_cmd, list)
+               entry->insert_cmd(ringbuf, ctx_id);
+}
+
+/*
+ * i915_oa_insert_cmd - emit an MI_REPORT_PERF_COUNT snapshot request
+ * @ringbuf: ringbuffer of the engine executing the workload
+ * @ctx_id: id of the context being submitted
+ *
+ * Emits an MI_REPORT_PERF_COUNT that writes an OA counter report into
+ * the next node of the circular async buffer, then records the request,
+ * pid and context id in that node's info so completion can be tracked.
+ * No-op on engines other than the render ring.
+ */
+void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+{
+       struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_i915_private *dev_priv = ring->dev->dev_private;
+       struct drm_i915_oa_async_node_info *node_info = NULL;
+       struct drm_i915_oa_async_queue_header *queue_hdr =
+                       (struct drm_i915_oa_async_queue_header *)
+                       dev_priv->oa_pmu.oa_async_buffer.addr;
+       void *data_ptr = (u8 *)queue_hdr + queue_hdr->data_offset;
+       int data_size = (queue_hdr->size_in_bytes - queue_hdr->data_offset);
+       u32 data_offset, addr = 0;
+       int ret;
+
+       struct drm_i915_oa_async_node *nodes = data_ptr;
+       int num_nodes = 0;
+       int index = 0;
+
+       /* OA counters are only supported on the render ring */
+       if (ring->id != RCS)
+               return;
+
+       /* NOTE(review): if data_size < sizeof(*nodes), num_nodes is 0 and
+        * the modulo below divides by zero -- presumably the init path
+        * guarantees a larger buffer, but verify. */
+       num_nodes = data_size / sizeof(*nodes);
+       index = queue_hdr->node_count % num_nodes;
+
+       data_offset = offsetof(struct drm_i915_oa_async_node, report_perf);
+
+       /* GGTT address of the report_perf field of the target node */
+       addr = i915_gem_obj_ggtt_offset(dev_priv->oa_pmu.oa_async_buffer.obj) +
+               queue_hdr->data_offset +
+               index * sizeof(struct drm_i915_oa_async_node) +
+               data_offset;
+
+       /* MI_REPORT_PERF_COUNT needs a 64-byte-aligned address (0x3f mask).
+        * NOTE(review): WARN_ON would be preferable to BUG_ON here -- a
+        * misaligned address need not take down the whole kernel. */
+       BUG_ON(addr & 0x3f);
+
+       /* NOTE(review): a failure here is silently dropped and the node is
+        * simply never written; consider at least logging, since userspace
+        * will wait on a report that never arrives. */
+       ret = intel_ring_begin(ring, 4);
+       if (ret)
+               return;
+
+       intel_ring_emit(ring, MI_REPORT_PERF_COUNT | (1<<0));
+       intel_ring_emit(ring, addr | MI_REPORT_PERF_COUNT_GGTT);
+       intel_ring_emit(ring, ring->outstanding_lazy_request->seqno);
+       intel_ring_emit(ring, MI_NOOP);
+       intel_ring_advance(ring);
+
+       /* Tie the node to the request so readers can tell when the report
+        * written by the GPU is actually ready. */
+       node_info = &nodes[index].node_info;
+       i915_gem_request_assign(&node_info->req,
+                               ring->outstanding_lazy_request);
+
+       node_info->pid = current->pid;
+       node_info->ctx_id = ctx_id;
+       queue_hdr->node_count++;
+       /* NOTE(review): this wrap accounting looks wrong -- it misses the
+        * first wrap (node_count == num_nodes after the increment) and then
+        * bumps wrap_count on *every* insert once node_count > num_nodes,
+        * not once per buffer wrap.  Likely intent: increment wrap_count
+        * whenever node_count % num_nodes returns to 0, or keep node_count
+        * bounded. */
+       if (queue_hdr->node_count > num_nodes)
+               queue_hdr->wrap_count++;
+}
+
 static void init_oa_async_buf_queue(struct drm_i915_private *dev_priv)
 {
        struct drm_i915_oa_async_queue_header *hdr =
@@ -865,6 +935,7 @@ void i915_oa_async_stop_work_fn(struct work_struct *__work)
                container_of(__work, typeof(*dev_priv),
                        oa_pmu.work_event_stop);
        struct perf_event *event = dev_priv->oa_pmu.exclusive_event;
+       struct drm_i915_insert_cmd *entry, *next;
        struct drm_i915_oa_async_queue_header *hdr =
                (struct drm_i915_oa_async_queue_header *)
                dev_priv->oa_pmu.oa_async_buffer.addr;
@@ -882,6 +953,13 @@ void i915_oa_async_stop_work_fn(struct work_struct *__work)
        if (ret)
                return;
 
+       list_for_each_entry_safe(entry, next, &dev_priv->profile_cmd, list) {
+               if (entry->insert_cmd == i915_oa_insert_cmd) {
+                       list_del(&entry->list);
+                       kfree(entry);
+               }
+       }
+
        dev_priv->oa_pmu.event_active = false;
 
        i915_oa_async_wait_gpu(dev_priv);
@@ -920,8 +998,14 @@ static void i915_oa_event_start(struct perf_event *event, 
int flags)
        struct drm_i915_private *dev_priv =
                container_of(event->pmu, typeof(*dev_priv), oa_pmu.pmu);
        unsigned long lock_flags;
+       struct drm_i915_insert_cmd *entry;
        u32 oastatus1, tail;
 
+       entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+       if (!entry)
+               return;
+       entry->insert_cmd = i915_oa_insert_cmd;
+
        if (dev_priv->oa_pmu.metrics_set == I915_OA_METRICS_SET_3D) {
                config_oa_regs(dev_priv, i915_oa_3d_mux_config_hsw,
                                i915_oa_3d_mux_config_hsw_len);
@@ -976,6 +1060,8 @@ static void i915_oa_event_start(struct perf_event *event, 
int flags)
        dev_priv->oa_pmu.event_active = true;
        update_oacontrol(dev_priv);
 
+       list_add_tail(&entry->list, &dev_priv->profile_cmd);
+
        /* Reset the head ptr to ensure we don't forward reports relating
         * to a previous perf event */
        oastatus1 = I915_READ(GEN7_OASTATUS1);
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to