Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batch buffer is
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to switch between
several OA configurations without having to open/close the i915/perf
stream.

v2: No need for locking on OA config object creation (Chris)
    Flush cpu mapping of OA config (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_drv.h              |  22 ++-
 drivers/gpu/drm/i915/i915_perf.c             | 171 +++++++++++++++----
 3 files changed, 162 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index eec31e36aca7..e7eff9db343e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -126,6 +126,7 @@
  */
 #define MI_LOAD_REGISTER_IMM(x)        MI_INSTR(0x22, 2*(x)-1)
 #define   MI_LRI_FORCE_POSTED          (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT                (1<<22)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 76f2bf90ed86..f1e51307253a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1276,6 +1276,10 @@ struct i915_oa_config {
        struct attribute *attrs[2];
        struct device_attribute sysfs_metric_id;
 
+       struct drm_i915_gem_object *obj;
+
+       struct list_head vma_link;
+
        atomic_t ref_count;
 };
 
@@ -1865,11 +1869,21 @@ struct drm_i915_private {
                struct mutex metrics_lock;
 
                /*
-                * List of dynamic configurations, you need to hold
-                * dev_priv->perf.metrics_lock to access it.
+                * List of dynamic configurations (struct i915_oa_config), you
+                * need to hold dev_priv->perf.metrics_lock to access it.
                 */
                struct idr metrics_idr;
 
+               /*
+                * List of dynamic configurations (struct i915_oa_config)
+                * which have an allocated buffer in GGTT for reconfiguration,
+                * you need to hold dev_priv->perf.metrics_lock to access it.
+                * Elements are added to the list lazily on execbuf (when a
+                * particular configuration is requested). The list is freed
+                * upon closing the perf stream.
+                */
+               struct list_head metrics_buffers;
+
                /*
                 * Lock associated with anything below within this structure
                 * except exclusive_stream.
@@ -2815,6 +2829,10 @@ int i915_perf_remove_config_ioctl(struct drm_device 
*dev, void *data,
 void i915_oa_init_reg_state(struct intel_engine_cs *engine,
                            struct intel_context *ce,
                            u32 *reg_state);
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+                           int metrics_set,
+                           struct i915_oa_config **out_config,
+                           struct drm_i915_gem_object **out_obj);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2e33a9b4eae7..e0071e44de3d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -366,9 +366,16 @@ struct perf_open_properties {
        int oa_period_exponent;
 };
 
-static void free_oa_config(struct drm_i915_private *dev_priv,
-                          struct i915_oa_config *oa_config)
+static void put_oa_config(struct i915_oa_config *oa_config)
 {
+       if (!atomic_dec_and_test(&oa_config->ref_count))
+               return;
+
+       if (oa_config->obj) {
+               list_del(&oa_config->vma_link);
+               i915_gem_object_put(oa_config->obj);
+       }
+
        if (!PTR_ERR(oa_config->flex_regs))
                kfree(oa_config->flex_regs);
        if (!PTR_ERR(oa_config->b_counter_regs))
@@ -378,38 +385,126 @@ static void free_oa_config(struct drm_i915_private 
*dev_priv,
        kfree(oa_config);
 }
 
-static void put_oa_config(struct drm_i915_private *dev_priv,
-                         struct i915_oa_config *oa_config)
+static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 
n_regs)
 {
-       if (!atomic_dec_and_test(&oa_config->ref_count))
-               return;
+       u32 i;
 
-       free_oa_config(dev_priv, oa_config);
+       for (i = 0; i < n_regs; i++) {
+               if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+                       u32 n_lri = min(n_regs - i,
+                                       (u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+                       *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+               }
+               *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+               *cs++ = reg_data[i].value;
+       }
+
+       return cs;
 }
 
-static int get_oa_config(struct drm_i915_private *dev_priv,
-                        int metrics_set,
-                        struct i915_oa_config **out_config)
+static int alloc_oa_config_buffer(struct drm_i915_private *i915,
+                                 struct i915_oa_config *oa_config)
 {
-       int ret;
+       struct drm_i915_gem_object *bo;
+       size_t config_length = 0;
+       u32 *cs;
 
-       if (metrics_set == 1) {
-               *out_config = &dev_priv->perf.oa.test_config;
-               atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
-               return 0;
+       if (oa_config->mux_regs_len > 0) {
+               config_length += DIV_ROUND_UP(oa_config->mux_regs_len,
+                                             MI_LOAD_REGISTER_IMM_MAX_REGS) * 
4;
+               config_length += oa_config->mux_regs_len * 8;
+       }
+       if (oa_config->b_counter_regs_len > 0) {
+               config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len,
+                                             MI_LOAD_REGISTER_IMM_MAX_REGS) * 
4;
+               config_length += oa_config->b_counter_regs_len * 8;
+       }
+       if (oa_config->flex_regs_len > 0) {
+               config_length += DIV_ROUND_UP(oa_config->flex_regs_len,
+                                             MI_LOAD_REGISTER_IMM_MAX_REGS) * 
4;
+               config_length += oa_config->flex_regs_len * 8;
        }
+       config_length += 4; /* MI_BATCH_BUFFER_END */
+       config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE);
 
-       ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
+       bo = i915_gem_object_create_shmem(i915, config_length);
+       if (IS_ERR(bo))
+               return PTR_ERR(bo);
+
+       cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
+       if (IS_ERR(cs)) {
+               i915_gem_object_put(bo);
+               return PTR_ERR(cs);
+       }
+
+       cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len);
+       cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, 
oa_config->b_counter_regs_len);
+       cs = write_cs_mi_lri(cs, oa_config->flex_regs, 
oa_config->flex_regs_len);
+
+       *cs++ = MI_BATCH_BUFFER_END;
+
+       i915_gem_object_flush_map(bo);
+       i915_gem_object_unpin_map(bo);
+
+       oa_config->obj = bo;
+
+       return 0;
+}
+
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+                           int metrics_set,
+                           struct i915_oa_config **out_config,
+                           struct drm_i915_gem_object **out_obj)
+{
+       int ret = 0;
+       struct i915_oa_config *oa_config;
+
+       if (!i915->perf.initialized)
+               return -ENODEV;
+
+       ret = mutex_lock_interruptible(&i915->perf.metrics_lock);
        if (ret)
                return ret;
 
-       *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
-       if (!*out_config)
-               ret = -EINVAL;
-       else
-               atomic_inc(&(*out_config)->ref_count);
+       if (metrics_set == 1) {
+               oa_config = &i915->perf.oa.test_config;
+       } else {
+               oa_config = idr_find(&i915->perf.metrics_idr, metrics_set);
+               if (!oa_config) {
+                       ret = -EINVAL;
+                       goto unlock;
+               }
+       }
 
-       mutex_unlock(&dev_priv->perf.metrics_lock);
+       if (out_config) {
+               atomic_inc(&oa_config->ref_count);
+               *out_config = oa_config;
+       }
+
+       if (out_obj) {
+               if (oa_config->obj) {
+                       *out_obj = i915_gem_object_get(oa_config->obj);
+               } else {
+                       ret = alloc_oa_config_buffer(i915, oa_config);
+                       if (ret)
+                               goto err_buf_alloc;
+
+                       list_add(&oa_config->vma_link,
+                                &i915->perf.metrics_buffers);
+                       *out_obj = i915_gem_object_get(oa_config->obj);
+               }
+       }
+
+       goto unlock;
+
+err_buf_alloc:
+       if (out_config) {
+               put_oa_config(oa_config);
+               *out_config = NULL;
+       }
+unlock:
+       mutex_unlock(&i915->perf.metrics_lock);
 
        return ret;
 }
@@ -1380,7 +1475,7 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
        if (stream->ctx)
                oa_put_render_ctx_id(stream);
 
-       put_oa_config(dev_priv, stream->oa_config);
+       put_oa_config(stream->oa_config);
 
        if (dev_priv->perf.oa.spurious_report_rs.missed) {
                DRM_NOTE("%d spurious OA report notices suppressed due to 
ratelimiting\n",
@@ -2094,7 +2189,8 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
                }
        }
 
-       ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
+       ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
+                                     &stream->oa_config, NULL);
        if (ret) {
                DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
                goto err_config;
@@ -2132,6 +2228,8 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
                goto err_enable;
        }
 
+       DRM_DEBUG("opening stream oa config uuid=%s\n", 
stream->oa_config->uuid);
+
        mutex_unlock(&dev_priv->drm.struct_mutex);
 
        return 0;
@@ -2145,7 +2243,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
        free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
-       put_oa_config(dev_priv, stream->oa_config);
+       put_oa_config(stream->oa_config);
 
        intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
        intel_runtime_pm_put(dev_priv, stream->wakeref);
@@ -2512,9 +2610,21 @@ static int i915_perf_release(struct inode *inode, struct 
file *file)
 {
        struct i915_perf_stream *stream = file->private_data;
        struct drm_i915_private *dev_priv = stream->dev_priv;
+       struct i915_oa_config *oa_config, *next;
 
        mutex_lock(&dev_priv->perf.lock);
+
        i915_perf_destroy_locked(stream);
+
+       /* Dispose of all oa config batch buffers. */
+       mutex_lock(&dev_priv->perf.metrics_lock);
+       list_for_each_entry_safe(oa_config, next, 
&dev_priv->perf.metrics_buffers, vma_link) {
+               list_del(&oa_config->vma_link);
+               i915_gem_object_put(oa_config->obj);
+               oa_config->obj = NULL;
+       }
+       mutex_unlock(&dev_priv->perf.metrics_lock);
+
        mutex_unlock(&dev_priv->perf.lock);
 
        return 0;
@@ -3296,7 +3406,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, 
void *data,
 sysfs_err:
        mutex_unlock(&dev_priv->perf.metrics_lock);
 reg_err:
-       put_oa_config(dev_priv, oa_config);
+       put_oa_config(oa_config);
        DRM_DEBUG("Failed to add new OA config\n");
        return err;
 }
@@ -3350,7 +3460,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, 
void *data,
 
        DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-       put_oa_config(dev_priv, oa_config);
+       put_oa_config(oa_config);
 
 config_err:
        mutex_unlock(&dev_priv->perf.metrics_lock);
@@ -3492,6 +3602,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
                init_waitqueue_head(&dev_priv->perf.oa.poll_wq);
 
                INIT_LIST_HEAD(&dev_priv->perf.streams);
+               INIT_LIST_HEAD(&dev_priv->perf.metrics_buffers);
+
                mutex_init(&dev_priv->perf.lock);
                spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
 
@@ -3508,10 +3620,9 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 
 static int destroy_config(int id, void *p, void *data)
 {
-       struct drm_i915_private *dev_priv = data;
        struct i915_oa_config *oa_config = p;
 
-       put_oa_config(dev_priv, oa_config);
+       put_oa_config(oa_config);
 
        return 0;
 }
@@ -3525,7 +3636,7 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
        if (!dev_priv->perf.initialized)
                return;
 
-       idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
+       idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, NULL);
        idr_destroy(&dev_priv->perf.metrics_idr);
 
        unregister_sysctl_table(dev_priv->perf.sysctl_header);
-- 
2.21.0.392.gf8f6787159e

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to