From: Lionel Landwerlin <lionel.g.landwer...@intel.com>

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer is
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.

v2: No need for locking on OA config object creation (Chris)
    Flush cpu mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
    (Lionel)

v6: Fix 64bit division (Chris)

v7: Store allocated config BOs into the stream (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk> (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_perf.c             | 264 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_perf.h             |  31 +++
 drivers/gpu/drm/i915/i915_perf_types.h       |  24 +-
 4 files changed, 275 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index b0227ab2fe1b..0987100c786b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,6 +138,7 @@
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
 #define   MI_LRI_CS_MMIO               (1<<19)
 #define   MI_LRI_FORCE_POSTED          (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT                (1<<22)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 027a1d39f006..5bd912c01db8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -369,52 +369,215 @@ struct perf_open_properties {
        struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+       struct list_head link;
+
+       struct i915_oa_config *oa_config;
+       struct i915_vma *vma;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+       struct i915_oa_config *oa_config =
+               container_of(ref, typeof(*oa_config), ref);
+
        if (!PTR_ERR(oa_config->flex_regs))
                kfree(oa_config->flex_regs);
        if (!PTR_ERR(oa_config->b_counter_regs))
                kfree(oa_config->b_counter_regs);
        if (!PTR_ERR(oa_config->mux_regs))
                kfree(oa_config->mux_regs);
-       kfree(oa_config);
+
+       kfree_rcu(oa_config, rcu);
 }
 
-static void put_oa_config(struct i915_oa_config *oa_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 {
-       if (!atomic_dec_and_test(&oa_config->ref_count))
-               return;
+       struct i915_oa_config *oa_config;
 
-       free_oa_config(oa_config);
+       rcu_read_lock();
+       if (metrics_set == 1)
+               oa_config = &perf->test_config;
+       else
+               oa_config = idr_find(&perf->metrics_idr, metrics_set);
+       if (oa_config)
+               oa_config = i915_oa_config_get(oa_config);
+       rcu_read_unlock();
+
+       return oa_config;
 }
 
-static int get_oa_config(struct i915_perf *perf,
-                        int metrics_set,
-                        struct i915_oa_config **out_config)
+static u32 *write_cs_mi_lri(u32 *cs,
+                           const struct i915_oa_reg *reg_data,
+                           u32 n_regs)
 {
-       int ret;
+       u32 i;
 
-       if (metrics_set == 1) {
-               *out_config = &perf->test_config;
-               atomic_inc(&perf->test_config.ref_count);
-               return 0;
+       for (i = 0; i < n_regs; i++) {
+               if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+                       u32 n_lri = min_t(u32,
+                                         n_regs - i,
+                                         MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+                       *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+               }
+               *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+               *cs++ = reg_data[i].value;
        }
 
-       ret = mutex_lock_interruptible(&perf->metrics_lock);
-       if (ret)
-               return ret;
+       return cs;
+}
 
-       *out_config = idr_find(&perf->metrics_idr, metrics_set);
-       if (!*out_config)
-               ret = -EINVAL;
-       else
-               atomic_inc(&(*out_config)->ref_count);
+static int num_lri_dwords(int num_regs)
+{
+       int count = 0;
 
-       mutex_unlock(&perf->metrics_lock);
+       if (num_regs > 0) {
+               count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
+               count += num_regs * 2;
+       }
 
-       return ret;
+       return count;
+}
+
+static struct i915_oa_config_bo *
+alloc_oa_config_buffer(struct i915_perf_stream *stream,
+                      struct i915_oa_config *oa_config)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_oa_config_bo *oa_bo;
+       size_t config_length = 0;
+       u32 *cs;
+       int err;
+
+       oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+       if (!oa_bo)
+               return ERR_PTR(-ENOMEM);
+
+       oa_bo->oa_config = i915_oa_config_get(oa_config);
+
+       config_length += num_lri_dwords(oa_config->mux_regs_len);
+       config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+       config_length += num_lri_dwords(oa_config->flex_regs_len);
+       config_length++; /* MI_BATCH_BUFFER_END */
+       config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
+
+       obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+       if (IS_ERR(obj)) {
+               err = PTR_ERR(obj);
+               goto err_oa_config;
+       }
+
+       cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+       if (IS_ERR(cs)) {
+               err = PTR_ERR(cs);
+               goto err_oa_bo;
+       }
+
+       cs = write_cs_mi_lri(cs,
+                            oa_config->mux_regs,
+                            oa_config->mux_regs_len);
+       cs = write_cs_mi_lri(cs,
+                            oa_config->b_counter_regs,
+                            oa_config->b_counter_regs_len);
+       cs = write_cs_mi_lri(cs,
+                            oa_config->flex_regs,
+                            oa_config->flex_regs_len);
+
+       *cs++ = MI_BATCH_BUFFER_END;
+
+       i915_gem_object_flush_map(obj);
+       i915_gem_object_unpin_map(obj);
+
+       oa_bo->vma =
+               i915_vma_instance(obj, &stream->engine->gt->ggtt->vm, NULL);
+       if (IS_ERR(oa_bo->vma)) {
+               err = PTR_ERR(oa_bo->vma);
+               goto err_oa_bo;
+       }
+
+       return oa_bo;
+
+err_oa_bo:
+       i915_gem_object_put(obj);
+err_oa_config:
+       i915_oa_config_put(oa_bo->oa_config);
+       kfree(oa_bo);
+       return ERR_PTR(err);
+}
+
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+       i915_oa_config_put(oa_bo->oa_config);
+       i915_vma_put(oa_bo->vma);
+       kfree(oa_bo);
+}
+
+int i915_perf_stream_get_oa_config(struct i915_perf_stream *stream,
+                                  int metrics_set,
+                                  struct i915_oa_config **out_config,
+                                  struct i915_vma **out_vma)
+{
+       struct i915_oa_config *oa_config;
+       int err = 0;
+
+       if (!stream->perf->i915)
+               return -ENODEV;
+
+       oa_config = i915_perf_get_oa_config(stream->perf, metrics_set);
+       if (!oa_config)
+               return -EINVAL;
+
+       if (out_vma) {
+               struct i915_oa_config_bo *oa_bo = NULL, *oa_bo_iter;
+
+               /* Look for the buffer in the already allocated BOs attached
+                * to the stream.
+                */
+               err = mutex_lock_interruptible(&stream->config_mutex);
+               if (err)
+                       goto err;
+
+               list_for_each_entry(oa_bo_iter, &stream->oa_config_bos, link) {
+                       if (oa_bo_iter->oa_config == oa_config &&
+                           memcmp(oa_bo_iter->oa_config->uuid,
+                                  oa_config->uuid,
+                                  sizeof(oa_config->uuid)) == 0) {
+                               oa_bo = oa_bo_iter;
+                               break;
+                       }
+               }
+
+               mutex_unlock(&stream->config_mutex);
+
+               if (!oa_bo) {
+                       oa_bo = alloc_oa_config_buffer(stream, oa_config);
+                       if (IS_ERR(oa_bo)) {
+                               err = PTR_ERR(oa_bo);
+                               goto err;
+                       }
+
+                       err = mutex_lock_interruptible(&stream->config_mutex);
+                       if (err) {
+                               free_oa_config_bo(oa_bo);
+                               goto err;
+                       }
+
+                       list_add(&oa_bo->link, &stream->oa_config_bos);
+
+                       mutex_unlock(&stream->config_mutex);
+               }
+
+               *out_vma = i915_vma_get(oa_bo->vma);
+       }
+
+err:
+       if (!err)
+               *out_config = oa_config;
+       return err;
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1337,6 +1500,16 @@ free_oa_buffer(struct i915_perf_stream *stream)
        stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+       struct i915_oa_config_bo *oa_bo, *tmp;
+
+       i915_oa_config_put(stream->oa_config);
+       list_for_each_entry_safe(oa_bo, tmp, &stream->oa_config_bos, link)
+               free_oa_config_bo(oa_bo);
+}
+
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
        struct i915_perf *perf = stream->perf;
@@ -1358,7 +1531,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
        if (stream->ctx)
                oa_put_render_ctx_id(stream);
 
-       put_oa_config(stream->oa_config);
+       free_oa_configs(stream);
 
        if (perf->spurious_report_rs.missed) {
                DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -2171,6 +2344,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
                return -EINVAL;
        }
 
+       mutex_init(&stream->config_mutex);
+
        stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
        format_size = perf->oa_formats[props->oa_format].size;
@@ -2199,9 +2374,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
                }
        }
 
-       ret = get_oa_config(perf, props->metrics_set, &stream->oa_config);
-       if (ret) {
+       stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
+       if (!stream->oa_config) {
                DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
+               ret = -EINVAL;
                goto err_config;
        }
 
@@ -2234,6 +2410,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
                goto err_enable;
        }
 
+       DRM_DEBUG("opening stream oa config uuid=%s\n",
+                 stream->oa_config->uuid);
+
        hrtimer_init(&stream->poll_check_timer,
                     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        stream->poll_check_timer.function = oa_poll_check_timer_cb;
@@ -2249,11 +2428,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
        free_oa_buffer(stream);
 
 err_oa_buf_alloc:
-       put_oa_config(stream->oa_config);
-
        intel_uncore_forcewake_put(stream->gt->uncore, FORCEWAKE_ALL);
        intel_runtime_pm_put(stream->gt->uncore->rpm, stream->wakeref);
 
+       free_oa_configs(stream);
+
 err_config:
        if (stream->ctx)
                oa_put_render_ctx_id(stream);
@@ -2724,6 +2903,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
                goto err_ctx;
        }
 
+       INIT_LIST_HEAD(&stream->oa_config_bos);
        stream->perf = perf;
        stream->gt = &perf->i915->gt;
        stream->ctx = specific_ctx;
@@ -3058,7 +3238,8 @@ void i915_perf_register(struct drm_i915_private *i915)
        if (ret)
                goto sysfs_error;
 
-       atomic_set(&perf->test_config.ref_count, 1);
+       perf->test_config.perf = perf;
+       kref_init(&perf->test_config.ref);
 
        goto exit;
 
@@ -3316,7 +3497,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
                return -ENOMEM;
        }
 
-       atomic_set(&oa_config->ref_count, 1);
+       oa_config->perf = perf;
+       kref_init(&oa_config->ref);
 
        if (!uuid_is_valid(args->uuid)) {
                DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3415,7 +3597,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 sysfs_err:
        mutex_unlock(&perf->metrics_lock);
 reg_err:
-       put_oa_config(oa_config);
+       i915_oa_config_put(oa_config);
        DRM_DEBUG("Failed to add new OA config\n");
        return err;
 }
@@ -3451,13 +3633,13 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
        ret = mutex_lock_interruptible(&perf->metrics_lock);
        if (ret)
-               goto lock_err;
+               return ret;
 
        oa_config = idr_find(&perf->metrics_idr, *arg);
        if (!oa_config) {
                DRM_DEBUG("Failed to remove unknown OA config\n");
                ret = -ENOENT;
-               goto config_err;
+               goto err_unlock;
        }
 
        GEM_BUG_ON(*arg != oa_config->id);
@@ -3467,13 +3649,16 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
        idr_remove(&perf->metrics_idr, *arg);
 
+       mutex_unlock(&perf->metrics_lock);
+
        DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-       put_oa_config(oa_config);
+       i915_oa_config_put(oa_config);
+
+       return 0;
 
-config_err:
+err_unlock:
        mutex_unlock(&perf->metrics_lock);
-lock_err:
        return ret;
 }
 
@@ -3643,7 +3828,7 @@ void i915_perf_init(struct drm_i915_private *i915)
 
 static int destroy_config(int id, void *p, void *data)
 {
-       put_oa_config(p);
+       i915_oa_config_put(p);
        return 0;
 }
 
@@ -3655,9 +3840,6 @@ void i915_perf_fini(struct drm_i915_private *i915)
 {
        struct i915_perf *perf = &i915->perf;
 
-       if (!perf->i915)
-               return;
-
        idr_for_each(&perf->metrics_idr, destroy_config, perf);
        idr_destroy(&perf->metrics_idr);
 
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index 295e33e8eef7..2d1fcc94c518 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -7,12 +7,16 @@
 #define __I915_PERF_H__
 
 #include <linux/types.h>
+#include <linux/kref.h>
 
 #include "i915_perf_types.h"
 
 struct drm_device;
 struct drm_file;
+struct drm_i915_gem_object;
 struct drm_i915_private;
+struct i915_oa_config;
+struct i915_perf_stream;
 struct intel_context;
 struct intel_engine_cs;
 
@@ -28,7 +32,34 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file);
 int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
                                  struct drm_file *file);
+
 void i915_oa_init_reg_state(const struct intel_context *ce,
                            const struct intel_engine_cs *engine);
 
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set);
+
+int i915_perf_stream_get_oa_config(struct i915_perf_stream *stream,
+                                  int metrics_set,
+                                  struct i915_oa_config **out_config,
+                                  struct i915_vma **out_vma);
+
+static inline struct i915_oa_config *
+i915_oa_config_get(struct i915_oa_config *oa_config)
+{
+       if (kref_get_unless_zero(&oa_config->ref))
+               return oa_config;
+       else
+               return NULL;
+}
+
+void i915_oa_config_release(struct kref *ref);
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+       if (!oa_config)
+               return;
+
+       kref_put(&oa_config->ref, i915_oa_config_release);
+}
+
 #endif /* __I915_PERF_H__ */
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 82cd3b295037..5baee4f0fe91 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -22,6 +22,7 @@
 struct drm_i915_private;
 struct file;
 struct i915_gem_context;
+struct i915_perf;
 struct i915_vma;
 struct intel_context;
 struct intel_engine_cs;
@@ -37,6 +38,8 @@ struct i915_oa_reg {
 };
 
 struct i915_oa_config {
+       struct i915_perf *perf;
+
        char uuid[UUID_STRING_LEN + 1];
        int id;
 
@@ -51,7 +54,8 @@ struct i915_oa_config {
        struct attribute *attrs[2];
        struct device_attribute sysfs_metric_id;
 
-       atomic_t ref_count;
+       struct kref ref;
+       struct rcu_head rcu;
 };
 
 struct i915_perf_stream;
@@ -178,13 +182,25 @@ struct i915_perf_stream {
         */
        const struct i915_perf_stream_ops *ops;
 
+       /**
+        * @config_mutex: Protects access to @oa_config & @oa_config_bos.
+        */
+       struct mutex config_mutex;
+
        /**
         * @oa_config: The OA configuration used by the stream.
         */
        struct i915_oa_config *oa_config;
 
+       /**
+        * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
+        * each time @oa_config changes.
+        */
+       struct list_head oa_config_bos;
+
        /**
         * @pinned_ctx: The OA context specific information.
+        * The OA context specific information.
         */
        struct intel_context *pinned_ctx;
        u32 specific_ctx_id;
@@ -337,13 +353,13 @@ struct i915_perf {
 
        /*
         * Lock associated with adding/modifying/removing OA configs
-        * in dev_priv->perf.metrics_idr.
+        * in perf->metrics_idr.
         */
        struct mutex metrics_lock;
 
        /*
-        * List of dynamic configurations, you need to hold
-        * dev_priv->perf.metrics_lock to access it.
+        * List of dynamic configurations (struct i915_oa_config), you
+        * need to hold perf->metrics_lock to access it.
         */
        struct idr metrics_idr;
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to