Now that we may have multiple OA units in a single GT as well as on
separate GTs, create an engine group that maps to a single OA unit.

Signed-off-by: Umesh Nerlige Ramappa <[email protected]>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   4 +
 drivers/gpu/drm/i915/gt/intel_sseu.c         |   3 +-
 drivers/gpu/drm/i915/i915_perf.c             | 126 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_perf_types.h       |  51 +++++++-
 4 files changed, 171 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fd54fb8810f..8a8b0dce241b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -53,6 +53,8 @@ struct intel_gt;
 struct intel_ring;
 struct intel_uncore;
 struct intel_breadcrumbs;
+struct intel_engine_cs;
+struct i915_perf_group;
 
 typedef u32 intel_engine_mask_t;
 #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
@@ -603,6 +605,8 @@ struct intel_engine_cs {
        } props, defaults;
 
        I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
+
+       struct i915_perf_group *oa_group;
 };
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6c6198a257ac..1141f875f5bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -6,6 +6,7 @@
 #include <linux/string_helpers.h>
 
 #include "i915_drv.h"
+#include "i915_perf_types.h"
 #include "intel_engine_regs.h"
 #include "intel_gt_regs.h"
 #include "intel_sseu.h"
@@ -677,7 +678,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
         * If i915/perf is active, we want a stable powergating configuration
         * on the system. Use the configuration pinned by i915/perf.
         */
-       if (gt->perf.exclusive_stream)
+       if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
                req_sseu = &gt->perf.sseu;
 
        slices = hweight8(req_sseu->slice_mask);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e134523576f8..fda779b2c16f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1587,8 +1587,9 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
 {
        struct i915_perf *perf = stream->perf;
        struct intel_gt *gt = stream->engine->gt;
+       struct i915_perf_group *g = stream->engine->oa_group;
 
-       if (WARN_ON(stream != gt->perf.exclusive_stream))
+       if (WARN_ON(stream != g->exclusive_stream))
                return;
 
        /*
@@ -1597,7 +1598,7 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
         *
         * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
         */
-       WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+       WRITE_ONCE(g->exclusive_stream, NULL);
        perf->ops.disable_metric_set(stream);
 
        free_oa_buffer(stream);
@@ -3195,6 +3196,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 {
        struct drm_i915_private *i915 = stream->perf->i915;
        struct i915_perf *perf = stream->perf;
+       struct i915_perf_group *g;
        struct intel_gt *gt;
        int ret;
 
@@ -3205,6 +3207,12 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
        }
        gt = props->engine->gt;
 
+       g = props->engine->oa_group;
+       if (!g) {
+               DRM_DEBUG("Perf group invalid\n");
+               return -EINVAL;
+       }
+
        /*
         * If the sysfs metrics/ directory wasn't registered for some
         * reason then don't let userspace try their luck with config
@@ -3234,7 +3242,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
         * counter reports and marshal to the appropriate client
         * we currently only allow exclusive access
         */
-       if (gt->perf.exclusive_stream) {
+       if (g->exclusive_stream) {
                drm_dbg(&stream->perf->i915->drm,
                        "OA unit already in use\n");
                return -EBUSY;
@@ -3329,7 +3337,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
        stream->ops = &i915_oa_stream_ops;
 
        stream->engine->gt->perf.sseu = props->sseu;
-       WRITE_ONCE(gt->perf.exclusive_stream, stream);
+       WRITE_ONCE(g->exclusive_stream, stream);
 
        ret = i915_perf_stream_enable_sync(stream);
        if (ret) {
@@ -3352,7 +3360,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
        return 0;
 
 err_enable:
-       WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+       WRITE_ONCE(g->exclusive_stream, NULL);
        perf->ops.disable_metric_set(stream);
 
        free_oa_buffer(stream);
@@ -3381,12 +3389,13 @@ void i915_oa_init_reg_state(const struct intel_context 
*ce,
                            const struct intel_engine_cs *engine)
 {
        struct i915_perf_stream *stream;
+       struct i915_perf_group *g = engine->oa_group;
 
-       if (!engine_supports_oa(engine))
+       if (!g)
                return;
 
        /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
-       stream = READ_ONCE(engine->gt->perf.exclusive_stream);
+       stream = READ_ONCE(g->exclusive_stream);
        if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
                gen8_update_reg_state_unlocked(ce, stream);
 }
@@ -4755,6 +4764,95 @@ static struct ctl_table oa_table[] = {
        {}
 };
 
+static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
+{
+       enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
+
+       switch (platform) {
+       default:
+               return 1;
+       }
+}
+
+static u32 __oa_engine_group(struct intel_engine_cs *engine)
+{
+       if (!engine_supports_oa(engine))
+               return PERF_GROUP_INVALID;
+
+       switch (engine->class) {
+       case RENDER_CLASS:
+               return PERF_GROUP_OAG;
+
+       default:
+               return PERF_GROUP_INVALID;
+       }
+}
+
+static void oa_init_groups(struct intel_gt *gt)
+{
+       int i, num_groups = gt->perf.num_perf_groups;
+       struct i915_perf *perf = &gt->i915->perf;
+
+       for (i = 0; i < num_groups; i++) {
+               struct i915_perf_group *g = &gt->perf.group[i];
+
+               /* Fused off engines can result in a group with num_engines == 
0 */
+               if (g->num_engines == 0)
+                       continue;
+
+               /* Set oa_unit_ids now to ensure ids remain contiguous. */
+               g->oa_unit_id = perf->oa_unit_ids++;
+
+               g->gt = gt;
+       }
+}
+
+static int oa_init_gt(struct intel_gt *gt)
+{
+       u32 num_groups = __num_perf_groups_per_gt(gt);
+       struct intel_engine_cs *engine;
+       struct i915_perf_group *g;
+       intel_engine_mask_t tmp;
+
+       g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
+       if (drm_WARN_ON(&gt->i915->drm, !g))
+               return -ENOMEM;
+
+       for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
+               u32 index;
+
+               index = __oa_engine_group(engine);
+               if (index < num_groups) {
+                       g[index].engine_mask |= BIT(engine->id);
+                       g[index].num_engines++;
+                       engine->oa_group = &g[index];
+               } else {
+                       engine->oa_group = NULL;
+               }
+       }
+
+       gt->perf.num_perf_groups = num_groups;
+       gt->perf.group = g;
+
+       oa_init_groups(gt);
+
+       return 0;
+}
+
+static int oa_init_engine_groups(struct i915_perf *perf)
+{
+       struct intel_gt *gt;
+       int i, ret;
+
+       for_each_gt(gt, perf->i915, i) {
+               ret = oa_init_gt(gt);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static void oa_init_supported_formats(struct i915_perf *perf)
 {
        struct drm_i915_private *i915 = perf->i915;
@@ -4921,7 +5019,7 @@ int i915_perf_init(struct drm_i915_private *i915)
 
        if (perf->ops.enable_metric_set) {
                struct intel_gt *gt;
-               int i;
+               int i, ret;
 
                for_each_gt(gt, i915, i)
                        mutex_init(&gt->perf.lock);
@@ -4960,6 +5058,13 @@ int i915_perf_init(struct drm_i915_private *i915)
 
                perf->i915 = i915;
 
+               ret = oa_init_engine_groups(perf);
+               if (ret) {
+                       drm_err(&i915->drm,
+                               "OA initialization failed %d\n", ret);
+                       return ret;
+               }
+
                oa_init_supported_formats(perf);
        }
 
@@ -4990,10 +5095,15 @@ void i915_perf_sysctl_unregister(void)
 void i915_perf_fini(struct drm_i915_private *i915)
 {
        struct i915_perf *perf = &i915->perf;
+       struct intel_gt *gt;
+       int i;
 
        if (!perf->i915)
                return;
 
+       for_each_gt(gt, perf->i915, i)
+               kfree(gt->perf.group);
+
        idr_for_each(&perf->metrics_idr, destroy_config, perf);
        idr_destroy(&perf->metrics_idr);
 
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
b/drivers/gpu/drm/i915/i915_perf_types.h
index e36f046fe2b6..ce99551ad0fd 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -17,6 +17,7 @@
 #include <linux/wait.h>
 #include <uapi/drm/i915_drm.h>
 
+#include "gt/intel_engine_types.h"
 #include "gt/intel_sseu.h"
 #include "i915_reg_defs.h"
 #include "intel_wakeref.h"
@@ -30,6 +31,13 @@ struct i915_vma;
 struct intel_context;
 struct intel_engine_cs;
 
+enum {
+       PERF_GROUP_OAG = 0,
+
+       PERF_GROUP_MAX,
+       PERF_GROUP_INVALID = U32_MAX,
+};
+
 struct i915_oa_format {
        u32 format;
        int size;
@@ -390,6 +398,35 @@ struct i915_oa_ops {
        u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
 };
 
+struct i915_perf_group {
+       /*
+        * @type: Identifier for the OA unit.
+        */
+       u32 oa_unit_id;
+
+       /*
+        * @gt: gt that this group belongs to
+        */
+       struct intel_gt *gt;
+
+       /*
+        * @exclusive_stream: The stream currently using the OA unit. This is
+        * sometimes accessed outside a syscall associated to its file
+        * descriptor.
+        */
+       struct i915_perf_stream *exclusive_stream;
+
+       /*
+        * @num_engines: The number of engines using this OA buffer.
+        */
+       u32 num_engines;
+
+       /*
+        * @engine_mask: A mask of engines using a single OA buffer.
+        */
+       intel_engine_mask_t engine_mask;
+};
+
 struct i915_perf_gt {
        /*
         * Lock associated with anything below within this structure.
@@ -402,12 +439,15 @@ struct i915_perf_gt {
         */
        struct intel_sseu sseu;
 
+       /**
+        * @num_perf_groups: number of perf groups per gt.
+        */
+       u32 num_perf_groups;
+
        /*
-        * @exclusive_stream: The stream currently using the OA unit. This is
-        * sometimes accessed outside a syscall associated to its file
-        * descriptor.
+        * @group: list of OA groups - one for each OA buffer.
         */
-       struct i915_perf_stream *exclusive_stream;
+       struct i915_perf_group *group;
 };
 
 struct i915_perf {
@@ -461,6 +501,9 @@ struct i915_perf {
        unsigned long format_mask[FORMAT_MASK_SIZE];
 
        atomic64_t noa_programming_delay;
+
+       /* oa unit ids */
+       u32 oa_unit_ids;
 };
 
 #endif /* _I915_PERF_TYPES_H_ */
-- 
2.36.1

Reply via email to