From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

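This reduces the cost of the software engine busyness tracking
to a single no-op instruction when there are no listeners, by
guarding the context in/out hooks with a static key. Since
toggling the static key can sleep, enabling and disabling of the
busy stats is moved from the perf callbacks into work items.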

v2: Rebase and some comments.
v3: Rebase.
v4: Checkpatch fixes.
v5: Rebase.
v6: Use system_long_wq to avoid being blocked by struct_mutex
    users.
v7: Fix bad conflict resolution from last rebase. (Dmitry Rogozhkin)
v8: Rebase.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         |  54 +++++++++++++++--
 drivers/gpu/drm/i915/intel_engine_cs.c  |  17 ++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h | 101 ++++++++++++++++++++------------
 3 files changed, 130 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 228aa50ce709..e768f33ebb3d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -501,11 +501,17 @@ static void i915_pmu_enable(struct perf_event *event)
                GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
                GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
                if (engine->pmu.enable_count[sample]++ == 0) {
+                       /*
+                        * Enable engine busy stats tracking if needed or
+                        * alternatively cancel the scheduled disabling of the
+                        * same.
+                        */
                        if (engine_needs_busy_stats(engine) &&
                            !engine->pmu.busy_stats) {
-                               engine->pmu.busy_stats =
-                                       intel_enable_engine_stats(engine) == 0;
-                               WARN_ON_ONCE(!engine->pmu.busy_stats);
+                               engine->pmu.busy_stats = true;
+                               if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+                                       queue_work(system_long_wq,
+                                                  &engine->pmu.enable_busy_stats);
                        }
                }
        }
@@ -548,7 +554,15 @@ static void i915_pmu_disable(struct perf_event *event)
                        if (!engine_needs_busy_stats(engine) &&
                            engine->pmu.busy_stats) {
                                engine->pmu.busy_stats = false;
-                               intel_disable_engine_stats(engine);
+                               /*
+                                * We request a delayed disable so that rapid
+                                * on/off cycles on events, which can happen
+                                * when tools like perf stat start, are handled
+                                * in a nicer way.
+                                */
+                               queue_delayed_work(system_long_wq,
+                                                  &engine->pmu.disable_busy_stats,
+                                                  round_jiffies_up_relative(HZ));
                        }
                }
        }
@@ -739,9 +753,27 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
        return 0;
 }
 
+static void __enable_busy_stats(struct work_struct *work)
+{
+       struct intel_engine_cs *engine =
+               container_of(work, typeof(*engine), pmu.enable_busy_stats);
+
+       WARN_ON_ONCE(intel_enable_engine_stats(engine));
+}
+
+static void __disable_busy_stats(struct work_struct *work)
+{
+       struct intel_engine_cs *engine =
+              container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
+
+       intel_disable_engine_stats(engine);
+}
+
 void i915_pmu_register(struct drm_i915_private *i915)
 {
        int ret;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
 
        if (INTEL_GEN(i915) <= 2) {
                DRM_INFO("PMU not supported for this GPU.");
@@ -775,6 +807,12 @@ void i915_pmu_register(struct drm_i915_private *i915)
        i915->pmu.timer.function = i915_sample;
        i915->pmu.enable = 0;
 
+       for_each_engine(engine, i915, id) {
+               INIT_WORK(&engine->pmu.enable_busy_stats, __enable_busy_stats);
+               INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
+                                 __disable_busy_stats);
+       }
+
        ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
        if (ret == 0)
                return;
@@ -793,6 +831,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
        if (!i915->pmu.base.event_init)
                return;
 
@@ -804,6 +845,11 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
        hrtimer_cancel(&i915->pmu.timer);
 
+       for_each_engine(engine, i915, id) {
+               flush_work(&engine->pmu.enable_busy_stats);
+               flush_delayed_work(&engine->pmu.disable_busy_stats);
+       }
+
        perf_pmu_unregister(&i915->pmu.base);
        i915->pmu.base.event_init = NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 8db83f504d70..eaf1d31dbf31 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  *
  */
+#include <linux/static_key.h>
 
 #include "i915_drv.h"
 #include "intel_ringbuffer.h"
@@ -1615,6 +1616,10 @@ intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
        return i915->engine_class[class][instance];
 }
 
+DEFINE_STATIC_KEY_FALSE(i915_engine_stats_key);
+static DEFINE_MUTEX(i915_engine_stats_mutex);
+static int i915_engine_stats_ref;
+
 /**
  * intel_enable_engine_stats() - Enable engine busy tracking on engine
  * @engine: engine to enable stats collection
@@ -1630,6 +1635,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
        if (!i915_modparams.enable_execlists)
                return -ENODEV;
 
+       mutex_lock(&i915_engine_stats_mutex);
+
        spin_lock_irqsave(&engine->stats.lock, flags);
        if (engine->stats.enabled == ~0)
                goto busy;
@@ -1637,10 +1644,16 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
                engine->stats.enabled_at = ktime_get();
        spin_unlock_irqrestore(&engine->stats.lock, flags);
 
+       if (i915_engine_stats_ref++ == 0)
+               static_branch_enable(&i915_engine_stats_key);
+
+       mutex_unlock(&i915_engine_stats_mutex);
+
        return 0;
 
 busy:
        spin_unlock_irqrestore(&engine->stats.lock, flags);
+       mutex_unlock(&i915_engine_stats_mutex);
 
        return -EBUSY;
 }
@@ -1658,6 +1671,7 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
        if (!i915_modparams.enable_execlists)
                return;
 
+       mutex_lock(&i915_engine_stats_mutex);
        spin_lock_irqsave(&engine->stats.lock, flags);
        WARN_ON_ONCE(engine->stats.enabled == 0);
        if (--engine->stats.enabled == 0) {
@@ -1667,6 +1681,9 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
                engine->stats.total = 0;
        }
        spin_unlock_irqrestore(&engine->stats.lock, flags);
+       if (--i915_engine_stats_ref == 0)
+               static_branch_disable(&i915_engine_stats_key);
+       mutex_unlock(&i915_engine_stats_mutex);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ef638b97e46a..7d9506c3efdc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -355,6 +355,22 @@ struct intel_engine_cs {
                 *              requested.
                 */
                bool busy_stats;
+               /**
+                * @enable_busy_stats: Work item for engine busy stats enabling.
+                *
+                * Since the action can sleep it needs to be decoupled from the
+                * perf API callback.
+                */
+               struct work_struct enable_busy_stats;
+               /**
+                * @disable_busy_stats: Work item for busy stats disabling.
+                *
+                * Same as with @enable_busy_stats action, with the difference
+                * that we delay it in case there are rapid enable-disable
+                * actions, which can happen during tool startup (like perf
+                * stat).
+                */
+               struct delayed_work disable_busy_stats;
        } pmu;
 
        /*
@@ -896,59 +912,68 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 struct intel_engine_cs *
 intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
 
+DECLARE_STATIC_KEY_FALSE(i915_engine_stats_key);
+
 static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 {
        unsigned long flags;
 
-       if (READ_ONCE(engine->stats.enabled) == 0)
-               return;
+       if (static_branch_unlikely(&i915_engine_stats_key)) {
+               if (READ_ONCE(engine->stats.enabled) == 0)
+                       return;
 
-       spin_lock_irqsave(&engine->stats.lock, flags);
+               spin_lock_irqsave(&engine->stats.lock, flags);
 
-       if (engine->stats.enabled > 0) {
-               if (engine->stats.active++ == 0)
-                       engine->stats.start = ktime_get();
-               GEM_BUG_ON(engine->stats.active == 0);
-       }
+                       if (engine->stats.enabled > 0) {
+                               if (engine->stats.active++ == 0)
+                                       engine->stats.start = ktime_get();
+                               GEM_BUG_ON(engine->stats.active == 0);
+                       }
 
-       spin_unlock_irqrestore(&engine->stats.lock, flags);
+               spin_unlock_irqrestore(&engine->stats.lock, flags);
+       }
 }
 
 static inline void intel_engine_context_out(struct intel_engine_cs *engine)
 {
        unsigned long flags;
 
-       if (READ_ONCE(engine->stats.enabled) == 0)
-               return;
-
-       spin_lock_irqsave(&engine->stats.lock, flags);
-
-       if (engine->stats.enabled > 0) {
-               ktime_t last, now = ktime_get();
-
-               if (engine->stats.active && --engine->stats.active == 0) {
-                       /*
-                        * Decrement the active context count and in case GPU
-                        * is now idle add up to the running total.
-                        */
-                       last = ktime_sub(now, engine->stats.start);
-
-                       engine->stats.total = ktime_add(engine->stats.total,
-                                                       last);
-               } else if (engine->stats.active == 0) {
-                       /*
-                        * After turning on engine stats, context out might be
-                        * the first event in which case we account from the
-                        * time stats gathering was turned on.
-                        */
-                       last = ktime_sub(now, engine->stats.enabled_at);
-
-                       engine->stats.total = ktime_add(engine->stats.total,
-                                                       last);
+       if (static_branch_unlikely(&i915_engine_stats_key)) {
+               if (READ_ONCE(engine->stats.enabled) == 0)
+                       return;
+
+               spin_lock_irqsave(&engine->stats.lock, flags);
+
+               if (engine->stats.enabled > 0) {
+                       ktime_t last, now = ktime_get();
+
+                       if (engine->stats.active &&
+                           --engine->stats.active == 0) {
+                               /*
+                                * Decrement the active context count and in
+                                * case GPU is now idle add up to the running
+                                * total.
+                                */
+                               last = ktime_sub(now, engine->stats.start);
+
+                               engine->stats.total =
+                                       ktime_add(engine->stats.total, last);
+                       } else if (engine->stats.active == 0) {
+                               /*
+                                * After turning on engine stats, context out
+                                * might be the first event in which case we
+                                * account from the time stats gathering was
+                                * turned on.
+                                */
+                               last = ktime_sub(now, engine->stats.enabled_at);
+
+                               engine->stats.total =
+                                       ktime_add(engine->stats.total, last);
+                       }
                }
-       }
 
-       spin_unlock_irqrestore(&engine->stats.lock, flags);
+               spin_unlock_irqrestore(&engine->stats.lock, flags);
+       }
 }
 
 int intel_enable_engine_stats(struct intel_engine_cs *engine);
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to