From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

Add a PMU counter exposing the number of requests whose dependencies have
been resolved and which are waiting for a slot on the GPU to run.

This is useful to analyze the overall load of the system.

v2: Don't limit to gen8+.

v3:
 * Rebase for dynamic sysfs.
 * Drop currently executing requests.

v4:
 * Sync with internal renaming.
 * Drop floating point constant. (Chris Wilson)

v5:
 * Change scale to 1024 for faster arithmetic. (Chris Wilson)

v6:
 * Refactored for timer period accounting.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 19 +++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 include/uapi/drm/i915_drm.h             |  7 ++++++-
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 5f8cc3fe1826..41527b682c72 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -16,7 +16,8 @@
        (BIT(I915_SAMPLE_BUSY) | \
         BIT(I915_SAMPLE_WAIT) | \
         BIT(I915_SAMPLE_SEMA) | \
-        BIT(I915_SAMPLE_QUEUED))
+        BIT(I915_SAMPLE_QUEUED) | \
+        BIT(I915_SAMPLE_RUNNABLE))
 
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
@@ -217,6 +218,12 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
                                        
atomic_read(&engine->request_stats.queued),
                                        (u64)period_ns *
                                        I915_SAMPLE_QUEUED_DIVISOR / 1000000);
+
+               if (engine->pmu.enable & BIT(I915_SAMPLE_RUNNABLE))
+                       
add_sample_mult(&engine->pmu.sample[I915_SAMPLE_RUNNABLE],
+                                       engine->request_stats.runnable,
+                                       (u64)period_ns *
+                                       I915_SAMPLE_QUEUED_DIVISOR / 1000000);
        }
 
        if (fw)
@@ -331,6 +338,7 @@ engine_event_status(struct intel_engine_cs *engine,
        case I915_SAMPLE_BUSY:
        case I915_SAMPLE_WAIT:
        case I915_SAMPLE_QUEUED:
+       case I915_SAMPLE_RUNNABLE:
                break;
        case I915_SAMPLE_SEMA:
                if (INTEL_GEN(engine->i915) < 6)
@@ -549,7 +557,8 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
                } else {
                        val = engine->pmu.sample[sample].cur;
 
-                       if (sample == I915_SAMPLE_QUEUED)
+                       if (sample == I915_SAMPLE_QUEUED ||
+                           sample == I915_SAMPLE_RUNNABLE)
                                val = div_u64(val, MSEC_PER_SEC);  /* to qd */
                }
        } else {
@@ -846,6 +855,7 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
 
 /* No brackets or quotes below please. */
 #define I915_SAMPLE_QUEUED_SCALE 0.0009765625
+#define I915_SAMPLE_RUNNABLE_SCALE 0.0009765625
 
 static struct attribute **
 create_event_attributes(struct drm_i915_private *i915)
@@ -871,6 +881,8 @@ create_event_attributes(struct drm_i915_private *i915)
                __engine_event(I915_SAMPLE_WAIT, "wait"),
                __engine_event_scale(I915_SAMPLE_QUEUED, "queued",
                                     __stringify(I915_SAMPLE_QUEUED_SCALE)),
+               __engine_event_scale(I915_SAMPLE_RUNNABLE, "runnable",
+                                    __stringify(I915_SAMPLE_RUNNABLE_SCALE)),
        };
        unsigned int count = 0;
        struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -883,6 +895,9 @@ create_event_attributes(struct drm_i915_private *i915)
        BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
                     (1 / I915_SAMPLE_QUEUED_SCALE));
 
+       BUILD_BUG_ON(I915_SAMPLE_RUNNABLE_DIVISOR !=
+                    (1 / I915_SAMPLE_RUNNABLE_SCALE));
+
        /* Count how many counters we will be exposing. */
        for (i = 0; i < ARRAY_SIZE(events); i++) {
                if (!config_status(i915, events[i].config))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 902b63eeaf50..703cea694f0d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -420,7 +420,7 @@ struct intel_engine_cs {
                 *
                 * Our internal timer stores the current counters in this field.
                 */
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_QUEUED + 1)
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_RUNNABLE + 1)
                struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
        } pmu;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6094cc9ca6d9..cf0265b20e37 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -111,11 +111,13 @@ enum drm_i915_pmu_engine_sample {
        I915_SAMPLE_BUSY = 0,
        I915_SAMPLE_WAIT = 1,
        I915_SAMPLE_SEMA = 2,
-       I915_SAMPLE_QUEUED = 3
+       I915_SAMPLE_QUEUED = 3,
+       I915_SAMPLE_RUNNABLE = 4,
 };
 
  /* Divide counter value by divisor to get the real value. */
 #define I915_SAMPLE_QUEUED_DIVISOR (1024)
+#define I915_SAMPLE_RUNNABLE_DIVISOR (1024)
 
 #define I915_PMU_SAMPLE_BITS (4)
 #define I915_PMU_SAMPLE_MASK (0xf)
@@ -140,6 +142,9 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_ENGINE_QUEUED(class, instance) \
        __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
 
+#define I915_PMU_ENGINE_RUNNABLE(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_RUNNABLE)
+
 #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
 
 #define I915_PMU_ACTUAL_FREQUENCY      __I915_PMU_OTHER(0)
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to