[Intel-gfx] [PATCH 2/4] drm/i915/pmu: Fix PMU enable vs execlists tasklet race

2018-02-13 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Commit 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking
inside for busy-stats") added a tasklet_disable call in busy stats
enabling, but we failed to understand that the PMU enable callback runs
as a hard IRQ (IPI).

Consequence of this is that the PMU enable callback can interrupt the
execlists tasklet, and will then deadlock when it calls
intel_engine_stats_enable->tasklet_disable.

To fix this, I realized it is possible to move the engine stats enablement
and disablement to PMU event init and destroy hooks. This allows for much
simpler implementation since those hooks run in normal context (can
sleep).

v2: Extract engine_event_destroy. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin 
Fixes: 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking inside 
for busy-stats")
Testcase: igt/perf_pmu/enable-race-*
Cc: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: intel-gfx@lists.freedesktop.org
Reviewed-by: Chris Wilson 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20180205093448.13877-1-tvrtko.ursu...@linux.intel.com
(cherry picked from commit b2f78cda260bc6a1a2d382b1d85a29e69b5b3724)
---
 drivers/gpu/drm/i915/i915_pmu.c | 125 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  14 
 2 files changed, 52 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 55a8a1e29424..337eaa6ede52 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -285,26 +285,41 @@ static u64 count_interrupts(struct drm_i915_private *i915)
return sum;
 }
 
-static void i915_pmu_event_destroy(struct perf_event *event)
+static void engine_event_destroy(struct perf_event *event)
 {
-   WARN_ON(event->parent);
+   struct drm_i915_private *i915 =
+   container_of(event->pmu, typeof(*i915), pmu.base);
+   struct intel_engine_cs *engine;
+
+   engine = intel_engine_lookup_user(i915,
+ engine_event_class(event),
+ engine_event_instance(event));
+   if (WARN_ON_ONCE(!engine))
+   return;
+
+   if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+   intel_engine_supports_stats(engine))
+   intel_disable_engine_stats(engine);
 }
 
-static int engine_event_init(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
 {
-   struct drm_i915_private *i915 =
-   container_of(event->pmu, typeof(*i915), pmu.base);
+   WARN_ON(event->parent);
 
-   if (!intel_engine_lookup_user(i915, engine_event_class(event),
- engine_event_instance(event)))
-   return -ENODEV;
+   if (is_engine_event(event))
+   engine_event_destroy(event);
+}
 
-   switch (engine_event_sample(event)) {
+static int
+engine_event_status(struct intel_engine_cs *engine,
+   enum drm_i915_pmu_engine_sample sample)
+{
+   switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
break;
case I915_SAMPLE_SEMA:
-   if (INTEL_GEN(i915) < 6)
+   if (INTEL_GEN(engine->i915) < 6)
return -ENODEV;
break;
default:
@@ -314,6 +329,30 @@ static int engine_event_init(struct perf_event *event)
return 0;
 }
 
+static int engine_event_init(struct perf_event *event)
+{
+   struct drm_i915_private *i915 =
+   container_of(event->pmu, typeof(*i915), pmu.base);
+   struct intel_engine_cs *engine;
+   u8 sample;
+   int ret;
+
+   engine = intel_engine_lookup_user(i915, engine_event_class(event),
+ engine_event_instance(event));
+   if (!engine)
+   return -ENODEV;
+
+   sample = engine_event_sample(event);
+   ret = engine_event_status(engine, sample);
+   if (ret)
+   return ret;
+
+   if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+   ret = intel_enable_engine_stats(engine);
+
+   return ret;
+}
+
 static int i915_pmu_event_init(struct perf_event *event)
 {
struct drm_i915_private *i915 =
@@ -387,7 +426,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
if (WARN_ON_ONCE(!engine)) {
/* Do nothing */
} else if (sample == I915_SAMPLE_BUSY &&
-  engine->pmu.busy_stats) {
+  intel_engine_supports_stats(engine)) {
val = ktime_to_ns(intel_engine_get_busy_time(engine));

[Intel-gfx] [PATCH 2/4] drm/i915/pmu: Fix PMU enable vs execlists tasklet race

2018-02-13 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Commit 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking
inside for busy-stats") added a tasklet_disable call in busy stats
enabling, but we failed to understand that the PMU enable callback runs
as a hard IRQ (IPI).

Consequence of this is that the PMU enable callback can interrupt the
execlists tasklet, and will then deadlock when it calls
intel_engine_stats_enable->tasklet_disable.

To fix this, I realized it is possible to move the engine stats enablement
and disablement to PMU event init and destroy hooks. This allows for much
simpler implementation since those hooks run in normal context (can
sleep).

v2: Extract engine_event_destroy. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin 
Fixes: 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking inside 
for busy-stats")
Testcase: igt/perf_pmu/enable-race-*
Cc: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: intel-gfx@lists.freedesktop.org
Reviewed-by: Chris Wilson 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20180205093448.13877-1-tvrtko.ursu...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_pmu.c | 125 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  14 
 2 files changed, 52 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 55a8a1e29424..337eaa6ede52 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -285,26 +285,41 @@ static u64 count_interrupts(struct drm_i915_private *i915)
return sum;
 }
 
-static void i915_pmu_event_destroy(struct perf_event *event)
+static void engine_event_destroy(struct perf_event *event)
 {
-   WARN_ON(event->parent);
+   struct drm_i915_private *i915 =
+   container_of(event->pmu, typeof(*i915), pmu.base);
+   struct intel_engine_cs *engine;
+
+   engine = intel_engine_lookup_user(i915,
+ engine_event_class(event),
+ engine_event_instance(event));
+   if (WARN_ON_ONCE(!engine))
+   return;
+
+   if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+   intel_engine_supports_stats(engine))
+   intel_disable_engine_stats(engine);
 }
 
-static int engine_event_init(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
 {
-   struct drm_i915_private *i915 =
-   container_of(event->pmu, typeof(*i915), pmu.base);
+   WARN_ON(event->parent);
 
-   if (!intel_engine_lookup_user(i915, engine_event_class(event),
- engine_event_instance(event)))
-   return -ENODEV;
+   if (is_engine_event(event))
+   engine_event_destroy(event);
+}
 
-   switch (engine_event_sample(event)) {
+static int
+engine_event_status(struct intel_engine_cs *engine,
+   enum drm_i915_pmu_engine_sample sample)
+{
+   switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
break;
case I915_SAMPLE_SEMA:
-   if (INTEL_GEN(i915) < 6)
+   if (INTEL_GEN(engine->i915) < 6)
return -ENODEV;
break;
default:
@@ -314,6 +329,30 @@ static int engine_event_init(struct perf_event *event)
return 0;
 }
 
+static int engine_event_init(struct perf_event *event)
+{
+   struct drm_i915_private *i915 =
+   container_of(event->pmu, typeof(*i915), pmu.base);
+   struct intel_engine_cs *engine;
+   u8 sample;
+   int ret;
+
+   engine = intel_engine_lookup_user(i915, engine_event_class(event),
+ engine_event_instance(event));
+   if (!engine)
+   return -ENODEV;
+
+   sample = engine_event_sample(event);
+   ret = engine_event_status(engine, sample);
+   if (ret)
+   return ret;
+
+   if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+   ret = intel_enable_engine_stats(engine);
+
+   return ret;
+}
+
 static int i915_pmu_event_init(struct perf_event *event)
 {
struct drm_i915_private *i915 =
@@ -387,7 +426,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
if (WARN_ON_ONCE(!engine)) {
/* Do nothing */
} else if (sample == I915_SAMPLE_BUSY &&
-  engine->pmu.busy_stats) {
+  intel_engine_supports_stats(engine)) {
val = ktime_to_ns(intel_engine_get_busy_time(engine));
} else {
val =