Replace the racy continuation check within retire_work with a definite
kill-switch on idling. The race was being exposed by gem_concurrent_blit,
where the retire_worker would be terminated too early, leaving us
spinning in debugfs/i915_drop_caches with nothing flushing the
retirement queue.

That said, the igt trying to idle from one child while submitting from
another may be a contributing factor as to why it runs so slowly...

Testcase: igt/gem_concurrent_blit
Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy")
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_pm.c             | 18 ++++++++++++------
 .../gpu/drm/i915/selftests/mock_gem_device.c   |  1 -
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
index ae91ad7cb31e..b239b55f84cd 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -30,15 +30,23 @@ static void idle_work_handler(struct work_struct *work)
 {
        struct drm_i915_private *i915 =
                container_of(work, typeof(*i915), gem.idle_work);
+       bool restart = true;
 
+       cancel_delayed_work_sync(&i915->gem.retire_work);
        mutex_lock(&i915->drm.struct_mutex);
 
        intel_wakeref_lock(&i915->gt.wakeref);
-       if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work))
+       if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) {
                i915_gem_park(i915);
+               restart = false;
+       }
        intel_wakeref_unlock(&i915->gt.wakeref);
 
        mutex_unlock(&i915->drm.struct_mutex);
+       if (restart)
+               queue_delayed_work(i915->wq,
+                                  &i915->gem.retire_work,
+                                  round_jiffies_up_relative(HZ));
 }
 
 static void retire_work_handler(struct work_struct *work)
@@ -52,10 +60,9 @@ static void retire_work_handler(struct work_struct *work)
                mutex_unlock(&i915->drm.struct_mutex);
        }
 
-       if (intel_wakeref_active(&i915->gt.wakeref))
-               queue_delayed_work(i915->wq,
-                                  &i915->gem.retire_work,
-                                  round_jiffies_up_relative(HZ));
+       queue_delayed_work(i915->wq,
+                          &i915->gem.retire_work,
+                          round_jiffies_up_relative(HZ));
 }
 
 static int pm_notifier(struct notifier_block *nb,
@@ -140,7 +147,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
         * Assert that we successfully flushed all the work and
         * reset the GPU back to its idle, low power state.
         */
-       drain_delayed_work(&i915->gem.retire_work);
        GEM_BUG_ON(i915->gt.awake);
        flush_work(&i915->gem.idle_work);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index d919f512042c..9fd02025d382 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -58,7 +58,6 @@ static void mock_device_release(struct drm_device *dev)
        i915_gem_contexts_lost(i915);
        mutex_unlock(&i915->drm.struct_mutex);
 
-       drain_delayed_work(&i915->gem.retire_work);
        flush_work(&i915->gem.idle_work);
        i915_gem_drain_workqueue(i915);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to