We always try to do an unlocked wait before resorting to a blocking
wait under the mutex, so we very rarely have to sleep under the
struct_mutex. However, when we do, we want that wait to be as short as
possible, as the struct_mutex is our BKL and will stall the driver and
all clients.

There should be no impact on typical workloads.

v2: Move down a layer to apply to all waits.
v3: Make the priority boost explicit. This makes it clear which paths
want boosting under the mutex, and prevents uselessly boosting the
priority when we are only waiting for idle.

Signed-off-by: Chris Wilson <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
Cc: Joonas Lahtinen <[email protected]>
Cc: Daniel Vetter <[email protected]>
---
 drivers/gpu/drm/i915/i915_gem.c         | 10 +++++++++-
 drivers/gpu/drm/i915/i915_gem_request.c | 10 ++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h |  7 +++++--
 drivers/gpu/drm/i915/intel_ringbuffer.c |  4 +++-
 4 files changed, 27 insertions(+), 4 deletions(-)
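
For illustration, a rough sketch of the two-phase pattern described in
the commit message, from the point of view of a caller (the wrapper
function here is hypothetical; the i915_gem_object_wait() signature and
the flag names match the diff below):

static int example_prepare_read(struct drm_i915_gem_object *obj)
{
	int ret;

	/* Phase 1: wait unlocked, so other clients keep making
	 * progress while we sleep.
	 */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	/* Phase 2: any residual wait is now under our BKL, so ask the
	 * scheduler to complete the blocking request as soon as it can.
	 */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   I915_WAIT_PRIORITY,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);

	mutex_unlock(&obj->base.dev->struct_mutex);
	return ret;
}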

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0926c291404c..1f53a2a46602 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -330,6 +330,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
+                                  I915_WAIT_PRIORITY |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT,
                                   NULL);
@@ -755,7 +756,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED,
+                                  I915_WAIT_LOCKED |
+                                  I915_WAIT_PRIORITY,
                                   MAX_SCHEDULE_TIMEOUT,
                                   NULL);
        if (ret)
@@ -806,6 +808,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
+                                  I915_WAIT_PRIORITY |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT,
                                   NULL);
@@ -3073,6 +3076,8 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 {
        int ret;
 
+       GEM_BUG_ON(flags & I915_WAIT_PRIORITY);
+
        if (flags & I915_WAIT_LOCKED) {
                struct i915_gem_timeline *tl;
 
@@ -3162,6 +3167,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
+                                  I915_WAIT_PRIORITY |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT,
                                   NULL);
@@ -3284,6 +3290,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                ret = i915_gem_object_wait(obj,
                                           I915_WAIT_INTERRUPTIBLE |
                                           I915_WAIT_LOCKED |
+                                          I915_WAIT_PRIORITY |
                                           I915_WAIT_ALL,
                                           MAX_SCHEDULE_TIMEOUT,
                                           NULL);
@@ -3566,6 +3573,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
+                                  I915_WAIT_PRIORITY |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT,
                                   NULL);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bacb875a6ef3..1db5c1f5deb9 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1046,6 +1046,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
                   !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
                   !!(flags & I915_WAIT_LOCKED));
 #endif
+       GEM_BUG_ON((flags & I915_WAIT_PRIORITY) && !(flags & I915_WAIT_LOCKED));
        GEM_BUG_ON(timeout < 0);
 
        if (i915_gem_request_completed(req))
@@ -1054,6 +1055,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
        if (!timeout)
                return -ETIME;
 
+       /* Very rarely do we wait whilst holding the mutex. We try to always
+        * do an unlocked wait before using a locked wait. However, when we
+        * have to resort to a locked wait, we want that wait to be as short
+        * as possible as the struct_mutex is our BKL that will stall the
+        * driver and all clients.
+        */
+       if (flags & I915_WAIT_PRIORITY && req->engine->schedule)
+               req->engine->schedule(req, I915_PRIORITY_MAX);
+
        trace_i915_gem_request_wait_begin(req);
 
        add_wait_queue(&req->execute, &exec);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 316c86c98b6a..47476f5e3e5f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -297,7 +297,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
        __attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE        BIT(0)
 #define I915_WAIT_LOCKED       BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL          BIT(2) /* used by i915_gem_object_wait() */
+#define I915_WAIT_PRIORITY     BIT(2) /* struct_mutex held, boost priority */
+#define I915_WAIT_ALL          BIT(3) /* used by i915_gem_object_wait() */
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -722,7 +723,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
                return 0;
 
        ret = i915_wait_request(request,
-                               I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+                               I915_WAIT_INTERRUPTIBLE |
+                               I915_WAIT_LOCKED |
+                               I915_WAIT_PRIORITY,
                                MAX_SCHEDULE_TIMEOUT);
        if (ret < 0)
                return ret;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cf4ec937d923..2f7651ef45d5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2158,7 +2158,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
                return -ENOSPC;
 
        timeout = i915_wait_request(target,
-                                   I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+                                   I915_WAIT_INTERRUPTIBLE |
+                                   I915_WAIT_LOCKED |
+                                   I915_WAIT_PRIORITY,
                                    MAX_SCHEDULE_TIMEOUT);
        if (timeout < 0)
                return timeout;
-- 
2.11.0
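
A usage note on the hook itself: engine->schedule is only set by
backends that support request reprioritisation and is left NULL
otherwise, so on engines without a scheduler the boost quietly degrades
into a plain locked wait. A minimal sketch of the boost site, under
that assumption:

	/* Inside i915_wait_request(): a single reprioritisation call,
	 * guarded so that scheduler-less backends fall through to an
	 * ordinary wait.
	 */
	if (flags & I915_WAIT_PRIORITY && req->engine->schedule)
		req->engine->schedule(req, I915_PRIORITY_MAX);

Boosting to I915_PRIORITY_MAX moves the blocking request (and, in the
execlists implementation, the requests it depends on) ahead of other
queued work, which is exactly what we want while every other client is
stalled behind struct_mutex.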
