Our low-level wait routine has evolved from our generic wait interface
that handled unlocked, RPS boosting, waits with time tracking. If we
push our GEM fence tracking to use reservation_objects (required for
handling multiple timelines), we lose the ability to pass the required
information down to i915_wait_request(). However, if we push the extra
functionality from i915_wait_request() to the individual callsites
(i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that make use
of those extras, we can both simplify our low level wait and prepare for
extending the GEM interface for use of reservation_objects.
Signed-off-by: Chris Wilson
---
drivers/gpu/drm/i915/i915_drv.h | 7 +-
drivers/gpu/drm/i915/i915_gem.c | 312 +++-
drivers/gpu/drm/i915/i915_gem_request.c | 119 ++--
drivers/gpu/drm/i915/i915_gem_request.h | 32 ++--
drivers/gpu/drm/i915/i915_gem_userptr.c | 12 +-
drivers/gpu/drm/i915/intel_display.c| 27 +--
drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +-
drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +-
8 files changed, 294 insertions(+), 232 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a38295609000..bc2ab2e2371c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3303,9 +3303,10 @@ int __must_check i915_gem_wait_for_idle(struct
drm_i915_private *dev_priv,
int __must_check i915_gem_suspend(struct drm_device *dev);
void i915_gem_resume(struct drm_device *dev);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
-int __must_check
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
- bool readonly);
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+unsigned int flags,
+long timeout,
+struct intel_rps_client *rps);
int __must_check
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
bool write);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8b68d874bc2f..92a455aa5e15 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -292,7 +292,12 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
* must wait for all rendering to complete to the object (as unbinding
* must anyway), and retire the requests.
*/
- ret = i915_gem_object_wait_rendering(obj, false);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -311,88 +316,171 @@ int i915_gem_object_unbind(struct drm_i915_gem_object
*obj)
return ret;
}
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- * @obj: i915 gem object
- * @readonly: waiting for just read access or read-write access
- */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
- bool readonly)
+static long
+i915_gem_object_wait_fence(struct fence *fence,
+ unsigned int flags,
+ long timeout,
+ struct intel_rps_client *rps)
{
- struct reservation_object *resv;
- struct i915_gem_active *active;
- unsigned long active_mask;
- int idx;
+ struct drm_i915_gem_request *rq;
- lockdep_assert_held(>base.dev->struct_mutex);
+ BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
- if (!readonly) {
- active = obj->last_read;
- active_mask = i915_gem_object_get_active(obj);
- } else {
- active_mask = 1;
- active = >last_write;
+ if (test_bit(FENCE_FLAG_SIGNALED_BIT, >flags))
+ return timeout;
+
+ if (!fence_is_i915(fence))
+ return fence_wait_timeout(fence,
+ flags & I915_WAIT_INTERRUPTIBLE,
+ timeout);
+
+ rq = to_request(fence);
+ if (i915_gem_request_completed(rq))
+ goto out;
+
+ /* This client is about to stall waiting for the GPU. In many cases
+* this is undesirable and limits the throughput of the system, as
+* many clients cannot continue processing user input/output whilst
+* blocked. RPS autotuning may take tens of milliseconds to respond
+* to the GPU load and thus incurs additional latency for the client.
+* We can circumvent that by promoting the GPU frequency to maximum
+* before we