On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote:
> By applying the same logic as for wait-ioctl, we can query whether a
> request has completed without holding struct_mutex. The biggest impact
> system-wide is removing the flush_active and the contention that causes.
> 
> Testcase: igt/gem_busy
> Signed-off-by: Chris Wilson <[email protected]>
> Cc: Akash Goel <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 110 
> +++++++++++++++++++++++++++++-----------
>  1 file changed, 80 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 43069b05bdd2..f2f70f5ff9f4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3721,49 +3721,99 @@ i915_gem_object_ggtt_unpin_view(struct 
> drm_i915_gem_object *obj,
>       i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
>  }
>  
> +static __always_inline unsigned
> +__busy_read_flag(const struct drm_i915_gem_request *request)
> +{
> +     return 0x10000 << request->engine->exec_id;
> +}
> +
> +static __always_inline unsigned int
> +__busy_write_flag(const struct drm_i915_gem_request *request)
> +{
> +     return request->engine->exec_id;

Just realized (to my horror) this is not a flag, it's a bare ID, so
better not call the function _flag, but rather _id?

> +}
> +
> +static __always_inline unsigned
> +__busy_flag(const struct i915_gem_active *active,
> +         unsigned int (*flag)(const struct drm_i915_gem_request *))
> +{
> +     struct drm_i915_gem_request *request;
> +
> +     request = rcu_dereference(active->request);
> +     if (!request || i915_gem_request_completed(request))
> +             return 0;
> +
> +     return flag(request);
> +}
> +
> +static inline unsigned
> +busy_read_flag(const struct i915_gem_active *active)
> +{
> +     return __busy_flag(active, __busy_read_flag);
> +}
> +
> +static inline unsigned
> +busy_write_flag(const struct i915_gem_active *active)
> +{
> +     return __busy_flag(active, __busy_write_flag);
> +}
> +
>  int
>  i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>                   struct drm_file *file)
>  {
>       struct drm_i915_gem_busy *args = data;
>       struct drm_i915_gem_object *obj;
> -     int ret;
> -
> -     ret = i915_mutex_lock_interruptible(dev);
> -     if (ret)
> -             return ret;
> +     unsigned long active;
>  
>       obj = i915_gem_object_lookup(file, args->handle);
> -     if (!obj) {
> -             ret = -ENOENT;
> -             goto unlock;
> -     }
> +     if (!obj)
> +             return -ENOENT;
>  
> -     /* Count all active objects as busy, even if they are currently not used
> -      * by the gpu. Users of this interface expect objects to eventually
> -      * become non-busy without any further actions.
> -      */
>       args->busy = 0;
> -     if (i915_gem_object_is_active(obj)) {
> -             struct drm_i915_gem_request *req;
> -             int i;
> +     active = __I915_BO_ACTIVE(obj);
> +     if (active) {
> +             int idx;
>  
> -             for (i = 0; i < I915_NUM_ENGINES; i++) {
> -                     req = i915_gem_active_peek(&obj->last_read[i],
> -                                                
> &obj->base.dev->struct_mutex);
> -                     if (req)
> -                             args->busy |= 1 << (16 + req->engine->exec_id);
> -             }
> -             req = i915_gem_active_peek(&obj->last_write,
> -                                        &obj->base.dev->struct_mutex);
> -             if (req)
> -                     args->busy |= req->engine->exec_id;
> +             /* Yes, the lookups are intentionally racy.
> +              *
> +              * Even though we guard the pointer lookup by RCU, that only
> +              * guarantees that the pointer and its contents remain
> +              * dereferencable and does *not* mean that the request we
> +              * have is the same as the one being tracked by the object.
> +              *
> +              * Consider that we lookup the request just as it is being
> +              * retired and free. We take a local copy of the pointer,

still s/free/freed/

> +              * but before we add its engine into the busy set, the other
> +              * thread reallocates it and assigns it to a task on another
> +              * engine with a fresh and incomplete seqno.
> +              *
> +              * So after we lookup the engine's id, we double check that
> +              * the active request is the same and only then do we add it
> +              * into the busy set.
> +              */
> +             rcu_read_lock();
> +
> +             for_each_active(active, idx)
> +                     args->busy |= busy_read_flag(&obj->last_read[idx]);

So you mean this is double check against __I915_BO_ACTIVE, right?

We're getting there, though. With above fixed;

Reviewed-by: Joonas Lahtinen <[email protected]>
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to