Now that we changed execbuf submission slightly to allow us to do all
pinning in one place, we can simply replace the struct_mutex lock
with ww versions. All we have to do is add a separate path for -EDEADLK
handling, which needs to unpin all gem bo's before dropping the lock,
then start over.

This finally allows us to do parallel submission.

Signed-off-by: Maarten Lankhorst <[email protected]>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 103 +++++++++---------
 drivers/gpu/drm/i915/i915_cmd_parser.c        |  15 +--
 2 files changed, 51 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1f41c245665a..f32a3faeb450 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -249,6 +249,8 @@ struct i915_execbuffer {
        /** list of vma that have execobj.relocation_count */
        struct list_head relocs;
 
+       struct i915_gem_ww_ctx ww;
+
        /**
         * Track the most recently used object for relocations, as we
         * frequently have to perform multiple relocations within the same
@@ -833,6 +835,10 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
                struct eb_vma *ev = &eb->vma[i];
                struct i915_vma *vma = ev->vma;
 
+               err = i915_gem_object_lock(vma->obj, &eb->ww);
+               if (err)
+                       return err;
+
                if (eb_pin_vma(eb, entry, ev)) {
                        if (entry->offset != vma->node.start) {
                                entry->offset = vma->node.start | UPDATE;
@@ -1143,7 +1149,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, 
struct i915_vma *vma)
        struct drm_i915_gem_object *obj = vma->obj;
        int err;
 
-       i915_vma_lock(vma);
+       assert_vma_held(vma);
 
        if (obj->cache_dirty & ~obj->cache_coherent)
                i915_gem_clflush_object(obj, 0);
@@ -1153,8 +1159,6 @@ static int reloc_move_to_gpu(struct i915_request *rq, 
struct i915_vma *vma)
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 
-       i915_vma_unlock(vma);
-
        return err;
 }
 
@@ -1212,11 +1216,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
        if (err)
                goto skip_request;
 
-       i915_vma_lock(batch);
+       assert_vma_held(batch);
        err = i915_request_await_object(rq, batch->obj, false);
        if (err == 0)
                err = i915_vma_move_to_active(batch, rq, 0);
-       i915_vma_unlock(batch);
        if (err)
                goto skip_request;
 
@@ -1709,7 +1712,10 @@ static int eb_lock_and_parse_cmdbuffer(struct 
i915_execbuffer *eb)
        if (IS_ERR(pool))
                return PTR_ERR(pool);
 
-       err = eb_parse(eb, pool);
+       err = i915_gem_object_lock(pool->obj, &eb->ww);
+       if (!err)
+               err = eb_parse(eb, pool);
+
        if (err)
                intel_engine_pool_put(pool);
 
@@ -1718,7 +1724,6 @@ static int eb_lock_and_parse_cmdbuffer(struct 
i915_execbuffer *eb)
 
 static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 {
-       struct drm_device *dev = &eb->i915->drm;
        bool have_copy = false;
        struct eb_vma *ev;
        int err = 0;
@@ -1731,7 +1736,7 @@ static noinline int eb_relocate_slow(struct 
i915_execbuffer *eb)
 
        /* We may process another execbuffer during the unlock... */
        eb_release_vmas(eb);
-       mutex_unlock(&dev->struct_mutex);
+       i915_gem_ww_ctx_fini(&eb->ww);
 
        /*
         * We take 3 passes through the slowpatch.
@@ -1756,20 +1761,17 @@ static noinline int eb_relocate_slow(struct 
i915_execbuffer *eb)
                err = 0;
        }
        if (err) {
-               mutex_lock(&dev->struct_mutex);
+               i915_gem_ww_ctx_init(&eb->ww, true);
                goto out;
        }
 
        /* A frequent cause for EAGAIN are currently unavailable client pages */
        flush_workqueue(eb->i915->mm.userptr_wq);
 
-       err = mutex_lock_interruptible(&dev->struct_mutex);
-       if (err) {
-               mutex_lock(&dev->struct_mutex);
-               goto out;
-       }
+       i915_gem_ww_ctx_init(&eb->ww, true);
 
        /* reacquire the objects */
+repeat_validate:
        err = eb_validate_vmas(eb);
        if (err)
                goto err;
@@ -1802,6 +1804,13 @@ static noinline int eb_relocate_slow(struct 
i915_execbuffer *eb)
         */
 
 err:
+       if (err == -EDEADLK) {
+               eb_release_vmas(eb);
+               err = i915_gem_ww_ctx_backoff(&eb->ww);
+               if (!err)
+                       goto repeat_validate;
+       }
+
        if (err == -EAGAIN)
                goto repeat;
 
@@ -1834,10 +1843,19 @@ static int eb_relocate_and_parse_cmdbuf(struct 
i915_execbuffer *eb)
        if (err)
                return err;
 
+retry:
        err = eb_validate_vmas(eb);
        if (!err)
                err = eb_lock_and_parse_cmdbuffer(eb);
-       if (err)
+
+       if (err == -EDEADLK) {
+               err = i915_gem_ww_ctx_backoff(&eb->ww);
+               if (err)
+                       return err;
+
+               goto retry;
+       }
+       else if (err)
                goto slow;
 
        /* The objects are in their final locations, apply the relocations. */
@@ -1853,43 +1871,26 @@ static int eb_relocate_and_parse_cmdbuf(struct 
i915_execbuffer *eb)
        return 0;
 
 slow:
-       return eb_relocate_slow(eb);
+       err = eb_relocate_slow(eb);
+       if (err)
+               /*
+                * If the user expects the execobject.offset and
+                * reloc.presumed_offset to be an exact match,
+                * as for using NO_RELOC, then we cannot update
+                * the execobject.offset until we have completed
+                * relocation.
+                */
+               eb->args->flags &= ~__EXEC_HAS_RELOC;
+
+       return err;
 }
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
        const unsigned int count = eb->buffer_count;
-       struct ww_acquire_ctx acquire;
-       unsigned int i;
+       unsigned int i = count;
        int err = 0;
 
-       ww_acquire_init(&acquire, &reservation_ww_class);
-
-       for (i = 0; i < count; i++) {
-               struct eb_vma *ev = &eb->vma[i];
-               struct i915_vma *vma = ev->vma;
-
-               err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
-               if (err == -EDEADLK) {
-                       GEM_BUG_ON(i == 0);
-                       do {
-                               int j = i - 1;
-
-                               ww_mutex_unlock(&eb->vma[j].vma->resv->lock);
-
-                               swap(eb->vma[i],  eb->vma[j]);
-                       } while (--i);
-
-                       err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
-                                                              &acquire);
-               }
-               if (err == -EALREADY)
-                       err = 0;
-               if (err)
-                       break;
-       }
-       ww_acquire_done(&acquire);
-
        while (i--) {
                struct eb_vma *ev = &eb->vma[i];
                struct i915_vma *vma = ev->vma;
@@ -1934,15 +1935,12 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
                if (err == 0)
                        err = i915_vma_move_to_active(vma, eb->request, flags);
 
-               i915_vma_unlock(vma);
-
                __eb_unreserve_vma(vma, flags);
                if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
                        i915_vma_put(vma);
 
                ev->vma = NULL;
        }
-       ww_acquire_fini(&acquire);
 
        if (unlikely(err))
                goto err_skip;
@@ -2589,14 +2587,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        if (unlikely(err))
                goto err_context;
 
-       err = mutex_lock_interruptible(&dev->struct_mutex);
-       if (err)
-               goto err_engine;
+       i915_gem_ww_ctx_init(&eb.ww, true);
 
        err = eb_relocate_and_parse_cmdbuf(&eb);
        if (err)
                goto err_vma;
 
+       ww_acquire_done(&eb.ww.ctx);
+
        /*
         * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
         * batch" bit. Hence we need to pin secure batches into the global gtt.
@@ -2701,8 +2699,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
        if (eb.exec)
                eb_release_vmas(&eb);
-       mutex_unlock(&dev->struct_mutex);
-err_engine:
+       i915_gem_ww_ctx_fini(&eb.ww);
        eb_unpin_engine(&eb);
 err_context:
        i915_gem_context_put(eb.gem_context);
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index f91c6d214634..f5e821bf5d59 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1136,31 +1136,19 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dst_obj,
        void *dst, *src;
        int ret;
 
-       ret = i915_gem_object_lock_interruptible(dst_obj, NULL);
+       ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
        if (ret)
                return ERR_PTR(ret);
 
-       ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
-       if (ret) {
-               i915_gem_object_unlock(dst_obj);
-               return ERR_PTR(ret);
-       }
-
        dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
        i915_gem_object_finish_access(dst_obj);
-       i915_gem_object_unlock(dst_obj);
 
        if (IS_ERR(dst))
                return dst;
 
-       ret = i915_gem_object_lock_interruptible(src_obj, NULL);
-       if (ret)
-               return ERR_PTR(ret);
-
        ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
        if (ret) {
                i915_gem_object_unpin_map(dst_obj);
-               i915_gem_object_unlock(src_obj);
                return ERR_PTR(ret);
        }
 
@@ -1209,7 +1197,6 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dst_obj,
        }
 
        i915_gem_object_finish_access(src_obj);
-       i915_gem_object_unlock(src_obj);
 
        /* dst_obj is returned with vmap pinned */
        *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
-- 
2.24.0

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to