On 26/03/2018 15:59, Chris Wilson wrote:
We've always been blatantly ignoring mmu_notifier.h:

  * Invalidation of multiple concurrent ranges may be
  * optionally permitted by the driver. Either way the
  * establishment of sptes is forbidden in the range passed to
  * invalidate_range_begin/end for the whole duration of the
  * invalidate_range_begin/end critical section.

by not preventing concurrent calls to gup while an invalidate_range is
being processed. Wrap gup and invalidate_range in a paired rw_semaphore
to allow concurrent lookups, which are then interrupted and disabled
across the invalidate_range. Further refinement can be applied by
tracking the invalidate_range versus individual gup calls, which should
be done using a common set of helpers, since all mmu_notifier subsystems
share the same need. I hear HMM is one such toolbox...

For the time being, assume concurrent invalidate and lookup are rare,
but not rare enough to completely ignore.
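
To spell the pairing out, a minimal sketch only (made-up names, not the
i915 code in the diff below): lookups take the semaphore for read and so
can run concurrently with each other, while the invalidate callbacks hold
it for write across start/end, so no new sptes can be established inside
the critical section.

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/rwsem.h>

struct my_notifier {
	struct mmu_notifier mn;
	struct rw_semaphore sem;	/* read: lookup, write: invalidate */
};

/* Lookup side: may run concurrently with other lookups, but is held off
 * for the whole invalidate_range_start/end critical section. */
static int my_lookup_pages(struct my_notifier *n, unsigned long start,
			   int nr_pages, struct page **pages)
{
	int pinned;

	down_read(&n->sem);
	pinned = __get_user_pages_fast(start, nr_pages, 1, pages);
	up_read(&n->sem);

	return pinned;
}

static void my_invalidate_range_start(struct mmu_notifier *_mn,
				      struct mm_struct *mm,
				      unsigned long start, unsigned long end)
{
	struct my_notifier *n = container_of(_mn, struct my_notifier, mn);

	down_write(&n->sem);	/* blocks until in-flight lookups drain */
	/* ... zap/cancel anything overlapping [start, end) ... */
}

static void my_invalidate_range_end(struct mmu_notifier *_mn,
				    struct mm_struct *mm,
				    unsigned long start, unsigned long end)
{
	struct my_notifier *n = container_of(_mn, struct my_notifier, mn);

	up_write(&n->sem);	/* lookups may establish sptes again */
}

static const struct mmu_notifier_ops my_notifier_ops = {
	.invalidate_range_start = my_invalidate_range_start,
	.invalidate_range_end = my_invalidate_range_end,
};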

I think I suggested a few times that we should just "ban" the object on first invalidate and never, for the rest of its lifetime, allow it to obtain backing store again. I just don't remember why we decided not to go with that approach. :( Thinking about it now, I still don't see how that restriction would be a problem, and it would simplify things.
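
Roughly what I have in mind, as a hypothetical sketch only (the banned
flag and helper names below are invented, not the current code): the
first invalidate flips a one-way flag and the get_pages path then refuses
to acquire backing store for the rest of the object's lifetime, so no
semaphore around gup would be needed at all.

#include <linux/atomic.h>
#include <linux/errno.h>

struct my_userptr {
	atomic_t banned;	/* set on first invalidate, never cleared */
};

/* Invalidate side: instead of serialising against gup, permanently mark
 * the object so it can never obtain backing store again. */
static void my_ban_on_invalidate(struct my_userptr *p)
{
	atomic_set(&p->banned, 1);
	/* ... unbind/zap whatever is currently mapped, as today ... */
}

/* Lookup side: refuse to (re)acquire pages once banned. */
static int my_get_pages(struct my_userptr *p)
{
	if (atomic_read(&p->banned))
		return -EFAULT;

	/* ... __get_user_pages_fast()/worker path as in the current code ... */
	return 0;
}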

With more locks I am quite fearful of what lockdep will say, but let's see...


Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vet...@ffwll.ch>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Cc: Michał Winiarski <michal.winiar...@intel.com>
---
  drivers/gpu/drm/i915/i915_gem_userptr.c | 29 ++++++++++++++++++++++++++++-
  1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index d596a8302ca3..938107dffd37 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -47,6 +47,7 @@ struct i915_mm_struct {
struct i915_mmu_notifier {
        spinlock_t lock;
+       struct rw_semaphore sem;
        struct hlist_node node;
        struct mmu_notifier mn;
        struct rb_root_cached objects;
@@ -123,6 +124,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
        struct interval_tree_node *it;
        LIST_HEAD(cancelled);
+       down_write(&mn->sem);
+
        if (RB_EMPTY_ROOT(&mn->objects.rb_root))
                return;
@@ -156,8 +159,20 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
                flush_workqueue(mn->wq);
  }
+static void i915_gem_userptr_mn_invalidate_range_end(struct mmu_notifier *_mn,
+                                                    struct mm_struct *mm,
+                                                    unsigned long start,
+                                                    unsigned long end)
+{
+       struct i915_mmu_notifier *mn =
+               container_of(_mn, struct i915_mmu_notifier, mn);
+
+       up_write(&mn->sem);
+}
+
  static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
        .invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+       .invalidate_range_end = i915_gem_userptr_mn_invalidate_range_end,
  };
static struct i915_mmu_notifier *
@@ -170,6 +185,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)
                return ERR_PTR(-ENOMEM);
        spin_lock_init(&mn->lock);
+       init_rwsem(&mn->sem);
        mn->mn.ops = &i915_gem_userptr_notifier;
        mn->objects = RB_ROOT_CACHED;
        mn->wq = alloc_workqueue("i915-userptr-release",
@@ -504,12 +520,15 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
        pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
        if (pvec != NULL) {
+               struct i915_mmu_notifier *mn = obj->userptr.mm->mn;
                struct mm_struct *mm = obj->userptr.mm->mm;
                unsigned int flags = 0;
                if (!obj->userptr.read_only)
                        flags |= FOLL_WRITE;
+               down_read(&mn->sem);
+
                ret = -EFAULT;
                if (mmget_not_zero(mm)) {
                        down_read(&mm->mmap_sem);
@@ -528,6 +547,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
                        up_read(&mm->mmap_sem);
                        mmput(mm);
                }
+
+               up_read(&mn->sem);
        }
        mutex_lock(&obj->mm.lock);
@@ -636,15 +657,21 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
        pinned = 0;
        if (mm == current->mm) {
+               struct i915_mmu_notifier *mn = obj->userptr.mm->mn;
+
                pvec = kvmalloc_array(num_pages, sizeof(struct page *),
                                      GFP_KERNEL |
                                      __GFP_NORETRY |
                                      __GFP_NOWARN);
-               if (pvec) /* defer to worker if malloc fails */
+
+               /* defer to worker if malloc fails */
+               if (pvec && down_read_trylock(&mn->sem)) {
                        pinned = __get_user_pages_fast(obj->userptr.ptr,
                                                       num_pages,
                                                       !obj->userptr.read_only,
                                                       pvec);
+                       up_read(&mn->sem);
+               }
        }
        active = false;


Simple enough, but I don't dare say anything until the results from the shards arrive.

Regards,

Tvrtko