On Tue, Oct 11, 2016 at 03:37:57PM +0100, Chris Wilson wrote:
> We want to decouple RPM and struct_mutex, but currently RPM has to walk
> the list of bound objects and remove userspace mmapping before we
> suspend (otherwise userspace may continue to access the GTT whilst it is
> powered down). This currently requires the struct_mutex to walk the
> bound_list, but if we move that to a separate list and lock we can take
> the first step towards removing the struct_mutex.
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c   |  4 ++--
>  drivers/gpu/drm/i915/i915_drv.h       | 20 +++++++++++-------
>  drivers/gpu/drm/i915/i915_gem.c       | 39 +++++++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
>  4 files changed, 46 insertions(+), 19 deletions(-)
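
The scheme in a nutshell, for anyone skimming (condensed from the hunks
below; userfault_list in the last block is the local list_head that
i915_gem_release_all_mmaps steals into):

        /* fault handler: publish the object as user-mmapped */
        spin_lock(&dev_priv->mm.userfault_lock);
        list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
        spin_unlock(&dev_priv->mm.userfault_lock);

        /* release_mmap: unpublish, then zap the PTEs outside the lock */
        spin_lock(&i915->mm.userfault_lock);
        if (!list_empty(&obj->userfault_link)) {
                list_del_init(&obj->userfault_link);
                zap = true;
        }
        spin_unlock(&i915->mm.userfault_lock);

        /* RPM suspend: steal the whole list, then revoke each mapping */
        spin_lock(&dev_priv->mm.userfault_lock);
        list_replace_init(&dev_priv->mm.userfault_list, &userfault_list);
        spin_unlock(&dev_priv->mm.userfault_lock);

Every transition of userfault_link happens under mm.userfault_lock, and
a non-empty link means userspace may have live PTEs for the object.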
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 358663e833d6..d4ecc5283c2a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -186,11 +186,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>       }
>       if (obj->stolen)
>               seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
> -     if (obj->pin_display || obj->fault_mappable) {
> +     if (obj->pin_display || !list_empty(&obj->userfault_link)) {
>               char s[3], *t = s;
>               if (obj->pin_display)
>                       *t++ = 'p';
> -             if (obj->fault_mappable)
> +             if (!list_empty(&obj->userfault_link))
>                       *t++ = 'f';
>               *t = '\0';
>               seq_printf(m, " (%s mappable)", s);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bf397b643cc0..72b3126c6c74 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1359,6 +1359,14 @@ struct i915_gem_mm {
>        */
>       struct list_head unbound_list;
>  
> +     /** Protects access to the userfault_list */
> +     spinlock_t userfault_lock;
> +
> +     /** List of all objects in gtt_space, currently mmapped by userspace.
> +      * All objects within this list must also be on bound_list.
> +      */
> +     struct list_head userfault_list;
> +
>       /** Usable portion of the GTT for GEM */
>       unsigned long stolen_base; /* limited to low memory (32-bit) */
>  
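Maybe extend the kerneldoc here while at it; as far as I can tell the
lock nests inside struct_mutex on the pagefault and release-mmap paths,
and is taken on its own only from the RPM suspend path. Suggested
wording (sketch only, not in the patch):

        /** Protects access to the userfault_list. Nests inside
         * struct_mutex in the pagefault and release-mmap paths;
         * taken on its own from the RPM suspend path.
         */
        spinlock_t userfault_lock;
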
> @@ -2215,6 +2223,11 @@ struct drm_i915_gem_object {
>       struct drm_mm_node *stolen;
>       struct list_head global_list;
>  
> +     /**
> +      * Whether the object is currently in the GGTT mmap.
> +      */
> +     struct list_head userfault_link;
> +
>       /** Used in execbuf to temporarily hold a ref */
>       struct list_head obj_exec_link;
>  
> @@ -2242,13 +2255,6 @@ struct drm_i915_gem_object {
>        */
>       unsigned int madv:2;
>  
> -     /**
> -      * Whether the current gtt mapping needs to be mappable (and isn't just
> -      * mappable by accident). Track pin and fault separate for a more
> -      * accurate mappable working set.
> -      */
> -     unsigned int fault_mappable:1;
> -
>       /*
>        * Is the object to be mapped as read-only to the GPU
>        * Only honoured if hardware has relevant pte bit
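Bikeshed: debugfs and evict now both open-code
list_empty(&obj->userfault_link) where they used to test
obj->fault_mappable. A trivial helper might keep the intent readable
(hypothetical name, not in this patch):

        static inline bool
        i915_gem_object_has_userfault(const struct drm_i915_gem_object *obj)
        {
                /* non-empty link <=> userspace may hold a GGTT mmap */
                return !list_empty(&obj->userfault_link);
        }
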
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fdd496e6c081..91910ffe0964 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1850,7 +1850,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
>       if (ret)
>               goto err_unpin;
>  
> -     obj->fault_mappable = true;
> +     spin_lock(&dev_priv->mm.userfault_lock);
> +     list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
> +     spin_unlock(&dev_priv->mm.userfault_lock);

I think we need to add it to the list _before_ we start punching in new
ptes (something like the sketch below). At least if we want to decouple
the locking and rpm refcounting a lot more, which I think we should,
instead of relying on magic locks that ensure ordering on their own
simply by being a BKL. But right now it's ok, so just a bikeshed.
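
Roughly (a sketch only; insert_user_ptes() stands in for whatever the
fault handler actually uses to punch in the PTEs, and err_remove is a
made-up unwind label):

        /* publish first, under the spinlock ... */
        spin_lock(&dev_priv->mm.userfault_lock);
        list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
        spin_unlock(&dev_priv->mm.userfault_lock);

        /* ... then insert the PTEs, so that a concurrent
         * i915_gem_release_all_mmaps() can never miss a mapping that
         * is in the middle of being created */
        ret = insert_user_ptes(area, vma);
        if (ret)
                goto err_remove;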

> +
>  err_unpin:
>       __i915_vma_unpin(vma);
>  err_unlock:
> @@ -1918,36 +1921,52 @@ err:
>  void
>  i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  {
> +     struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +     bool zap = false;
> +
>       /* Serialisation between user GTT access and our code depends upon
>        * revoking the CPU's PTE whilst the mutex is held. The next user
>        * pagefault then has to wait until we release the mutex.
> +      *
> +      * Note that RPM complicates somewhat by adding an additional
> +      * requirement that operations to the GGTT be made holding the RPM
> +      * wakeref.
>        */
>       lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -     if (!obj->fault_mappable)
> +     spin_lock(&i915->mm.userfault_lock);
> +     if (!list_empty(&obj->userfault_link)) {
> +             list_del_init(&obj->userfault_link);
> +             zap = true;
> +     }
> +     spin_unlock(&i915->mm.userfault_lock);
> +     if (!zap)
>               return;
>  
>       drm_vma_node_unmap(&obj->base.vma_node,
>                          obj->base.dev->anon_inode->i_mapping);
>  
>       /* Ensure that the CPU's PTE are revoked and there are not outstanding
>        * memory transactions from userspace before we return. The TLB
>        * flushing implied above by changing the PTE above *should* be
>        * sufficient, an extra barrier here just provides us with a bit
>        * of paranoid documentation about our requirement to serialise
>        * memory writes before touching registers / GSM.
>        */
>       wmb();
> -
> -     obj->fault_mappable = false;
>  }
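
Aside: list_del_init() rather than list_del() is what keeps
list_empty(&obj->userfault_link) usable as the "is mmapped" test
everywhere else: it re-initialises the node to point at itself. This is
the stock helper from include/linux/list.h:

        static inline void list_del_init(struct list_head *entry)
        {
                __list_del_entry(entry);
                INIT_LIST_HEAD(entry); /* node now points at itself */
        }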
>  
>  void
>  i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
>  {
> -     struct drm_i915_gem_object *obj;
> +     struct drm_i915_gem_object *obj, *on;
> +     struct list_head userfault_list;

Please add a comment to this declaration, something like

        /* protected by dev_priv->mm.userfault_lock */

since I had to think about it for a while ;-)

> +
> +     spin_lock(&dev_priv->mm.userfault_lock);
> +     list_replace_init(&dev_priv->mm.userfault_list, &userfault_list);
> +     spin_unlock(&dev_priv->mm.userfault_lock);
>  
> -     list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
> +     list_for_each_entry_safe(obj, on, &userfault_list, userfault_link)
>               i915_gem_release_mmap(obj);
>  }
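
The steal-the-list trick is the usual pattern for walking a
spinlock-protected list without holding the lock across the walk;
generically (hypothetical names throughout):

        LIST_HEAD(stash);

        spin_lock(&lock);
        list_replace_init(&shared_list, &stash); /* shared list now empty */
        spin_unlock(&lock);

        /* _safe variant because the walk unlinks entries as it goes */
        list_for_each_entry_safe(pos, next, &stash, link)
                process(pos);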
>  
> @@ -4066,6 +4085,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>       int i;
>  
>       INIT_LIST_HEAD(&obj->global_list);
> +     INIT_LIST_HEAD(&obj->userfault_link);
>       for (i = 0; i < I915_NUM_ENGINES; i++)
>               init_request_active(&obj->last_read[i],
>                                   i915_gem_object_retire__read);
> @@ -4441,6 +4461,7 @@ int i915_gem_init(struct drm_device *dev)
>       int ret;
>  
>       mutex_lock(&dev->struct_mutex);
> +     spin_lock_init(&dev_priv->mm.userfault_lock);
>  
>       if (!i915.enable_execlists) {
>               dev_priv->gt.resume = intel_legacy_submission_resume;
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 5b6f81c1dbca..ca175c3063ed 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -55,7 +55,7 @@ mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
>       if (WARN_ON(!list_empty(&vma->exec_list)))
>               return false;
>  
> -     if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
> +     if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link))

Racy access of list_empty(). Where's our friend READ_ONCE when we need
it? Since this is just an optimisation there's no issue with the race
itself, except for diverging control flow in case gcc decides to
recompute the test. One way to spell it is sketched below.
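
Something like this would pin the read down (sketch; list_empty() is
only a pointer comparison, so a single forced load is all that's
needed):

        /* read userfault_link.next exactly once; the list is mutated
         * under mm.userfault_lock, which we deliberately don't take
         */
        if (flags & PIN_NONFAULT &&
            READ_ONCE(vma->obj->userfault_link.next) !=
            &vma->obj->userfault_link)
                return false;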

I think that's the only one I spotted, with that addressed:

Reviewed-by: Daniel Vetter <daniel.vet...@ffwll.ch>

>               return false;
>  
>       list_add(&vma->exec_list, unwind);

> -- 
> 2.9.3
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
