On Tue, Oct 11, 2016 at 03:37:57PM +0100, Chris Wilson wrote:
> We want to decouple RPM and struct_mutex, but currently RPM has to walk
> the list of bound objects and remove userspace mmapping before we
> suspend (otherwise userspace may continue to access the GTT whilst it is
> powered down). This currently requires the struct_mutex to walk the
> bound_list, but if we move that to a separate list and lock we can take
> the first step towards removing the struct_mutex.
>
> Signed-off-by: Chris Wilson
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 4 ++--
> drivers/gpu/drm/i915/i915_drv.h | 20 +++---
> drivers/gpu/drm/i915/i915_gem.c | 39 ++++++++++++++++++++++++++++-------
> drivers/gpu/drm/i915/i915_gem_evict.c | 2 +-
> 4 files changed, 46 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 358663e833d6..d4ecc5283c2a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -186,11 +186,11 @@ describe_obj(struct seq_file *m, struct
> drm_i915_gem_object *obj)
> }
> if (obj->stolen)
> seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
> - if (obj->pin_display || obj->fault_mappable) {
> + if (obj->pin_display || !list_empty(&obj->userfault_link)) {
> char s[3], *t = s;
> if (obj->pin_display)
> *t++ = 'p';
> - if (obj->fault_mappable)
> + if (!list_empty(&obj->userfault_link))
> *t++ = 'f';
> *t = '\0';
> seq_printf(m, " (%s mappable)", s);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bf397b643cc0..72b3126c6c74 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1359,6 +1359,14 @@ struct i915_gem_mm {
>*/
> struct list_head unbound_list;
>
> + /** Protects access to the userfault_list */
> + spinlock_t userfault_lock;
> +
> + /** List of all objects in gtt_space, currently mmaped by userspace.
> + * All objects within this list must also be on bound_list.
> + */
> + struct list_head userfault_list;
> +
> /** Usable portion of the GTT for GEM */
> unsigned long stolen_base; /* limited to low memory (32-bit) */
>
> @@ -2215,6 +2223,11 @@ struct drm_i915_gem_object {
> struct drm_mm_node *stolen;
> struct list_head global_list;
>
> + /**
> + * Whether the object is currently in the GGTT mmap.
> + */
> + struct list_head userfault_link;
> +
> /** Used in execbuf to temporarily hold a ref */
> struct list_head obj_exec_link;
>
> @@ -2242,13 +2255,6 @@ struct drm_i915_gem_object {
>*/
> unsigned int madv:2;
>
> - /**
> - * Whether the current gtt mapping needs to be mappable (and isn't just
> - * mappable by accident). Track pin and fault separate for a more
> - * accurate mappable working set.
> - */
> - unsigned int fault_mappable:1;
> -
> /*
>* Is the object to be mapped as read-only to the GPU
>* Only honoured if hardware has relevant pte bit
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fdd496e6c081..91910ffe0964 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1850,7 +1850,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct
> vm_fault *vmf)
> if (ret)
> goto err_unpin;
>
> - obj->fault_mappable = true;
> + spin_lock(&dev_priv->mm.userfault_lock);
> + list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
> + spin_unlock(&dev_priv->mm.userfault_lock);
I think we need to add it to the list _before_ we start punching in new
ptes. At least if we want to decouple the locking and rpm refcounting a
lot more (which I think we should, I had magic locks that ensure ordering
on their own simply by being a BKL). But right now it's ok, so just a
bikeshed.
> +
> err_unpin:
> __i915_vma_unpin(vma);
> err_unlock:
> @@ -1918,36 +1921,52 @@ err:
> void
> i915_gem_release_mmap(struct drm_i915_gem_object *obj)
> {
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + bool zap = false;
> +
> /* Serialisation between user GTT access and our code depends upon
>* revoking the CPU's PTE whilst the mutex is held. The next user
>* pagefault then has to wait until we release the mutex.
> + *
> + * Note that RPM complicates somewhat by adding an additional
> + * requirement that operations to the GGTT be made holding the RPM
> + * wakeref.
>*/
> lockdep_assert_held(&obj->base.dev->struct_mutex);
>
> - if (!obj->fault_mappable)
> + spin_lock(&i915->mm.userfault_lock);
> + if (!list_empty(&obj->userfault_link)) {
> +