Re: [Intel-gfx] [PATCH 2/3] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start

2019-01-15 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-01-15 11:54:15)
> 
> On 14/01/2019 21:17, Chris Wilson wrote:
> > -static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> > -						       const struct mmu_notifier_range *range)
> > +static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
> > +{
> > + switch (mutex_trylock_recursive(m)) {
> > + default:
> > + case MUTEX_TRYLOCK_FAILED:
> > + mutex_lock_nested(m, I915_MM_SHRINKER);
> 
> Worth using killable and refactoring the caller to handle the failure?
> If this path ends up deadlocking one day, it would be slightly better
> if the process could be removed.

Good call.
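Something like this, perhaps? Untested sketch only, to show the killable
variant; the caller would then have to unwind on the ERR_PTR, and the
MUTEX_TRYLOCK_RECURSIVE handling below is just my assumption:

static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
{
	switch (mutex_trylock_recursive(m)) {
	default:
	case MUTEX_TRYLOCK_FAILED:
		/* Let a fatal signal break us out of a potential deadlock. */
		if (mutex_lock_killable_nested(m, I915_MM_SHRINKER))
			return ERR_PTR(-EINTR);
		/* fall through */
	case MUTEX_TRYLOCK_SUCCESS:
		return m;

	case MUTEX_TRYLOCK_RECURSIVE:
		return NULL; /* already held by this task, do not unlock */
	}
}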
-Chris


[Intel-gfx] [PATCH 2/3] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start

2019-01-14 Thread Chris Wilson
Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
notifiers") we have been able to report failure from
mmu_invalidate_range_start which allows us to use a trylock on the
struct_mutex to avoid potential recursion and report -EBUSY instead.
Furthermore, this allows us to pull the work into the main callback and
avoid the sleight-of-hand in using a workqueue to avoid lockdep.

However, not all paths to mmu_invalidate_range_start are prepared to
handle failure, so instead of reporting the recursion, deal with it by
propagating the failure upwards to the callers, who can decide for
themselves whether to handle or report it.

v2: Mark up the recursive lock behaviour and comment on the various weak
points.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375
References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
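(Aside, not part of the patch itself: a rough, untested sketch of the
trylock-and-report pattern described above. Every name prefixed with
example_ is hypothetical, and it assumes the mmu_notifier_range of this
era still exposes a blockable flag; the real callback is the larger one
in i915_gem_userptr.c in the diff below.)

static int
example_userptr_invalidate_range_start(struct mmu_notifier *_mn,
				       const struct mmu_notifier_range *range)
{
	/* example_mn_to_i915() stands in for a container_of() style helper. */
	struct drm_i915_private *i915 = example_mn_to_i915(_mn);

	if (!mutex_trylock(&i915->drm.struct_mutex)) {
		/*
		 * Non-blockable callers (e.g. the OOM reaper) cannot wait on
		 * us, so report the contention instead of recursing or
		 * sleeping under their locks.
		 */
		if (!range->blockable)
			return -EBUSY;

		mutex_lock(&i915->drm.struct_mutex);
	}

	/* ... cancel userptr objects overlapping [range->start, range->end) ... */

	mutex_unlock(&i915->drm.struct_mutex);
	return 0;
}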
 drivers/gpu/drm/i915/i915_drv.h |   4 +-
 drivers/gpu/drm/i915/i915_gem.c |  30 +++-
 drivers/gpu/drm/i915/i915_gem_object.h  |   7 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 220 +++-
 4 files changed, 135 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fa99824f63b3..4b67037c0318 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2931,8 +2931,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
 };
 
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-enum i915_mm_subclass subclass);
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+   enum i915_mm_subclass subclass);
 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0bfed33178e1..90c167f71345 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2303,8 +2303,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
struct sg_table *pages;
 
	pages = fetch_and_zero(&obj->mm.pages);
-   if (!pages)
-   return NULL;
+   if (IS_ERR_OR_NULL(pages))
+   return pages;
 
	spin_lock(&i915->mm.obj_lock);
	list_del(&obj->mm.link);
@@ -2328,22 +2328,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
return pages;
 }
 
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-enum i915_mm_subclass subclass)
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+   enum i915_mm_subclass subclass)
 {
struct sg_table *pages;
+   int ret;
 
if (i915_gem_object_has_pinned_pages(obj))
-   return;
+   return -EBUSY;
 
GEM_BUG_ON(obj->bind_count);
-   if (!i915_gem_object_has_pages(obj))
-   return;
 
/* May be called by shrinker from within get_pages() (on another bo) */
	mutex_lock_nested(&obj->mm.lock, subclass);
-   if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
+   if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
+   ret = -EBUSY;
goto unlock;
+   }
 
/*
 * ->put_pages might need to allocate memory for the bit17 swizzle
@@ -2351,11 +2352,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 * lists early.
 */
pages = __i915_gem_object_unset_pages(obj);
+
+   /*
+* XXX Temporary hijinx to avoid updating all backends to handle
+* NULL pages. In the future, when we have more asynchronous
+* get_pages backends we should be better able to handle the
+* cancellation of the async task in a more uniform manner.
+*/
+   if (!pages && !i915_gem_object_needs_async_cancel(obj))
+   pages = ERR_PTR(-EINVAL);
+
if (!IS_ERR(pages))
obj->ops->put_pages(obj, pages);
 
+   ret = 0;
 unlock:
	mutex_unlock(&obj->mm.lock);
+
+   return ret;
 }
 
 bool i915_sg_trim(struct sg_table *orig_st)
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index ff3da64470dd..cb1b0144d274 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -57,6 +57,7 @@ struct drm_i915_gem_object_ops {
 #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
 #define I915_GEM_OBJECT_IS_SHRINKABLE  BIT(1)
 #define I915_GEM_OBJECT_IS_PROXY   BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL   BIT(3)
 
/* Interface between the GEM object and its backing storage.
 * get_pages() is called once prior to the use of the associated set
@@ -387,6 +388,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object