[Intel-gfx] Patch "drm/i915: Fix ref->mutex deadlock in i915_active_wait()" has been added to the 5.4-stable tree

2020-04-11 Thread gregkh


This is a note to let you know that I've just added the patch titled

drm/i915: Fix ref->mutex deadlock in i915_active_wait()

to the 5.4-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 drm-i915-fix-ref-mutex-deadlock-in-i915_active_wait.patch
and it can be found in the queue-5.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


>From sul...@kerneltoast.com  Sat Apr 11 13:39:11 2020
From: Sultan Alsawaf 
Date: Tue,  7 Apr 2020 13:32:22 -0700
Subject: drm/i915: Fix ref->mutex deadlock in i915_active_wait()
To: sta...@vger.kernel.org
Cc: Greg KH , Jani Nikula , Joonas 
Lahtinen , Rodrigo Vivi 
, David Airlie , Daniel Vetter 
, Chris Wilson , 
intel-gfx@lists.freedesktop.org, dri-de...@lists.freedesktop.org, Sultan 
Alsawaf 
Message-ID: <20200407203222.2493-1-sul...@kerneltoast.com>

From: Sultan Alsawaf 

The following deadlock exists in i915_active_wait() due to a double lock
on ref->mutex (call chain listed in order from top to bottom):
 i915_active_wait();
 mutex_lock_interruptible(&ref->mutex); <-- ref->mutex first acquired
 i915_active_request_retire();
 node_retire();
 active_retire();
 mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); <-- DEADLOCK

Fix the deadlock by skipping the second ref->mutex lock when
active_retire() is called through i915_active_request_retire().

Note that this bug only affects 5.4 and has since been fixed in 5.5.
Normally, a backport of the fix from 5.5 would be in order, but the
patch set that fixes this deadlock involves massive changes that are
neither feasible nor desirable for backporting [1][2][3]. Therefore,
this small patch was made to address the deadlock specifically for 5.4.

[1] 274cbf20fd10 ("drm/i915: Push the i915_active.retire into a worker")
[2] 093b92287363 ("drm/i915: Split i915_active.mutex into an irq-safe spinlock 
for the rbtree")
[3] 750bde2fd4ff ("drm/i915: Serialise with remote retirement")

Fixes: 12c255b5dad1 ("drm/i915: Provide an i915_active.acquire callback")
Cc: <stable@vger.kernel.org> # 5.4.x
Signed-off-by: Sultan Alsawaf 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/gpu/drm/i915/i915_active.c |   29 +++--
 drivers/gpu/drm/i915/i915_active.h |4 ++--
 2 files changed, 21 insertions(+), 12 deletions(-)

--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -121,7 +121,7 @@ static inline void debug_active_assert(s
 #endif
 
 static void
-__active_retire(struct i915_active *ref)
+__active_retire(struct i915_active *ref, bool lock)
 {
struct active_node *it, *n;
struct rb_root root;
@@ -138,7 +138,8 @@ __active_retire(struct i915_active *ref)
retire = true;
}
 
-   mutex_unlock(&ref->mutex);
+   if (likely(lock))
+       mutex_unlock(&ref->mutex);
if (!retire)
return;
 
@@ -153,21 +154,28 @@ __active_retire(struct i915_active *ref)
 }
 
 static void
-active_retire(struct i915_active *ref)
+active_retire(struct i915_active *ref, bool lock)
 {
GEM_BUG_ON(!atomic_read(&ref->count));
if (atomic_add_unless(&ref->count, -1, 1))
return;
 
/* One active may be flushed from inside the acquire of another */
-   mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
-   __active_retire(ref);
+   if (likely(lock))
+       mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
+   __active_retire(ref, lock);
 }
 
 static void
 node_retire(struct i915_active_request *base, struct i915_request *rq)
 {
-   active_retire(node_from_active(base)->ref);
+   active_retire(node_from_active(base)->ref, true);
+}
+
+static void
+node_retire_nolock(struct i915_active_request *base, struct i915_request *rq)
+{
+   active_retire(node_from_active(base)->ref, false);
 }
 
 static struct i915_active_request *
@@ -364,7 +372,7 @@ int i915_active_acquire(struct i915_acti
 void i915_active_release(struct i915_active *ref)
 {
debug_active_assert(ref);
-   active_retire(ref);
+   active_retire(ref, true);
 }
 
 static void __active_ungrab(struct i915_active *ref)
@@ -391,7 +399,7 @@ void i915_active_ungrab(struct i915_acti
 {
GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));
 
-   active_retire(ref);
+   active_retire(ref, true);
__active_ungrab(ref);
 }
 
@@ -421,12 +429,13 @@ int i915_active_wait(struct i915_active
break;
}
 
-   err = i915_active_request_retire(&it->base, BKL(ref));
+   err = i915_active_request_retire(&it->base, BKL(ref),
+                                    node_retire_nolock);
if (err)
break;
}
 
-   __active_retire(ref);
+   __active_retire(ref, true);
if (err)
return err;
 
--- a/drivers/gpu/drm/i915/i915_active.h
+++ 

[Intel-gfx] Patch "drm/i915: Fix ref->mutex deadlock in i915_active_wait()" has been added to the 5.4-stable tree

2020-04-10 Thread gregkh


This is a note to let you know that I've just added the patch titled

drm/i915: Fix ref->mutex deadlock in i915_active_wait()

to the 5.4-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 drm-i915-fix-ref-mutex-deadlock-in-i915_active_wait.patch
and it can be found in the queue-5.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


>From sul...@kerneltoast.com  Fri Apr 10 11:07:34 2020
From: Sultan Alsawaf 
Date: Tue,  7 Apr 2020 00:18:09 -0700
Subject: drm/i915: Fix ref->mutex deadlock in i915_active_wait()
To: Greg KH 
Cc: sta...@vger.kernel.org, Jani Nikula , Joonas 
Lahtinen , Rodrigo Vivi 
, David Airlie , Daniel Vetter 
, Chris Wilson , 
intel-gfx@lists.freedesktop.org, dri-de...@lists.freedesktop.org, Sultan 
Alsawaf 
Message-ID: <20200407071809.3148-1-sul...@kerneltoast.com>

From: Sultan Alsawaf 

The following deadlock exists in i915_active_wait() due to a double lock
on ref->mutex (call chain listed in order from top to bottom):
 i915_active_wait();
 mutex_lock_interruptible(&ref->mutex); <-- ref->mutex first acquired
 i915_active_request_retire();
 node_retire();
 active_retire();
 mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); <-- DEADLOCK

Fix the deadlock by skipping the second ref->mutex lock when
active_retire() is called through i915_active_request_retire().

Note that this bug only affects 5.4 and has since been fixed in 5.5.
Normally, a backport of the fix from 5.5 would be in order, but the
patch set that fixes this deadlock involves massive changes that are
neither feasible nor desirable for backporting [1][2][3]. Therefore,
this small patch was made to address the deadlock specifically for 5.4.

[1] 274cbf20fd10 ("drm/i915: Push the i915_active.retire into a worker")
[2] 093b92287363 ("drm/i915: Split i915_active.mutex into an irq-safe spinlock 
for the rbtree")
[3] 750bde2fd4ff ("drm/i915: Serialise with remote retirement")

Fixes: 12c255b5dad1 ("drm/i915: Provide an i915_active.acquire callback")
Cc: <stable@vger.kernel.org> # 5.4.x
Signed-off-by: Sultan Alsawaf 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/gpu/drm/i915/i915_active.c |   27 +++
 drivers/gpu/drm/i915/i915_active.h |4 ++--
 2 files changed, 25 insertions(+), 6 deletions(-)

--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -120,13 +120,17 @@ static inline void debug_active_assert(s
 
 #endif
 
+#define I915_ACTIVE_RETIRE_NOLOCK BIT(0)
+
 static void
 __active_retire(struct i915_active *ref)
 {
struct active_node *it, *n;
struct rb_root root;
bool retire = false;
+   unsigned long bits;
 
+   ref = ptr_unpack_bits(ref, &bits, 2);
lockdep_assert_held(&ref->mutex);
 
/* return the unused nodes to our slabcache -- flushing the allocator */
@@ -138,7 +142,8 @@ __active_retire(struct i915_active *ref)
retire = true;
}
 
-   mutex_unlock(&ref->mutex);
+   if (!(bits & I915_ACTIVE_RETIRE_NOLOCK))
+       mutex_unlock(&ref->mutex);
if (!retire)
return;
 
@@ -155,13 +160,18 @@ __active_retire(struct i915_active *ref)
 static void
 active_retire(struct i915_active *ref)
 {
+   struct i915_active *ref_packed = ref;
+   unsigned long bits;
+
+   ref = ptr_unpack_bits(ref, &bits, 2);
GEM_BUG_ON(!atomic_read(&ref->count));
if (atomic_add_unless(&ref->count, -1, 1))
return;
 
/* One active may be flushed from inside the acquire of another */
-   mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
-   __active_retire(ref);
+   if (!(bits & I915_ACTIVE_RETIRE_NOLOCK))
+       mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
+   __active_retire(ref_packed);
 }
 
 static void
@@ -170,6 +180,14 @@ node_retire(struct i915_active_request *
active_retire(node_from_active(base)->ref);
 }
 
+static void
+node_retire_nolock(struct i915_active_request *base, struct i915_request *rq)
+{
+   struct i915_active *ref = node_from_active(base)->ref;
+
+   active_retire(ptr_pack_bits(ref, I915_ACTIVE_RETIRE_NOLOCK, 2));
+}
+
 static struct i915_active_request *
 active_instance(struct i915_active *ref, struct intel_timeline *tl)
 {
@@ -421,7 +439,8 @@ int i915_active_wait(struct i915_active
break;
}
 
-   err = i915_active_request_retire(&it->base, BKL(ref));
+   err = i915_active_request_retire(&it->base, BKL(ref),
+                                    node_retire_nolock);
if (err)
break;
}
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -309,7 +309,7 @@ i915_active_request_isset(const struct i
  */
 static inline int __must_check
 i915_active_request_retire(struct i915_active_request *active,
-