Re: [PATCH] drm/amdgpu: Optimize mutex usage (v3)
Hi Dave,

Is it easy to reproduce this backtrace? If it is easy, would you give
this patch a try?

From 3c83e1f05352f4795ddc2a8c9acca65b4b58ded9 Mon Sep 17 00:00:00 2001
From: Alex Xie
Date: Wed, 19 Jul 2017 21:51:56 -0400
Subject: [PATCH] drm/amdgpu: Fix a warning on suspicious RCU usage

From Dave Airlie:

[ 141.965723] =============================
[ 141.965724] WARNING: suspicious RCU usage
[ 141.965726] 4.12.0-rc7 #221 Not tainted
[ 141.965727] -----------------------------
[ 141.965728] /home/airlied/devel/kernel/linux-2.6/include/linux/rcupdate.h:531 Illegal context switch in RCU read-side critical section!
[ 141.965730] other info that might help us debug this:
[ 141.965731] rcu_scheduler_active = 2, debug_locks = 0
[ 141.965732] 1 lock held by amdgpu_cs:0/1332:
[ 141.965733] #0: (rcu_read_lock){..}, at: [] amdgpu_bo_list_get+0x0/0x109 [amdgpu]
[ 141.965774] stack backtrace:
[ 141.965776] CPU: 6 PID: 1332 Comm: amdgpu_cs:0 Not tainted 4.12.0-rc7 #221
[ 141.965777] Hardware name: To be filled by O.E.M. To be filled by O.E.M./M5A97 R2.0, BIOS 2603 06/26/2015
[ 141.965778] Call Trace:
[ 141.965782] dump_stack+0x68/0x92
[ 141.965785] lockdep_rcu_suspicious+0xf7/0x100
[ 141.965788] ___might_sleep+0x56/0x1fc
[ 141.965790] __might_sleep+0x68/0x6f
[ 141.965793] __mutex_lock+0x4e/0x7b5
[ 141.965817] ? amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
[ 141.965820] ? lock_acquire+0x125/0x1b9
[ 141.965844] ? amdgpu_bo_list_set+0x464/0x464 [amdgpu]
[ 141.965846] mutex_lock_nested+0x16/0x18
[ 141.965848] ? mutex_lock_nested+0x16/0x18
[ 141.965872] amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
[ 141.965895] amdgpu_cs_ioctl+0x4a0/0x17dd [amdgpu]
[ 141.965898] ? radix_tree_node_alloc.constprop.11+0x77/0xab
[ 141.965916] drm_ioctl+0x264/0x393 [drm]
[ 141.965939] ? amdgpu_cs_find_mapping+0x83/0x83 [amdgpu]
[ 141.965942] ? trace_hardirqs_on_caller+0x16a/0x186

Signed-off-by: Alex Xie
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index dc87962..565ca90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -198,12 +198,15 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
 	result = idr_find(&fpriv->bo_list_handles, id);
 
 	if (result) {
-		if (kref_get_unless_zero(&result->refcount))
+		if (kref_get_unless_zero(&result->refcount)) {
+			rcu_read_unlock();
 			mutex_lock(&result->lock);
-		else
+		}
+		else {
+			rcu_read_unlock();
 			result = NULL;
+		}
 	}
-	rcu_read_unlock();
 
 	return result;
 }
-- 
2.7.4

Thanks,
Alex Bin Xie

On 2017-07-19 09:21 PM, Dave Airlie wrote:
> On 16 June 2017 at 23:08, Alex Xie wrote:
>> In the original function amdgpu_bo_list_get, the wait
>> for result->lock can be quite long while the mutex
>> bo_list_lock is held. This can make other tasks wait
>> on bo_list_lock for a long period.
>>
>> Secondly, this patch allows several tasks (readers of the idr)
>> to proceed at the same time.
>>
>> v2: use rcu and kref (Dave Airlie and Christian König)
>> v3: update v1 commit message (Michel Dänzer)
>
> Got this, I'm guessing due to this patch.
>
> Dave.
>
> [ 141.965723] =============================
> [ 141.965724] WARNING: suspicious RCU usage
> [ 141.965726] 4.12.0-rc7 #221 Not tainted
> [ 141.965727] -----------------------------
> [ 141.965728] /home/airlied/devel/kernel/linux-2.6/include/linux/rcupdate.h:531 Illegal context switch in RCU read-side critical section!
> [ 141.965730] other info that might help us debug this:
> [ 141.965731] rcu_scheduler_active = 2, debug_locks = 0
> [ 141.965732] 1 lock held by amdgpu_cs:0/1332:
> [ 141.965733] #0: (rcu_read_lock){..}, at: [] amdgpu_bo_list_get+0x0/0x109 [amdgpu]
> [ 141.965774] stack backtrace:
> [ 141.965776] CPU: 6 PID: 1332 Comm: amdgpu_cs:0 Not tainted 4.12.0-rc7 #221
> [ 141.965777] Hardware name: To be filled by O.E.M. To be filled by O.E.M./M5A97 R2.0, BIOS 2603 06/26/2015
> [ 141.965778] Call Trace:
> [ 141.965782] dump_stack+0x68/0x92
> [ 141.965785] lockdep_rcu_suspicious+0xf7/0x100
> [ 141.965788] ___might_sleep+0x56/0x1fc
> [ 141.965790] __might_sleep+0x68/0x6f
> [ 141.965793] __mutex_lock+0x4e/0x7b5
> [ 141.965817] ? amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
> [ 141.965820] ? lock_acquire+0x125/0x1b9
> [ 141.965844] ? amdgpu_bo_list_set+0x464/0x464 [amdgpu]
> [ 141.965846] mutex_lock_nested+0x16/0x18
> [ 141.965848] ? mutex_lock_nested+0x16/0x18
> [ 141.965872] amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
> [ 141.965895] amdgpu_cs_ioctl+0x4a0/0x17dd [amdgpu]
> [ 141.965898] ? radix_tree_node_alloc.constprop.11+0x77/0xab
> [ 141.965916] drm_ioctl+0x264/0x393 [drm]
> [ 141.965939] ?
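For background on the fix above: mutex_lock() can sleep, and sleeping inside an RCU read-side critical section is illegal, which is exactly what the lockdep splat reports. Below is a minimal sketch of the lookup before and after the reordering; struct obj, handles, and the field names are illustrative stand-ins rather than the actual amdgpu code:

#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct obj {
        struct kref refcount;
        struct mutex lock;
};

/*
 * Broken ordering: mutex_lock() may sleep while we are still inside
 * the rcu_read_lock()/rcu_read_unlock() critical section.
 */
static struct obj *obj_lookup_broken(struct idr *handles, int id)
{
        struct obj *o;

        rcu_read_lock();
        o = idr_find(handles, id);
        if (o && kref_get_unless_zero(&o->refcount))
                mutex_lock(&o->lock);   /* BUG: may sleep under RCU */
        else
                o = NULL;
        rcu_read_unlock();
        return o;
}

/*
 * Fixed ordering: the reference taken under RCU pins the object, so
 * the RCU read-side critical section can end before we block.
 */
static struct obj *obj_lookup_fixed(struct idr *handles, int id)
{
        struct obj *o;

        rcu_read_lock();
        o = idr_find(handles, id);
        if (o && kref_get_unless_zero(&o->refcount)) {
                rcu_read_unlock();
                mutex_lock(&o->lock);   /* sleeping is legal here */
        } else {
                rcu_read_unlock();
                o = NULL;
        }
        return o;
}

The only change is where rcu_read_unlock() happens: once the kref is held, the object cannot be freed, so nothing still depends on the RCU critical section when the code blocks on the mutex.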
Re: [PATCH] drm/amdgpu: Optimize mutex usage (v3)
On 16 June 2017 at 23:08, Alex Xie wrote:
> In the original function amdgpu_bo_list_get, the wait
> for result->lock can be quite long while the mutex
> bo_list_lock is held. This can make other tasks wait
> on bo_list_lock for a long period.
>
> Secondly, this patch allows several tasks (readers of the idr)
> to proceed at the same time.
>
> v2: use rcu and kref (Dave Airlie and Christian König)
> v3: update v1 commit message (Michel Dänzer)

Got this, I'm guessing due to this patch.

Dave.

[ 141.965723] =============================
[ 141.965724] WARNING: suspicious RCU usage
[ 141.965726] 4.12.0-rc7 #221 Not tainted
[ 141.965727] -----------------------------
[ 141.965728] /home/airlied/devel/kernel/linux-2.6/include/linux/rcupdate.h:531 Illegal context switch in RCU read-side critical section!
[ 141.965730] other info that might help us debug this:
[ 141.965731] rcu_scheduler_active = 2, debug_locks = 0
[ 141.965732] 1 lock held by amdgpu_cs:0/1332:
[ 141.965733] #0: (rcu_read_lock){..}, at: [] amdgpu_bo_list_get+0x0/0x109 [amdgpu]
[ 141.965774] stack backtrace:
[ 141.965776] CPU: 6 PID: 1332 Comm: amdgpu_cs:0 Not tainted 4.12.0-rc7 #221
[ 141.965777] Hardware name: To be filled by O.E.M. To be filled by O.E.M./M5A97 R2.0, BIOS 2603 06/26/2015
[ 141.965778] Call Trace:
[ 141.965782] dump_stack+0x68/0x92
[ 141.965785] lockdep_rcu_suspicious+0xf7/0x100
[ 141.965788] ___might_sleep+0x56/0x1fc
[ 141.965790] __might_sleep+0x68/0x6f
[ 141.965793] __mutex_lock+0x4e/0x7b5
[ 141.965817] ? amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
[ 141.965820] ? lock_acquire+0x125/0x1b9
[ 141.965844] ? amdgpu_bo_list_set+0x464/0x464 [amdgpu]
[ 141.965846] mutex_lock_nested+0x16/0x18
[ 141.965848] ? mutex_lock_nested+0x16/0x18
[ 141.965872] amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
[ 141.965895] amdgpu_cs_ioctl+0x4a0/0x17dd [amdgpu]
[ 141.965898] ? radix_tree_node_alloc.constprop.11+0x77/0xab
[ 141.965916] drm_ioctl+0x264/0x393 [drm]
[ 141.965939] ? amdgpu_cs_find_mapping+0x83/0x83 [amdgpu]
[ 141.965942] ? trace_hardirqs_on_caller+0x16a/0x186

>
> Signed-off-by: Alex Xie
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 ++++++++++++++++++++++++++++++++++++++++------------
>  2 files changed, 30 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 063fc73..e9b3981 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -871,6 +871,8 @@ struct amdgpu_fpriv {
>
>  struct amdgpu_bo_list {
>          struct mutex lock;
> +        struct rcu_head rhead;
> +        struct kref refcount;
>          struct amdgpu_bo *gds_obj;
>          struct amdgpu_bo *gws_obj;
>          struct amdgpu_bo *oa_obj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index 5af956f..efa6903 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
>                                       struct drm_amdgpu_bo_list_entry *info,
>                                       unsigned num_entries);
>
> +static void amdgpu_bo_list_release_rcu(struct kref *ref)
> +{
> +        unsigned i;
> +        struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
> +                                                   refcount);
> +
> +        for (i = 0; i < list->num_entries; ++i)
> +                amdgpu_bo_unref(&list->array[i].robj);
> +
> +        mutex_destroy(&list->lock);
> +        drm_free_large(list->array);
> +        kfree_rcu(list, rhead);
> +}
> +
>  static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>                                   struct drm_file *filp,
>                                   struct drm_amdgpu_bo_list_entry *info,
> @@ -57,7 +71,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>
>          /* initialize bo list*/
>          mutex_init(&list->lock);
> -
> +        kref_init(&list->refcount);
>          r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
>          if (r) {
>                  kfree(list);
> @@ -83,14 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>
>          mutex_lock(&fpriv->bo_list_lock);
>          list = idr_remove(&fpriv->bo_list_handles, id);
> -        if (list) {
> -                /* Another user may have a reference to this list still */
> -                mutex_lock(&list->lock);
> -                mutex_unlock(&list->lock);
> -                amdgpu_bo_list_free(list);
> -        }
> -
>          mutex_unlock(&fpriv->bo_list_lock);
> +        if (list)
> +                kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
>  }
>
>  static int amdgpu_bo_list_set(struct amdgpu_device *adev,
> @@ -185,11 +194,17 @@ amdgpu_bo_list_get(struct
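For background on the original patch, reposted in full below: it replaces the lookup under fpriv->bo_list_lock with the common RCU plus kref lifetime protocol. The following is a minimal self-contained sketch of that protocol; struct obj and the function names are illustrative stand-ins rather than the actual amdgpu code:

#include <linux/idr.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct obj {
        struct kref refcount;
        struct rcu_head rhead;
};

static void obj_release_rcu(struct kref *ref)
{
        struct obj *o = container_of(ref, struct obj, refcount);

        /* Defer the actual kfree() until all current RCU readers are
         * done, so a concurrent idr_find() never sees freed memory. */
        kfree_rcu(o, rhead);
}

/* Reader: lockless lookup; only a still-nonzero refcount pins the object. */
static struct obj *obj_get(struct idr *handles, int id)
{
        struct obj *o;

        rcu_read_lock();
        o = idr_find(handles, id);
        if (o && !kref_get_unless_zero(&o->refcount))
                o = NULL;       /* lost the race against teardown */
        rcu_read_unlock();
        return o;
}

static void obj_put(struct obj *o)
{
        kref_put(&o->refcount, obj_release_rcu);
}

/* Writer: unpublish the handle first, then drop the idr's reference. */
static void obj_destroy(struct idr *handles, int id)
{
        struct obj *o = idr_remove(handles, id);

        if (o)
                obj_put(o);
}

The point of the protocol is that teardown never waits for readers: idr_remove() makes the object unreachable to new lookups, kref_get_unless_zero() fails for readers racing with the final kref_put(), and kfree_rcu() keeps the memory valid for readers already inside their critical section.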
[PATCH] drm/amdgpu: Optimize mutex usage (v3)
In the original function amdgpu_bo_list_get, the wait
for result->lock can be quite long while the mutex
bo_list_lock is held. This can make other tasks wait
on bo_list_lock for a long period.

Secondly, this patch allows several tasks (readers of the idr)
to proceed at the same time.

v2: use rcu and kref (Dave Airlie and Christian König)
v3: update v1 commit message (Michel Dänzer)

Signed-off-by: Alex Xie
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 ++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 063fc73..e9b3981 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -871,6 +871,8 @@ struct amdgpu_fpriv {
 
 struct amdgpu_bo_list {
         struct mutex lock;
+        struct rcu_head rhead;
+        struct kref refcount;
         struct amdgpu_bo *gds_obj;
         struct amdgpu_bo *gws_obj;
         struct amdgpu_bo *oa_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5af956f..efa6903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
                                      struct drm_amdgpu_bo_list_entry *info,
                                      unsigned num_entries);
 
+static void amdgpu_bo_list_release_rcu(struct kref *ref)
+{
+        unsigned i;
+        struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+                                                   refcount);
+
+        for (i = 0; i < list->num_entries; ++i)
+                amdgpu_bo_unref(&list->array[i].robj);
+
+        mutex_destroy(&list->lock);
+        drm_free_large(list->array);
+        kfree_rcu(list, rhead);
+}
+
 static int amdgpu_bo_list_create(struct amdgpu_device *adev,
                                  struct drm_file *filp,
                                  struct drm_amdgpu_bo_list_entry *info,
@@ -57,7 +71,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
 
         /* initialize bo list*/
         mutex_init(&list->lock);
-
+        kref_init(&list->refcount);
         r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
         if (r) {
                 kfree(list);
@@ -83,14 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
 
         mutex_lock(&fpriv->bo_list_lock);
         list = idr_remove(&fpriv->bo_list_handles, id);
-        if (list) {
-                /* Another user may have a reference to this list still */
-                mutex_lock(&list->lock);
-                mutex_unlock(&list->lock);
-                amdgpu_bo_list_free(list);
-        }
-
         mutex_unlock(&fpriv->bo_list_lock);
+        if (list)
+                kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 
 static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@@ -185,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
 {
         struct amdgpu_bo_list *result;
 
-        mutex_lock(&fpriv->bo_list_lock);
+        rcu_read_lock();
         result = idr_find(&fpriv->bo_list_handles, id);
-        if (result)
-                mutex_lock(&result->lock);
-        mutex_unlock(&fpriv->bo_list_lock);
+
+        if (result) {
+                if (kref_get_unless_zero(&result->refcount))
+                        mutex_lock(&result->lock);
+                else
+                        result = NULL;
+        }
+        rcu_read_unlock();
+
         return result;
 }
 
@@ -227,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
 {
         mutex_unlock(&list->lock);
+        kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx