[PATCH] dma-buf/fence-array: enable_signaling from wq
On Thu, Nov 03, 2016 at 07:34:02PM -0400, Rob Clark wrote: > On Thu, Nov 3, 2016 at 5:41 PM, Chris Wilson > wrote: > > static bool dma_fence_array_enable_signaling(struct dma_fence *fence) > > { > > struct dma_fence_array *array = to_dma_fence_array(fence); > > int num_pending = atomic_read(&array->num_pending); > > int i; > > > > for (i = 0; i < array->num_fences; i++) > > if (is_signaled(array->fences[i]) && !--num_pending) { > > atomic_set(&array->num_pending, 0); > > return false; > > } > > > > dma_fence_get(&array->base); > > queue_work(system_unbound_wq, &array->enable_signaling_worker); > > } > > hmm, I guess just to try to avoid the wq? Yeah, not all fences are capable of reporting the current status, and some others may only report signaled() after enable_signaling, but for i915/nouveau even msm can report the current status without the extra step. For those it seems worth skipping the wq. -Chris -- Chris Wilson, Intel Open Source Technology Centre
[PATCH] dma-buf/fence-array: enable_signaling from wq
On Thu, Nov 03, 2016 at 04:31:14PM -0400, Rob Clark wrote: > Currently with fence-array, we have a potential deadlock situation. If we > fence_add_callback() on an array-fence, the array-fence's lock is aquired > first, and in it's ->enable_signaling() callback, it will install cb's on > it's array-member fences, so the array-member's lock is acquired second. > > But in the signal path, the array-member's lock is acquired first, and the > array-fence's lock acquired second. Could mention that this is only an issue if the same fence->lock appeared more than once in the array. Which is possible. > diff --git a/drivers/dma-buf/dma-fence-array.c > b/drivers/dma-buf/dma-fence-array.c > index 67eb7c8..274bbb5 100644 > --- a/drivers/dma-buf/dma-fence-array.c > +++ b/drivers/dma-buf/dma-fence-array.c > @@ -43,9 +43,10 @@ static void dma_fence_array_cb_func(struct dma_fence *f, > dma_fence_put(>base); > } > > -static bool dma_fence_array_enable_signaling(struct dma_fence *fence) > +static void enable_signaling_worker(struct work_struct *w) > { > - struct dma_fence_array *array = to_dma_fence_array(fence); > + struct dma_fence_array *array = > + container_of(w, struct dma_fence_array, > enable_signaling_worker); > struct dma_fence_array_cb *cb = (void *)([1]); > unsigned i; > > @@ -63,11 +64,18 @@ static bool dma_fence_array_enable_signaling(struct > dma_fence *fence) > if (dma_fence_add_callback(array->fences[i], [i].cb, > dma_fence_array_cb_func)) { > dma_fence_put(>base); > - if (atomic_dec_and_test(>num_pending)) > - return false; > + if (atomic_dec_and_test(>num_pending)) { > + dma_fence_signal(>base); > + return; > + } > } > } > +} > > +static bool dma_fence_array_enable_signaling(struct dma_fence *fence) > +{ > + struct dma_fence_array *array = to_dma_fence_array(fence); > + queue_work(system_unbound_wq, >enable_signaling_worker); I think you need a dma_fence_get(fence) here with a corresponding put in the worker. 
Sadly I still can't poke a hole in the lockdep warning. What I might suggest is that we try static bool is_signaled(struct dma_fence *fence) { if (test_bit(SIGNALED_BIT, &fence->flags)) return true; return fence->ops->signaled && fence->ops->signaled(fence); } static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); int num_pending = atomic_read(&array->num_pending); int i; for (i = 0; i < array->num_fences; i++) if (is_signaled(array->fences[i]) && !--num_pending) { atomic_set(&array->num_pending, 0); return false; } dma_fence_get(&array->base); queue_work(system_unbound_wq, &array->enable_signaling_worker); } -Chris -- Chris Wilson, Intel Open Source Technology Centre
[PATCH] dma-buf/fence-array: enable_signaling from wq
On Thu, Nov 3, 2016 at 5:41 PM, Chris Wilson wrote: > On Thu, Nov 03, 2016 at 04:31:14PM -0400, Rob Clark wrote: >> Currently with fence-array, we have a potential deadlock situation. If we >> fence_add_callback() on an array-fence, the array-fence's lock is aquired >> first, and in it's ->enable_signaling() callback, it will install cb's on >> it's array-member fences, so the array-member's lock is acquired second. >> >> But in the signal path, the array-member's lock is acquired first, and the >> array-fence's lock acquired second. > > Could mention that this is only an issue if the same fence->lock > appeared more than once in the array. Which is possible. hmm, lockdep will care more about lock classes than lock instances.. >> diff --git a/drivers/dma-buf/dma-fence-array.c >> b/drivers/dma-buf/dma-fence-array.c >> index 67eb7c8..274bbb5 100644 >> --- a/drivers/dma-buf/dma-fence-array.c >> +++ b/drivers/dma-buf/dma-fence-array.c >> @@ -43,9 +43,10 @@ static void dma_fence_array_cb_func(struct dma_fence *f, >> dma_fence_put(>base); >> } >> >> -static bool dma_fence_array_enable_signaling(struct dma_fence *fence) >> +static void enable_signaling_worker(struct work_struct *w) >> { >> - struct dma_fence_array *array = to_dma_fence_array(fence); >> + struct dma_fence_array *array = >> + container_of(w, struct dma_fence_array, >> enable_signaling_worker); >> struct dma_fence_array_cb *cb = (void *)([1]); >> unsigned i; >> >> @@ -63,11 +64,18 @@ static bool dma_fence_array_enable_signaling(struct >> dma_fence *fence) >> if (dma_fence_add_callback(array->fences[i], [i].cb, >> dma_fence_array_cb_func)) { >> dma_fence_put(>base); >> - if (atomic_dec_and_test(>num_pending)) >> - return false; >> + if (atomic_dec_and_test(>num_pending)) { >> + dma_fence_signal(>base); >> + return; >> + } >> } >> } >> +} >> >> +static bool dma_fence_array_enable_signaling(struct dma_fence *fence) >> +{ >> + struct dma_fence_array *array = to_dma_fence_array(fence); >> + 
queue_work(system_unbound_wq, >enable_signaling_worker); > > I think you need a dma_fence_get(fence) here with a corresponding put in > the worker. Sadly I still can't poke a hole in the lockdep warning. oh, right, extra get/put makes sense.. fwiw it should be easy enough to trigger with some code that merges a sw-sync fence w/ other fences.. > What I might suggest is that we try > > static bool is_signaled(struct dma_fence *fence) > { > if (test_bit(SIGNALED_BIT, >flags)) > return true; > > return fence->ops->signaled && fence->ops->signaled(fence); > } > > static bool dma_fence_array_enable_signaling(struct dma_fence *fence) > { > struct dma_fence_array *array = to_dma_fence_array(fence); > int num_pending = atomic_read(>num_pending); > int i; > > for (i = 0; i < array->num_fences; i++) > if (is_signaled(array->fences[i]) && !--num_pending) { > atomic_set(>num_pending, 0); > return false; > } > > dma_fence_get(>base); > queue_work(system_unbound_wq, >enable_signaling_worker); > } hmm, I guess just to try to avoid the wq? BR, -R > -Chris > > -- > Chris Wilson, Intel Open Source Technology Centre
[PATCH] dma-buf/fence-array: enable_signaling from wq
Currently with fence-array, we have a potential deadlock situation. If we fence_add_callback() on an array-fence, the array-fence's lock is acquired first, and in its ->enable_signaling() callback, it will install cb's on its array-member fences, so the array-member's lock is acquired second. But in the signal path, the array-member's lock is acquired first, and the array-fence's lock acquired second. To solve that, punt adding the callbacks on the array member fences to a worker. lockdep splat: == [ INFO: possible circular locking dependency detected ] 4.7.0-rc7+ #489 Not tainted --- surfaceflinger/2034 is trying to acquire lock: (&(&array->lock)->rlock){..}, at: [] fence_signal+0x5c/0xf8 but task is already holding lock: (&(&obj->child_list_lock)->rlock){..}, at: [] sw_sync_ioctl+0x228/0x3b0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(&obj->child_list_lock)->rlock){..}: [] __lock_acquire+0x173c/0x18d8 [] lock_acquire+0x4c/0x68 [] _raw_spin_lock_irqsave+0x54/0x70 [] fence_add_callback+0x3c/0x100 [] fence_array_enable_signaling+0x80/0x170 [] fence_add_callback+0xb8/0x100 [] sync_file_poll+0xd4/0xf0 [] do_sys_poll+0x220/0x438 [] SyS_ppoll+0x1b0/0x1d8 [] el0_svc_naked+0x24/0x28 -> #0 (&(&array->lock)->rlock){..}: [] print_circular_bug+0x80/0x2e0 [] __lock_acquire+0x17c4/0x18d8 [] lock_acquire+0x4c/0x68 [] _raw_spin_lock_irqsave+0x54/0x70 [] fence_signal+0x5c/0xf8 [] fence_array_cb_func+0x78/0x88 [] fence_signal_locked+0x80/0xe0 [] sw_sync_ioctl+0x2f8/0x3b0 [] do_vfs_ioctl+0xa4/0x790 [] SyS_ioctl+0x8c/0xa0 [] el0_svc_naked+0x24/0x28 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 lock(&(&obj->child_list_lock)->rlock); lock(&(&array->lock)->rlock); lock(&(&obj->child_list_lock)->rlock); lock(&(&array->lock)->rlock); *** DEADLOCK *** 1 lock held by surfaceflinger/2034: #0: (&(&obj->child_list_lock)->rlock){..}, at: [] sw_sync_ioctl+0x228/0x3b0 Signed-off-by: Rob Clark --- drivers/dma-buf/dma-fence-array.c | 18 ++
include/linux/dma-fence-array.h | 4 ++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 67eb7c8..274bbb5 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -43,9 +43,10 @@ static void dma_fence_array_cb_func(struct dma_fence *f, dma_fence_put(&array->base); } -static bool dma_fence_array_enable_signaling(struct dma_fence *fence) +static void enable_signaling_worker(struct work_struct *w) { - struct dma_fence_array *array = to_dma_fence_array(fence); + struct dma_fence_array *array = + container_of(w, struct dma_fence_array, enable_signaling_worker); struct dma_fence_array_cb *cb = (void *)(&array[1]); unsigned i; @@ -63,11 +64,18 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence) if (dma_fence_add_callback(array->fences[i], &cb[i].cb, dma_fence_array_cb_func)) { dma_fence_put(&array->base); - if (atomic_dec_and_test(&array->num_pending)) - return false; + if (atomic_dec_and_test(&array->num_pending)) { + dma_fence_signal(&array->base); + return; + } } } +} +static bool dma_fence_array_enable_signaling(struct dma_fence *fence) +{ + struct dma_fence_array *array = to_dma_fence_array(fence); + queue_work(system_unbound_wq, &array->enable_signaling_worker); return true; } @@ -141,6 +149,8 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, atomic_set(&array->num_pending, signal_on_any ?
1 : num_fences); array->fences = fences; + INIT_WORK(&array->enable_signaling_worker, enable_signaling_worker); + return array; } EXPORT_SYMBOL(dma_fence_array_create); diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 5900945..f48e8f4 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -35,6 +35,8 @@ struct dma_fence_array_cb { /** * struct dma_fence_array - fence to represent an array of fences * @base: fence base class + * @enable_signaling_worker: &work_struct for deferring enable_signaling + * to context not holding fence->lock * @lock: spinlock for fence handling * @num_fences: number of fences in the array * @num_pending: fences in the array still pending @@ -43,6 +45,8 @@ struct