[PATCH] dma-buf/fence-array: enable_signaling from wq

2016-11-04 Thread Chris Wilson
On Thu, Nov 03, 2016 at 07:34:02PM -0400, Rob Clark wrote:
> On Thu, Nov 3, 2016 at 5:41 PM, Chris Wilson <chris@chris-wilson.co.uk> 
> wrote:
> > static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
> > {
> > struct dma_fence_array *array = to_dma_fence_array(fence);
> > int num_pending = atomic_read(&array->num_pending);
> > int i;
> >
> > for (i = 0; i < array->num_fences; i++)
> > if (is_signaled(array->fences[i]) && !--num_pending) {
> > atomic_set(&array->num_pending, 0);
> > return false;
> > }
> >
> > dma_fence_get(&array->base);
> > queue_work(system_unbound_wq, &array->enable_signaling_worker);
> > }
> 
> hmm, I guess just to try to avoid the wq?

Yeah, not all fences are capable of reporting the current status, and
some others may only report signaled() after enable_signaling, but for
i915/nouveau even msm can report the current status without the extra
step. For those it seems worth skipping the wq.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] dma-buf/fence-array: enable_signaling from wq

2016-11-03 Thread Chris Wilson
On Thu, Nov 03, 2016 at 04:31:14PM -0400, Rob Clark wrote:
> Currently with fence-array, we have a potential deadlock situation.  If we
> fence_add_callback() on an array-fence, the array-fence's lock is aquired
> first, and in it's ->enable_signaling() callback, it will install cb's on
> it's array-member fences, so the array-member's lock is acquired second.
> 
> But in the signal path, the array-member's lock is acquired first, and the
> array-fence's lock acquired second.

Could mention that this is only an issue if the same fence->lock
appeared more than once in the array. Which is possible.

> diff --git a/drivers/dma-buf/dma-fence-array.c 
> b/drivers/dma-buf/dma-fence-array.c
> index 67eb7c8..274bbb5 100644
> --- a/drivers/dma-buf/dma-fence-array.c
> +++ b/drivers/dma-buf/dma-fence-array.c
> @@ -43,9 +43,10 @@ static void dma_fence_array_cb_func(struct dma_fence *f,
>   dma_fence_put(&array->base);
>  }
>  
> -static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
> +static void enable_signaling_worker(struct work_struct *w)
>  {
> - struct dma_fence_array *array = to_dma_fence_array(fence);
> + struct dma_fence_array *array =
> + container_of(w, struct dma_fence_array, 
> enable_signaling_worker);
>   struct dma_fence_array_cb *cb = (void *)(&array[1]);
>   unsigned i;
>  
> @@ -63,11 +64,18 @@ static bool dma_fence_array_enable_signaling(struct 
> dma_fence *fence)
>   if (dma_fence_add_callback(array->fences[i], &cb[i].cb,
>  dma_fence_array_cb_func)) {
>   dma_fence_put(&array->base);
> - if (atomic_dec_and_test(&array->num_pending))
> - return false;
> + if (atomic_dec_and_test(&array->num_pending)) {
> + dma_fence_signal(&array->base);
> + return;
> + }
>   }
>   }
> +}
>  
> +static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
> +{
> + struct dma_fence_array *array = to_dma_fence_array(fence);
> + queue_work(system_unbound_wq, &array->enable_signaling_worker);

I think you need a dma_fence_get(fence) here with a corresponding put in
the worker. Sadly I still can't poke a hole in the lockdep warning.

What I might suggest is that we try

static bool is_signaled(struct dma_fence *fence)
{
if (test_bit(SIGNALED_BIT, &fence->flags))
return true;

return fence->ops->signaled && fence->ops->signaled(fence);
}

static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
{
struct dma_fence_array *array = to_dma_fence_array(fence);
int num_pending = atomic_read(&array->num_pending);
int i;

for (i = 0; i < array->num_fences; i++)
if (is_signaled(array->fences[i]) && !--num_pending) {
atomic_set(&array->num_pending, 0);
return false;
}

dma_fence_get(&array->base);
queue_work(system_unbound_wq, &array->enable_signaling_worker);
}
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] dma-buf/fence-array: enable_signaling from wq

2016-11-03 Thread Rob Clark
On Thu, Nov 3, 2016 at 5:41 PM, Chris Wilson <chris@chris-wilson.co.uk> 
wrote:
> On Thu, Nov 03, 2016 at 04:31:14PM -0400, Rob Clark wrote:
>> Currently with fence-array, we have a potential deadlock situation.  If we
>> fence_add_callback() on an array-fence, the array-fence's lock is aquired
>> first, and in it's ->enable_signaling() callback, it will install cb's on
>> it's array-member fences, so the array-member's lock is acquired second.
>>
>> But in the signal path, the array-member's lock is acquired first, and the
>> array-fence's lock acquired second.
>
> Could mention that this is only an issue if the same fence->lock
> appeared more than once in the array. Which is possible.

hmm, lockdep will care more about lock classes than lock instances..

>> diff --git a/drivers/dma-buf/dma-fence-array.c 
>> b/drivers/dma-buf/dma-fence-array.c
>> index 67eb7c8..274bbb5 100644
>> --- a/drivers/dma-buf/dma-fence-array.c
>> +++ b/drivers/dma-buf/dma-fence-array.c
>> @@ -43,9 +43,10 @@ static void dma_fence_array_cb_func(struct dma_fence *f,
>>   dma_fence_put(&array->base);
>>  }
>>
>> -static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
>> +static void enable_signaling_worker(struct work_struct *w)
>>  {
>> - struct dma_fence_array *array = to_dma_fence_array(fence);
>> + struct dma_fence_array *array =
>> + container_of(w, struct dma_fence_array, 
>> enable_signaling_worker);
>>   struct dma_fence_array_cb *cb = (void *)(&array[1]);
>>   unsigned i;
>>
>> @@ -63,11 +64,18 @@ static bool dma_fence_array_enable_signaling(struct 
>> dma_fence *fence)
>>   if (dma_fence_add_callback(array->fences[i], &cb[i].cb,
>>  dma_fence_array_cb_func)) {
>>   dma_fence_put(&array->base);
>> - if (atomic_dec_and_test(&array->num_pending))
>> - return false;
>> + if (atomic_dec_and_test(&array->num_pending)) {
>> + dma_fence_signal(&array->base);
>> + return;
>> + }
>>   }
>>   }
>> +}
>>
>> +static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
>> +{
>> + struct dma_fence_array *array = to_dma_fence_array(fence);
>> + queue_work(system_unbound_wq, &array->enable_signaling_worker);
>
> I think you need a dma_fence_get(fence) here with a corresponding put in
> the worker. Sadly I still can't poke a hole in the lockdep warning.

oh, right, extra get/put makes sense.. fwiw it should be easy enough
to trigger with some code that merges a sw-sync fence w/ other
fences..

> What I might suggest is that we try
>
> static bool is_signaled(struct dma_fence *fence)
> {
> if (test_bit(SIGNALED_BIT, &fence->flags))
> return true;
>
> return fence->ops->signaled && fence->ops->signaled(fence);
> }
>
> static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
> {
> struct dma_fence_array *array = to_dma_fence_array(fence);
> int num_pending = atomic_read(&array->num_pending);
> int i;
>
> for (i = 0; i < array->num_fences; i++)
> if (is_signaled(array->fences[i]) && !--num_pending) {
> atomic_set(&array->num_pending, 0);
> return false;
> }
>
> dma_fence_get(&array->base);
> queue_work(system_unbound_wq, &array->enable_signaling_worker);
> }

hmm, I guess just to try to avoid the wq?

BR,
-R

> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre


[PATCH] dma-buf/fence-array: enable_signaling from wq

2016-11-03 Thread Rob Clark
Currently with fence-array, we have a potential deadlock situation.  If we
fence_add_callback() on an array-fence, the array-fence's lock is aquired
first, and in it's ->enable_signaling() callback, it will install cb's on
it's array-member fences, so the array-member's lock is acquired second.

But in the signal path, the array-member's lock is acquired first, and the
array-fence's lock acquired second.

To solve that, punt adding the callbacks on the array member fences to a
worker.

lockdep splat:

 ==
[ INFO: possible circular locking dependency detected ]
4.7.0-rc7+ #489 Not tainted
---
surfaceflinger/2034 is trying to acquire lock:
 (&(&array->lock)->rlock){..}, at: [] 
fence_signal+0x5c/0xf8

but task is already holding lock:
 (&(&obj->child_list_lock)->rlock){..}, at: [] 
sw_sync_ioctl+0x228/0x3b0

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&(&obj->child_list_lock)->rlock){..}:
   [] __lock_acquire+0x173c/0x18d8
   [] lock_acquire+0x4c/0x68
   [] _raw_spin_lock_irqsave+0x54/0x70
   [] fence_add_callback+0x3c/0x100
   [] fence_array_enable_signaling+0x80/0x170
   [] fence_add_callback+0xb8/0x100
   [] sync_file_poll+0xd4/0xf0
   [] do_sys_poll+0x220/0x438
   [] SyS_ppoll+0x1b0/0x1d8
   [] el0_svc_naked+0x24/0x28

-> #0 (&(&array->lock)->rlock){..}:
   [] print_circular_bug+0x80/0x2e0
   [] __lock_acquire+0x17c4/0x18d8
   [] lock_acquire+0x4c/0x68
   [] _raw_spin_lock_irqsave+0x54/0x70
   [] fence_signal+0x5c/0xf8
   [] fence_array_cb_func+0x78/0x88
   [] fence_signal_locked+0x80/0xe0
   [] sw_sync_ioctl+0x2f8/0x3b0
   [] do_vfs_ioctl+0xa4/0x790
   [] SyS_ioctl+0x8c/0xa0
   [] el0_svc_naked+0x24/0x28

other info that might help us debug this:

 Possible unsafe locking scenario:

   CPU0CPU1
   
  lock(&(&obj->child_list_lock)->rlock);
   lock(&(&array->lock)->rlock);
   lock(&(&obj->child_list_lock)->rlock);
  lock(&(&array->lock)->rlock);

 *** DEADLOCK ***

1 lock held by surfaceflinger/2034:
 #0:  (&(&obj->child_list_lock)->rlock){..}, at: [] 
sw_sync_ioctl+0x228/0x3b0

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/dma-buf/dma-fence-array.c | 18 ++
 include/linux/dma-fence-array.h   |  4 
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/dma-buf/dma-fence-array.c 
b/drivers/dma-buf/dma-fence-array.c
index 67eb7c8..274bbb5 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -43,9 +43,10 @@ static void dma_fence_array_cb_func(struct dma_fence *f,
dma_fence_put(&array->base);
 }

-static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
+static void enable_signaling_worker(struct work_struct *w)
 {
-   struct dma_fence_array *array = to_dma_fence_array(fence);
+   struct dma_fence_array *array =
+   container_of(w, struct dma_fence_array, 
enable_signaling_worker);
struct dma_fence_array_cb *cb = (void *)(&array[1]);
unsigned i;

@@ -63,11 +64,18 @@ static bool dma_fence_array_enable_signaling(struct 
dma_fence *fence)
if (dma_fence_add_callback(array->fences[i], &cb[i].cb,
   dma_fence_array_cb_func)) {
dma_fence_put(&array->base);
-   if (atomic_dec_and_test(&array->num_pending))
-   return false;
+   if (atomic_dec_and_test(&array->num_pending)) {
+   dma_fence_signal(&array->base);
+   return;
+   }
}
}
+}

+static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
+{
+   struct dma_fence_array *array = to_dma_fence_array(fence);
+   queue_work(system_unbound_wq, &array->enable_signaling_worker);
return true;
 }

@@ -141,6 +149,8 @@ struct dma_fence_array *dma_fence_array_create(int 
num_fences,
atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
array->fences = fences;

+   INIT_WORK(&array->enable_signaling_worker, enable_signaling_worker);
+
return array;
 }
 EXPORT_SYMBOL(dma_fence_array_create);
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
index 5900945..f48e8f4 100644
--- a/include/linux/dma-fence-array.h
+++ b/include/linux/dma-fence-array.h
@@ -35,6 +35,8 @@ struct dma_fence_array_cb {
 /**
  * struct dma_fence_array - fence to represent an array of fences
  * @base: fence base class
+ * @enable_signaling_worker: &work_struct for deferring enable_signaling
+ *to context not holding fence->lock
  * @lock: spinlock for fence handling
  * @num_fences: number of fences in the array
  * @num_pending: fences in the array still pending
@@ -43,6 +45,8 @@ struct