Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3)

2017-03-20 Thread Daniel Vetter
On Mon, Mar 20, 2017 at 08:16:36AM +, Chris Wilson wrote:
> On Mon, Mar 20, 2017 at 05:03:04PM +1000, Dave Airlie wrote:
> > From: Dave Airlie 
> > 
> > This patch allows the underlying fence in a sync_file to be changed
> > or set to NULL. This isn't currently required but for Vulkan
> > semaphores we need to be able to swap and reset the fence.
> > 
> > In order to faciliate this, it uses rcu to protect the fence,
> > along with a new mutex. The mutex also protects the callback.
> > It also checks for NULL when retrieving the rcu protected
> > fence in case it has been reset.
> > 
> > v1.1: fix the locking (Julia Lawall).
> > v2: use rcu try one
> > v3: fix poll to use proper rcu, fixup merge/fill ioctls
> > to not crash on NULL fence cases.
> > 
> > Signed-off-by: Dave Airlie 
> > ---
> > @@ -80,7 +87,9 @@ struct sync_file *sync_file_create(struct dma_fence 
> > *fence)
> > if (!sync_file)
> > return NULL;
> >  
> > -   sync_file->fence = dma_fence_get(fence);
> > +   dma_fence_get(fence);
> > +
> > +   RCU_INIT_POINTER(sync_file->fence, fence);
> >  
> > snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
> >  fence->ops->get_driver_name(fence),
> > @@ -124,13 +133,26 @@ struct dma_fence *sync_file_get_fence(int fd)
> > if (!sync_file)
> > return NULL;
> >  
> > -   fence = dma_fence_get(sync_file->fence);
> > +   if (!rcu_access_pointer(sync_file->fence))
> > +   return NULL;
> 
> Missed fput.
> 
> > +
> > +   rcu_read_lock();
> > +   fence = dma_fence_get_rcu_safe(_file->fence);
> > +   rcu_read_unlock();
> > +
> > fput(sync_file->file);
> >  
> > return fence;
> >  }
> >  EXPORT_SYMBOL(sync_file_get_fence);
> >  
> > @@ -204,10 +234,16 @@ static struct sync_file *sync_file_merge(const char 
> > *name, struct sync_file *a,
> > if (!sync_file)
> > return NULL;
> >  
> > +   mutex_lock(>lock);
> > +   mutex_lock(>lock);
> 
> This allows userspace to trigger lockdep (just merge the same pair of
> sync_files again in opposite order). if (b < a) swap(a, b); ?

Do we even need the look? 1) rcu-lookup the fences (which are invariant)
2) merge them 3) create new syncfile for them. I don't see a need for
taking locks here.

> 
> > a_fences = get_fences(a, _num_fences);
> > b_fences = get_fences(b, _num_fences);
> > -   if (a_num_fences > INT_MAX - b_num_fences)
> > -   return NULL;
> > +   if (!a_num_fences || !b_num_fences)
> > +   goto unlock;
> > +
> > +   if (a_num_fences > INT_MAX - b_num_fences) {
> > +   goto unlock;
> > +   }
> >  
> > num_fences = a_num_fences + b_num_fences;
> >  
> > @@ -281,10 +323,15 @@ static void sync_file_free(struct kref *kref)
> >  {
> > struct sync_file *sync_file = container_of(kref, struct sync_file,
> >  kref);
> > +   struct dma_fence *fence;
> > +
> 
> Somewhere, here?, it would be useful to add a comment that the rcu
> delayed free is provided by fput.
> 
> > +   fence = rcu_dereference_protected(sync_file->fence, 1);
> > +   if (fence) {
> > +   if (test_bit(POLL_ENABLED, >flags))
> > +   dma_fence_remove_callback(fence, _file->cb);
> > +   dma_fence_put(fence);
> > +   }
> >  
> > -   if (test_bit(POLL_ENABLED, _file->fence->flags))
> > -   dma_fence_remove_callback(sync_file->fence, _file->cb);
> > -   dma_fence_put(sync_file->fence);
> > kfree(sync_file);
> >  }
> >  
> > @@ -299,16 +346,25 @@ static int sync_file_release(struct inode *inode, 
> > struct file *file)
> >  static unsigned int sync_file_poll(struct file *file, poll_table *wait)
> >  {
> > struct sync_file *sync_file = file->private_data;
> > +   unsigned int ret_val = 0;
> > +   struct dma_fence *fence;
> >  
> > poll_wait(file, _file->wq, wait);
> >  
> > -   if (!test_and_set_bit(POLL_ENABLED, _file->fence->flags)) {
> > -   if (dma_fence_add_callback(sync_file->fence, _file->cb,
> > -  fence_check_cb_func) < 0)
> > -   wake_up_all(_file->wq);
> > +   mutex_lock(_file->lock);
> > +
> > +   fence = sync_file_get_fence_locked(sync_file);
> 
> Why do you need the locked version here and not just the rcu variant?

+1 :-) I think the lock should only be needed when you update the fence,
everywhere else we should be able to get away with rcu.

> > +   if (fence) {
> > +   if (!test_and_set_bit(POLL_ENABLED, >flags)) {
> > +   if (dma_fence_add_callback(fence, _file->cb,
> > +  fence_check_cb_func) < 0)
> > +   wake_up_all(_file->wq);
> > +   }
> > +   ret_val = dma_fence_is_signaled(fence) ? POLLIN : 0;
> > }
> > +   mutex_unlock(_file->lock);
> 
> So an empty sync_file is incomplete and blocks forever? Why? It's the
> opposite behaviour to e.g. 

Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3)

2017-03-20 Thread Chris Wilson
On Mon, Mar 20, 2017 at 05:03:04PM +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> This patch allows the underlying fence in a sync_file to be changed
> or set to NULL. This isn't currently required but for Vulkan
> semaphores we need to be able to swap and reset the fence.
> 
> In order to faciliate this, it uses rcu to protect the fence,
> along with a new mutex. The mutex also protects the callback.
> It also checks for NULL when retrieving the rcu protected
> fence in case it has been reset.
> 
> v1.1: fix the locking (Julia Lawall).
> v2: use rcu try one
> v3: fix poll to use proper rcu, fixup merge/fill ioctls
> to not crash on NULL fence cases.
> 
> Signed-off-by: Dave Airlie 
> ---
> @@ -124,13 +133,26 @@ struct dma_fence *sync_file_get_fence(int fd)
>   if (!sync_file)
>   return NULL;
>  
> - fence = dma_fence_get(sync_file->fence);
> + if (!rcu_access_pointer(sync_file->fence))
> + return NULL;
> +
> + rcu_read_lock();
> + fence = dma_fence_get_rcu_safe(_file->fence);
> + rcu_read_unlock();
> +
>   fput(sync_file->file);

So poll will wait until the fence is set before the sync_file is
signaled, but here we return NULL. At the moment this is interpretted by
the callers as an error (since we can't distinguish between the lookup
error and the empty sync_file). However, if it is empty we also want to
delay the dependent execution until the fence is set to match the poll
semantics.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3)

2017-03-20 Thread Chris Wilson
On Mon, Mar 20, 2017 at 05:03:04PM +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> This patch allows the underlying fence in a sync_file to be changed
> or set to NULL. This isn't currently required but for Vulkan
> semaphores we need to be able to swap and reset the fence.
> 
> In order to faciliate this, it uses rcu to protect the fence,
> along with a new mutex. The mutex also protects the callback.
> It also checks for NULL when retrieving the rcu protected
> fence in case it has been reset.
> 
> v1.1: fix the locking (Julia Lawall).
> v2: use rcu try one
> v3: fix poll to use proper rcu, fixup merge/fill ioctls
> to not crash on NULL fence cases.
> 
> Signed-off-by: Dave Airlie 
> ---
> @@ -80,7 +87,9 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
>   if (!sync_file)
>   return NULL;
>  
> - sync_file->fence = dma_fence_get(fence);
> + dma_fence_get(fence);
> +
> + RCU_INIT_POINTER(sync_file->fence, fence);
>  
>   snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
>fence->ops->get_driver_name(fence),
> @@ -124,13 +133,26 @@ struct dma_fence *sync_file_get_fence(int fd)
>   if (!sync_file)
>   return NULL;
>  
> - fence = dma_fence_get(sync_file->fence);
> + if (!rcu_access_pointer(sync_file->fence))
> + return NULL;

Missed fput.

> +
> + rcu_read_lock();
> + fence = dma_fence_get_rcu_safe(_file->fence);
> + rcu_read_unlock();
> +
>   fput(sync_file->file);
>  
>   return fence;
>  }
>  EXPORT_SYMBOL(sync_file_get_fence);
>  
> @@ -204,10 +234,16 @@ static struct sync_file *sync_file_merge(const char 
> *name, struct sync_file *a,
>   if (!sync_file)
>   return NULL;
>  
> + mutex_lock(>lock);
> + mutex_lock(>lock);

This allows userspace to trigger lockdep (just merge the same pair of
sync_files again in opposite order). if (b < a) swap(a, b); ?

>   a_fences = get_fences(a, _num_fences);
>   b_fences = get_fences(b, _num_fences);
> - if (a_num_fences > INT_MAX - b_num_fences)
> - return NULL;
> + if (!a_num_fences || !b_num_fences)
> + goto unlock;
> +
> + if (a_num_fences > INT_MAX - b_num_fences) {
> + goto unlock;
> + }
>  
>   num_fences = a_num_fences + b_num_fences;
>  
> @@ -281,10 +323,15 @@ static void sync_file_free(struct kref *kref)
>  {
>   struct sync_file *sync_file = container_of(kref, struct sync_file,
>kref);
> + struct dma_fence *fence;
> +

Somewhere, here?, it would be useful to add a comment that the rcu
delayed free is provided by fput.

> + fence = rcu_dereference_protected(sync_file->fence, 1);
> + if (fence) {
> + if (test_bit(POLL_ENABLED, >flags))
> + dma_fence_remove_callback(fence, _file->cb);
> + dma_fence_put(fence);
> + }
>  
> - if (test_bit(POLL_ENABLED, _file->fence->flags))
> - dma_fence_remove_callback(sync_file->fence, _file->cb);
> - dma_fence_put(sync_file->fence);
>   kfree(sync_file);
>  }
>  
> @@ -299,16 +346,25 @@ static int sync_file_release(struct inode *inode, 
> struct file *file)
>  static unsigned int sync_file_poll(struct file *file, poll_table *wait)
>  {
>   struct sync_file *sync_file = file->private_data;
> + unsigned int ret_val = 0;
> + struct dma_fence *fence;
>  
>   poll_wait(file, _file->wq, wait);
>  
> - if (!test_and_set_bit(POLL_ENABLED, _file->fence->flags)) {
> - if (dma_fence_add_callback(sync_file->fence, _file->cb,
> -fence_check_cb_func) < 0)
> - wake_up_all(_file->wq);
> + mutex_lock(_file->lock);
> +
> + fence = sync_file_get_fence_locked(sync_file);

Why do you need the locked version here and not just the rcu variant?

> + if (fence) {
> + if (!test_and_set_bit(POLL_ENABLED, >flags)) {
> + if (dma_fence_add_callback(fence, _file->cb,
> +fence_check_cb_func) < 0)
> + wake_up_all(_file->wq);
> + }
> + ret_val = dma_fence_is_signaled(fence) ? POLLIN : 0;
>   }
> + mutex_unlock(_file->lock);

So an empty sync_file is incomplete and blocks forever? Why? It's the
opposite behaviour to e.g. reservation_object so a quick explanation of
how that is used by VkSemaphore will cement the differences.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3)

2017-03-20 Thread Dave Airlie
From: Dave Airlie 

This patch allows the underlying fence in a sync_file to be changed
or set to NULL. This isn't currently required but for Vulkan
semaphores we need to be able to swap and reset the fence.

In order to faciliate this, it uses rcu to protect the fence,
along with a new mutex. The mutex also protects the callback.
It also checks for NULL when retrieving the rcu protected
fence in case it has been reset.

v1.1: fix the locking (Julia Lawall).
v2: use rcu try one
v3: fix poll to use proper rcu, fixup merge/fill ioctls
to not crash on NULL fence cases.

Signed-off-by: Dave Airlie 
---
 drivers/dma-buf/sync_file.c | 112 +++-
 include/linux/sync_file.h   |   5 +-
 2 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 2321035..dcba931 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -28,6 +28,10 @@
 
 static const struct file_operations sync_file_fops;
 
+#define sync_file_held(obj) lockdep_is_held(&(obj)->lock)
+#define sync_file_assert_held(obj) \
+   lockdep_assert_held(&(obj)->lock)
+
 static struct sync_file *sync_file_alloc(void)
 {
struct sync_file *sync_file;
@@ -47,6 +51,9 @@ static struct sync_file *sync_file_alloc(void)
 
INIT_LIST_HEAD(_file->cb.node);
 
+   RCU_INIT_POINTER(sync_file->fence, NULL);
+
+   mutex_init(_file->lock);
return sync_file;
 
 err:
@@ -80,7 +87,9 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
if (!sync_file)
return NULL;
 
-   sync_file->fence = dma_fence_get(fence);
+   dma_fence_get(fence);
+
+   RCU_INIT_POINTER(sync_file->fence, fence);
 
snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
 fence->ops->get_driver_name(fence),
@@ -124,13 +133,26 @@ struct dma_fence *sync_file_get_fence(int fd)
if (!sync_file)
return NULL;
 
-   fence = dma_fence_get(sync_file->fence);
+   if (!rcu_access_pointer(sync_file->fence))
+   return NULL;
+
+   rcu_read_lock();
+   fence = dma_fence_get_rcu_safe(_file->fence);
+   rcu_read_unlock();
+
fput(sync_file->file);
 
return fence;
 }
 EXPORT_SYMBOL(sync_file_get_fence);
 
+static inline struct dma_fence *
+sync_file_get_fence_locked(struct sync_file *sync_file)
+{
+   return rcu_dereference_protected(sync_file->fence,
+sync_file_held(sync_file));
+}
+
 static int sync_file_set_fence(struct sync_file *sync_file,
   struct dma_fence **fences, int num_fences)
 {
@@ -143,7 +165,7 @@ static int sync_file_set_fence(struct sync_file *sync_file,
 * we own the reference of the dma_fence_array creation.
 */
if (num_fences == 1) {
-   sync_file->fence = fences[0];
+   RCU_INIT_POINTER(sync_file->fence, fences[0]);
kfree(fences);
} else {
array = dma_fence_array_create(num_fences, fences,
@@ -152,17 +174,25 @@ static int sync_file_set_fence(struct sync_file 
*sync_file,
if (!array)
return -ENOMEM;
 
-   sync_file->fence = >base;
+   RCU_INIT_POINTER(sync_file->fence, >base);
}
 
return 0;
 }
 
+/* must be called with sync_file lock taken */
 static struct dma_fence **get_fences(struct sync_file *sync_file,
 int *num_fences)
 {
-   if (dma_fence_is_array(sync_file->fence)) {
-   struct dma_fence_array *array = 
to_dma_fence_array(sync_file->fence);
+   struct dma_fence *fence = sync_file_get_fence_locked(sync_file);
+
+   if (!fence) {
+   *num_fences = 0;
+   return NULL;
+   }
+
+   if (dma_fence_is_array(fence)) {
+   struct dma_fence_array *array = to_dma_fence_array(fence);
 
*num_fences = array->num_fences;
return array->fences;
@@ -204,10 +234,16 @@ static struct sync_file *sync_file_merge(const char 
*name, struct sync_file *a,
if (!sync_file)
return NULL;
 
+   mutex_lock(>lock);
+   mutex_lock(>lock);
a_fences = get_fences(a, _num_fences);
b_fences = get_fences(b, _num_fences);
-   if (a_num_fences > INT_MAX - b_num_fences)
-   return NULL;
+   if (!a_num_fences || !b_num_fences)
+   goto unlock;
+
+   if (a_num_fences > INT_MAX - b_num_fences) {
+   goto unlock;
+   }
 
num_fences = a_num_fences + b_num_fences;
 
@@ -268,11 +304,17 @@ static struct sync_file *sync_file_merge(const char 
*name, struct sync_file *a,
goto err;
}
 
+   mutex_unlock(>lock);
+   mutex_unlock(>lock);
+
strlcpy(sync_file->name, name, sizeof(sync_file->name));