Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-10 Thread Jason Ekstrand
On Thu, Aug 10, 2017 at 4:00 AM, Chris Wilson 
wrote:

> Quoting Jason Ekstrand (2017-08-10 00:53:00)
> > On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson 
> wrote:
> > However, this installs the proxy into syncobj->fence with the result
> > that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour
> > of drm_syncobj is then quite inconsistent, sometimes it will wait
> for a
> > future fence, sometimes it will report an error.
> >
> >
> > Yeah, that's not good.  I thought about a variety of solutions to try and
> > re-use more core dma_fence code.  Ultimately I chose the current one
> because it
> > takes a snapshot of the syncobjs and then, from that point forward, it's
> > consistent with its snapshot.  Nothing I was able to come up with based
> on core
> > dma_fence wait routines does that.
>
> So if we choose to keep the proxy local to the fence-array and not replace
> syncobj->fence, we can still reduce this to a plain dma-fence-array +
> wait.
>
> Pertinent details:
>
> +static void syncobj_notify(struct drm_syncobj *syncobj, struct dma_fence
> *fence)
> +{
> +   struct drm_syncobj_cb *cb, *cn;
> +   unsigned long flags;
> +
> +   /* This is just an opencoded waitqueue; FIXME! */
> +   spin_lock_irqsave(>lock, flags);
> +   list_for_each_entry_safe(cb, cn, >cb_list, link)
> +   cb->func(cb, fence);
> +   INIT_LIST_HEAD(>cb_list);
> +   spin_unlock_irqrestore(>lock, flags);
> +}
> +
>  /**
>   * drm_syncobj_replace_fence - replace fence in a sync object.
>   * @syncobj: Sync object to replace fence in
> @@ -89,7 +109,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj
> *syncobj,
>
> if (fence)
> dma_fence_get(fence);
> +
> old_fence = xchg(>fence, fence);
> +   if (!old_fence && fence)
> +   syncobj_notify(syncobj, fence);
>
> dma_fence_put(old_fence);
>  }
> @@ -124,6 +147,9 @@ void drm_syncobj_free(struct kref *kref)
> struct drm_syncobj *syncobj = container_of(kref,
>struct drm_syncobj,
>refcount);
> +
> +   syncobj_notify(syncobj, NULL);
> +
> dma_fence_put(syncobj->fence);
> kfree(syncobj);
>  }
> @@ -140,6 +166,8 @@ static int drm_syncobj_create(struct drm_file
> *file_private,
> return -ENOMEM;
>
> kref_init(>refcount);
> +   spin_lock_init(>lock);
> +   INIT_LIST_HEAD(>cb_list);
>
> idr_preload(GFP_KERNEL);
> spin_lock(_private->syncobj_table_lock);
>
> struct future_fence {
> +   struct drm_syncobj_cb base;
> +   struct dma_fence **slot;
> +};
> +
> +static void future_fence_cb(struct drm_syncobj_cb *cb, struct dma_fence
> *fence)
> +{
> +   struct future_fence *ff = container_of(cb, typeof(*ff), base);
> +
> +   if (fence)
> +   dma_fence_replace_proxy(ff->slot, fence);
>

Where does this come from?  I can't find it on dri-devel and "proxy"
doesn't show up anywhere in the dma-buf sources.  What does it do?


> +   else
> +   dma_fence_signal(*ff->slot);
> +
> +   kfree(ff);
> +}
> +
> +int
> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
> +  struct drm_file *file_private)
> +{
> +   struct drm_syncobj_wait *args = data;
> +   struct dma_fence_array *array;
> +   struct dma_fence **fences;
> +   unsigned int count, i;
> +   long timeout;
> +   u32 *handles;
> +   int ret;
> +
> +   BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
> +
> +   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +   return -ENODEV;
> +
> +   if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
> +   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
> +   return -EINVAL;
> +
> +   count = args->count_handles;
> +   if (!count)
> +   return -EINVAL;
> +
> +   /* Get the handles from userspace */
> +   fences = kmalloc_array(count,
> +  sizeof(struct dma_fence *),
> +  __GFP_NOWARN | GFP_KERNEL);
> +   if (!fences)
> +   return -ENOMEM;
> +
> +   handles = (void *)fences + count * (sizeof(*fences) -
> sizeof(*handles));
> +   if (copy_from_user(handles,
> +  u64_to_user_ptr(args->handles),
> +  sizeof(u32) * count)) {
> +   ret = -EFAULT;
> +   goto err;
> +   }
> +
> +   for (i = 0; i < count; i++) {
> +   struct drm_syncobj *s;
> +
> +   ret = -ENOENT;
> +   s = drm_syncobj_find(file_private, handles[i]);
> +   if (s) {
> +   ret = 0;
> +   spin_lock_irq(>lock);
> +   if (s->fence) {
> +

Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-10 Thread Christian König

Am 10.08.2017 um 16:32 schrieb Jason Ekstrand:
On Thu, Aug 10, 2017 at 5:26 AM, Christian König 
> wrote:


Am 10.08.2017 um 01:53 schrieb Jason Ekstrand:

On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson
> wrote:

Quoting Chris Wilson (2017-08-09 18:57:44)
> So we are taking a snapshot here. It looks like this could have been
> done using a dma_fence_array + dma_fence_proxy for
capturing the future
> fence.

A quick sketch of this idea looks like:

 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
   struct dma_fence *fence)
 {
-   struct dma_fence *old_fence;
+   unsigned long flags;

-   if (fence)
-   dma_fence_get(fence);
-   old_fence = xchg(>fence, fence);
-
-   dma_fence_put(old_fence);
+  spin_lock_irqsave(>lock, flags);
+   dma_fence_replace_proxy(>fence, fence);
+   spin_unlock_irqrestore(>lock, flags);
 }

+int
+drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file_private)
+{
+   struct drm_syncobj_wait *args = data;
+   struct dma_fence **fences;
+   struct dma_fence_array *array;
+   unsigned long timeout;
+   unsigned int count;
+   u32 *handles;
+   int ret = 0;
+   u32 i;
+
+   BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -ENODEV;
+
+   if (args->flags != 0 && args->flags !=
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+   return -EINVAL;
+
+   count = args->count_handles;
+   if (!count)
+   return -EINVAL;
+
+   /* Get the handles from userspace */
+   fences = kmalloc_array(count,
+ sizeof(struct dma_fence *),
+ __GFP_NOWARN | GFP_KERNEL);
+   if (!fences)
+   return -ENOMEM;
+
+   handles = (void *)fences + count * (sizeof(*fences) -
sizeof(*handles));
+   if (copy_from_user(handles,
+ u64_to_user_ptr(args->handles),
+  sizeof(u32) * count)) {
+   ret = -EFAULT;
+   goto err;
+   }
+
+   for (i = 0; i < count; i++) {
+   struct drm_syncobj *s;
+
+   ret = -ENOENT;
+   s = drm_syncobj_find(file_private, handles[i]);
+   if (s) {
+   ret = 0;
+  spin_lock_irq(>lock);
+   if (!s->fence) {
+   if (args->flags &
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+  s->fence = dma_fence_create_proxy();
+   else
+ret = -EINVAL;
+   }
+   if (s->fence)
+   fences[i] =
dma_fence_get(s->fence);
+  spin_unlock_irq(>lock);
+   }
+   if (ret) {
+   count = i;
+   goto err_fences;
+   }
+   }
+
+   array = dma_fence_array_create(count, fences, 0, 0,
+ !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
+   if (!array) {
+   ret = -ENOMEM;
+   goto err_fences;
+   }
+
+   timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
+   timeout = dma_fence_wait_timeout(>base, true,
timeout);
+   args->first_signaled = array->first_signaled;
+  dma_fence_put(>base);
+
+   return timeout < 0 ? timeout : 0;
+
+err_fences:
+   for (i = 0; i < count; i++)
+   dma_fence_put(fences[i]);
+err:
+   kfree(fences);
+   return ret;
+}

The key advantage is that this translate the ioctl into a
dma-fence-array
which already has to deal with the mess, sharing the burden.
(But it
does require a trivial patch to dma-fence-array to record the
first
signaled fence.)

However, this installs the proxy into syncobj->fence with the
result
that any concurrent wait also become a WAIT_FOR_SUBMIT. The
behaviour
of drm_syncobj is then 

Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-10 Thread Jason Ekstrand
On Thu, Aug 10, 2017 at 5:26 AM, Christian König 
wrote:

> Am 10.08.2017 um 01:53 schrieb Jason Ekstrand:
>
> On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson 
> wrote:
>
>> Quoting Chris Wilson (2017-08-09 18:57:44)
>> > So we are taking a snapshot here. It looks like this could have been
>> > done using a dma_fence_array + dma_fence_proxy for capturing the future
>> > fence.
>>
>> A quick sketch of this idea looks like:
>>
>>  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>struct dma_fence *fence)
>>  {
>> -   struct dma_fence *old_fence;
>> +   unsigned long flags;
>>
>> -   if (fence)
>> -   dma_fence_get(fence);
>> -   old_fence = xchg(>fence, fence);
>> -
>> -   dma_fence_put(old_fence);
>> +   spin_lock_irqsave(>lock, flags);
>> +   dma_fence_replace_proxy(>fence, fence);
>> +   spin_unlock_irqrestore(>lock, flags);
>>  }
>>
>> +int
>> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>> +  struct drm_file *file_private)
>> +{
>> +   struct drm_syncobj_wait *args = data;
>> +   struct dma_fence **fences;
>> +   struct dma_fence_array *array;
>> +   unsigned long timeout;
>> +   unsigned int count;
>> +   u32 *handles;
>> +   int ret = 0;
>> +   u32 i;
>> +
>> +   BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
>> +
>> +   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>> +   return -ENODEV;
>> +
>> +   if (args->flags != 0 && args->flags !=
>> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>> +   return -EINVAL;
>> +
>> +   count = args->count_handles;
>> +   if (!count)
>> +   return -EINVAL;
>> +
>> +   /* Get the handles from userspace */
>> +   fences = kmalloc_array(count,
>> +  sizeof(struct dma_fence *),
>> +  __GFP_NOWARN | GFP_KERNEL);
>> +   if (!fences)
>> +   return -ENOMEM;
>> +
>> +   handles = (void *)fences + count * (sizeof(*fences) -
>> sizeof(*handles));
>> +   if (copy_from_user(handles,
>> +  u64_to_user_ptr(args->handles),
>> +  sizeof(u32) * count)) {
>> +   ret = -EFAULT;
>> +   goto err;
>> +   }
>> +
>> +   for (i = 0; i < count; i++) {
>> +   struct drm_syncobj *s;
>> +
>> +   ret = -ENOENT;
>> +   s = drm_syncobj_find(file_private, handles[i]);
>> +   if (s) {
>> +   ret = 0;
>> +   spin_lock_irq(>lock);
>> +   if (!s->fence) {
>> +   if (args->flags &
>> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>> +   s->fence =
>> dma_fence_create_proxy();
>> +   else
>> +   ret = -EINVAL;
>> +   }
>> +   if (s->fence)
>> +   fences[i] = dma_fence_get(s->fence);
>> +   spin_unlock_irq(>lock);
>> +   }
>> +   if (ret) {
>> +   count = i;
>> +   goto err_fences;
>> +   }
>> +   }
>> +
>> +   array = dma_fence_array_create(count, fences, 0, 0,
>> +  !(args->flags &
>> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
>> +   if (!array) {
>> +   ret = -ENOMEM;
>> +   goto err_fences;
>> +   }
>> +
>> +   timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
>> +   timeout = dma_fence_wait_timeout(>base, true, timeout);
>> +   args->first_signaled = array->first_signaled;
>> +   dma_fence_put(>base);
>> +
>> +   return timeout < 0 ? timeout : 0;
>> +
>> +err_fences:
>> +   for (i = 0; i < count; i++)
>> +   dma_fence_put(fences[i]);
>> +err:
>> +   kfree(fences);
>> +   return ret;
>> +}
>>
>> The key advantage is that this translate the ioctl into a dma-fence-array
>> which already has to deal with the mess, sharing the burden. (But it
>> does require a trivial patch to dma-fence-array to record the first
>> signaled fence.)
>>
>> However, this installs the proxy into syncobj->fence with the result
>> that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour
>> of drm_syncobj is then quite inconsistent, sometimes it will wait for a
>> future fence, sometimes it will report an error.
>>
>
> Yeah, that's not good.  I thought about a variety of solutions to try and
> re-use more core dma_fence code.  Ultimately I chose the current one
> because it takes a snapshot of the syncobjs and then, from that point
> forward, it's consistent with its snapshot.  Nothing I was able to come up
> with based on core dma_fence wait routines does that.
>
>
> As Chris pointed out, that's 

Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-10 Thread Christian König

Am 09.08.2017 um 23:31 schrieb Chris Wilson:

Quoting Jason Ekstrand (2017-08-09 18:00:54)

+static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj 
**syncobjs,
+ uint32_t count,
+ uint32_t flags,
+ signed long timeout,
+ uint32_t *idx)
+{
+   struct syncobj_wait_entry *entries;
+   struct dma_fence *fence;
+   signed long ret;
+   uint32_t signaled_count, i;
+
+   if (timeout == 0) {
+   signaled_count = 0;
+   for (i = 0; i < count; ++i) {
+   ret = drm_syncobj_signaled(syncobjs[i], flags);
+   if (ret < 0)
+   return ret;
+   if (ret == 0)
+   continue;
+   if (signaled_count == 0 && idx)
+   *idx = i;
+   signaled_count++;
+   }
+
+   if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+   return signaled_count == count ? 1 : 0;
+   else
+   return signaled_count > 0 ? 1 : 0;

There's a very annoying laxness in the dma_fence API here, in that
backends are not required to automatically report when a fence is
signaled prior to fence->ops->enable_signaling() being called.
So here if we fail to match signaled_count, we need to fallthough and
try a 0 timeout wait!

Christian, dma_fence_wait_any_timeout() has this same bug you told me off
for, e.g. commit 698c0f7ff216 ("dma-buf/fence: revert "don't wait when
specified timeout is zero" (v2)")!


Thanks for pointing this out, going to take care of this issue.

Christian.


-Chris
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel



___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-10 Thread Christian König

Am 10.08.2017 um 01:53 schrieb Jason Ekstrand:
On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson > wrote:


Quoting Chris Wilson (2017-08-09 18:57:44)
> So we are taking a snapshot here. It looks like this could have been
> done using a dma_fence_array + dma_fence_proxy for capturing the
future
> fence.

A quick sketch of this idea looks like:

 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
   struct dma_fence *fence)
 {
-   struct dma_fence *old_fence;
+   unsigned long flags;

-   if (fence)
-   dma_fence_get(fence);
-   old_fence = xchg(>fence, fence);
-
-   dma_fence_put(old_fence);
+   spin_lock_irqsave(>lock, flags);
+   dma_fence_replace_proxy(>fence, fence);
+   spin_unlock_irqrestore(>lock, flags);
 }

+int
+drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file_private)
+{
+   struct drm_syncobj_wait *args = data;
+   struct dma_fence **fences;
+   struct dma_fence_array *array;
+   unsigned long timeout;
+   unsigned int count;
+   u32 *handles;
+   int ret = 0;
+   u32 i;
+
+   BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -ENODEV;
+
+   if (args->flags != 0 && args->flags !=
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+   return -EINVAL;
+
+   count = args->count_handles;
+   if (!count)
+   return -EINVAL;
+
+   /* Get the handles from userspace */
+   fences = kmalloc_array(count,
+  sizeof(struct dma_fence *),
+  __GFP_NOWARN | GFP_KERNEL);
+   if (!fences)
+   return -ENOMEM;
+
+   handles = (void *)fences + count * (sizeof(*fences) -
sizeof(*handles));
+   if (copy_from_user(handles,
+ u64_to_user_ptr(args->handles),
+  sizeof(u32) * count)) {
+   ret = -EFAULT;
+   goto err;
+   }
+
+   for (i = 0; i < count; i++) {
+   struct drm_syncobj *s;
+
+   ret = -ENOENT;
+   s = drm_syncobj_find(file_private, handles[i]);
+   if (s) {
+   ret = 0;
+   spin_lock_irq(>lock);
+   if (!s->fence) {
+   if (args->flags &
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+   s->fence =
dma_fence_create_proxy();
+   else
+   ret = -EINVAL;
+   }
+   if (s->fence)
+   fences[i] = dma_fence_get(s->fence);
+   spin_unlock_irq(>lock);
+   }
+   if (ret) {
+   count = i;
+   goto err_fences;
+   }
+   }
+
+   array = dma_fence_array_create(count, fences, 0, 0,
+  !(args->flags &
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
+   if (!array) {
+   ret = -ENOMEM;
+   goto err_fences;
+   }
+
+   timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
+   timeout = dma_fence_wait_timeout(>base, true, timeout);
+   args->first_signaled = array->first_signaled;
+   dma_fence_put(>base);
+
+   return timeout < 0 ? timeout : 0;
+
+err_fences:
+   for (i = 0; i < count; i++)
+   dma_fence_put(fences[i]);
+err:
+   kfree(fences);
+   return ret;
+}

The key advantage is that this translate the ioctl into a
dma-fence-array
which already has to deal with the mess, sharing the burden. (But it
does require a trivial patch to dma-fence-array to record the first
signaled fence.)

However, this installs the proxy into syncobj->fence with the result
that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour
of drm_syncobj is then quite inconsistent, sometimes it will wait
for a
future fence, sometimes it will report an error.


Yeah, that's not good.  I thought about a variety of solutions to try 
and re-use more core dma_fence code.  Ultimately I chose the current 
one because it takes a snapshot of the syncobjs and then, from that 
point forward, it's consistent with its snapshot.  Nothing I was able 
to come up with based on core dma_fence wait routines does that.


As Chris 

Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-10 Thread Chris Wilson
Quoting Jason Ekstrand (2017-08-10 00:53:00)
> On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson  wrote:
> However, this installs the proxy into syncobj->fence with the result
> that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour
> of drm_syncobj is then quite inconsistent, sometimes it will wait for a
> future fence, sometimes it will report an error.
> 
> 
> Yeah, that's not good.  I thought about a variety of solutions to try and
> re-use more core dma_fence code.  Ultimately I chose the current one because 
> it
> takes a snapshot of the syncobjs and then, from that point forward, it's
> consistent with its snapshot.  Nothing I was able to come up with based on 
> core
> dma_fence wait routines does that.

So if we choose to keep the proxy local to the fence-array and not replace
syncobj->fence, we can still reduce this to a plain dma-fence-array +
wait.

Pertinent details:

+static void syncobj_notify(struct drm_syncobj *syncobj, struct dma_fence 
*fence)
+{
+   struct drm_syncobj_cb *cb, *cn;
+   unsigned long flags;
+
+   /* This is just an opencoded waitqueue; FIXME! */
+   spin_lock_irqsave(&syncobj->lock, flags);
+   list_for_each_entry_safe(cb, cn, &syncobj->cb_list, link)
+   cb->func(cb, fence);
+   INIT_LIST_HEAD(&syncobj->cb_list);
+   spin_unlock_irqrestore(&syncobj->lock, flags);
+}
+
 /**
  * drm_syncobj_replace_fence - replace fence in a sync object.
  * @syncobj: Sync object to replace fence in
@@ -89,7 +109,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 
if (fence)
dma_fence_get(fence);
+
old_fence = xchg(&syncobj->fence, fence);
+   if (!old_fence && fence)
+   syncobj_notify(syncobj, fence);
 
dma_fence_put(old_fence);
 }
@@ -124,6 +147,9 @@ void drm_syncobj_free(struct kref *kref)
struct drm_syncobj *syncobj = container_of(kref,
   struct drm_syncobj,
   refcount);
+
+   syncobj_notify(syncobj, NULL);
+
dma_fence_put(syncobj->fence);
kfree(syncobj);
 }
@@ -140,6 +166,8 @@ static int drm_syncobj_create(struct drm_file *file_private,
return -ENOMEM;
 
kref_init(&syncobj->refcount);
+   spin_lock_init(&syncobj->lock);
+   INIT_LIST_HEAD(&syncobj->cb_list);
 
idr_preload(GFP_KERNEL);
spin_lock(&file_private->syncobj_table_lock);

struct future_fence {
+   struct drm_syncobj_cb base;
+   struct dma_fence **slot;
+};
+
+static void future_fence_cb(struct drm_syncobj_cb *cb, struct dma_fence *fence)
+{
+   struct future_fence *ff = container_of(cb, typeof(*ff), base);
+
+   if (fence)
+   dma_fence_replace_proxy(ff->slot, fence);
+   else
+   dma_fence_signal(*ff->slot);
+
+   kfree(ff);
+}
+
+int
+drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file_private)
+{
+   struct drm_syncobj_wait *args = data;
+   struct dma_fence_array *array;
+   struct dma_fence **fences;
+   unsigned int count, i;
+   long timeout;
+   u32 *handles;
+   int ret;
+
+   BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -ENODEV;
+
+   if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
+   return -EINVAL;
+
+   count = args->count_handles;
+   if (!count)
+   return -EINVAL;
+
+   /* Get the handles from userspace */
+   fences = kmalloc_array(count,
+  sizeof(struct dma_fence *),
+  __GFP_NOWARN | GFP_KERNEL);
+   if (!fences)
+   return -ENOMEM;
+
+   handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles));
+   if (copy_from_user(handles,
+  u64_to_user_ptr(args->handles),
+  sizeof(u32) * count)) {
+   ret = -EFAULT;
+   goto err;
+   }
+
+   for (i = 0; i < count; i++) {
+   struct drm_syncobj *s;
+
+   ret = -ENOENT;
+   s = drm_syncobj_find(file_private, handles[i]);
+   if (s) {
+   ret = 0;
+   spin_lock_irq(&s->lock);
+   if (s->fence) {
+   fences[i] = dma_fence_get(s->fence);
+   } else if (args->flags & 
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+   struct future_fence *cb;
+
+   cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+   fences[i] = dma_fence_create_proxy();
+   if (cb && fences[i]) {
+   cb->slot = &fences[i];
+   

Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-09 Thread Jason Ekstrand
On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson 
wrote:

> Quoting Chris Wilson (2017-08-09 18:57:44)
> > So we are taking a snapshot here. It looks like this could have been
> > done using a dma_fence_array + dma_fence_proxy for capturing the future
> > fence.
>
> A quick sketch of this idea looks like:
>
>  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>struct dma_fence *fence)
>  {
> -   struct dma_fence *old_fence;
> +   unsigned long flags;
>
> -   if (fence)
> -   dma_fence_get(fence);
> -   old_fence = xchg(>fence, fence);
> -
> -   dma_fence_put(old_fence);
> +   spin_lock_irqsave(>lock, flags);
> +   dma_fence_replace_proxy(>fence, fence);
> +   spin_unlock_irqrestore(>lock, flags);
>  }
>
> +int
> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
> +  struct drm_file *file_private)
> +{
> +   struct drm_syncobj_wait *args = data;
> +   struct dma_fence **fences;
> +   struct dma_fence_array *array;
> +   unsigned long timeout;
> +   unsigned int count;
> +   u32 *handles;
> +   int ret = 0;
> +   u32 i;
> +
> +   BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
> +
> +   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +   return -ENODEV;
> +
> +   if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_
> ALL)
> +   return -EINVAL;
> +
> +   count = args->count_handles;
> +   if (!count)
> +   return -EINVAL;
> +
> +   /* Get the handles from userspace */
> +   fences = kmalloc_array(count,
> +  sizeof(struct dma_fence *),
> +  __GFP_NOWARN | GFP_KERNEL);
> +   if (!fences)
> +   return -ENOMEM;
> +
> +   handles = (void *)fences + count * (sizeof(*fences) -
> sizeof(*handles));
> +   if (copy_from_user(handles,
> +  u64_to_user_ptr(args->handles),
> +  sizeof(u32) * count)) {
> +   ret = -EFAULT;
> +   goto err;
> +   }
> +
> +   for (i = 0; i < count; i++) {
> +   struct drm_syncobj *s;
> +
> +   ret = -ENOENT;
> +   s = drm_syncobj_find(file_private, handles[i]);
> +   if (s) {
> +   ret = 0;
> +   spin_lock_irq(>lock);
> +   if (!s->fence) {
> +   if (args->flags &
> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
> +   s->fence =
> dma_fence_create_proxy();
> +   else
> +   ret = -EINVAL;
> +   }
> +   if (s->fence)
> +   fences[i] = dma_fence_get(s->fence);
> +   spin_unlock_irq(>lock);
> +   }
> +   if (ret) {
> +   count = i;
> +   goto err_fences;
> +   }
> +   }
> +
> +   array = dma_fence_array_create(count, fences, 0, 0,
> +  !(args->flags &
> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
> +   if (!array) {
> +   ret = -ENOMEM;
> +   goto err_fences;
> +   }
> +
> +   timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
> +   timeout = dma_fence_wait_timeout(>base, true, timeout);
> +   args->first_signaled = array->first_signaled;
> +   dma_fence_put(>base);
> +
> +   return timeout < 0 ? timeout : 0;
> +
> +err_fences:
> +   for (i = 0; i < count; i++)
> +   dma_fence_put(fences[i]);
> +err:
> +   kfree(fences);
> +   return ret;
> +}
>
> The key advantage is that this translate the ioctl into a dma-fence-array
> which already has to deal with the mess, sharing the burden. (But it
> does require a trivial patch to dma-fence-array to record the first
> signaled fence.)
>
> However, this installs the proxy into syncobj->fence with the result
> that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour
> of drm_syncobj is then quite inconsistent, sometimes it will wait for a
> future fence, sometimes it will report an error.
>

Yeah, that's not good.  I thought about a variety of solutions to try and
re-use more core dma_fence code.  Ultimately I chose the current one
because it takes a snapshot of the syncobjs and then, from that point
forward, it's consistent with its snapshot.  Nothing I was able to come up
with based on core dma_fence wait routines does that.

--Jason
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-09 Thread Chris Wilson
Quoting Chris Wilson (2017-08-09 18:57:44)
> So we are taking a snapshot here. It looks like this could have been
> done using a dma_fence_array + dma_fence_proxy for capturing the future
> fence.

A quick sketch of this idea looks like:

 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
   struct dma_fence *fence)
 {
-   struct dma_fence *old_fence;
+   unsigned long flags;
 
-   if (fence)
-   dma_fence_get(fence);
-   old_fence = xchg(&syncobj->fence, fence);
-
-   dma_fence_put(old_fence);
+   spin_lock_irqsave(&syncobj->lock, flags);
+   dma_fence_replace_proxy(&syncobj->fence, fence);
+   spin_unlock_irqrestore(&syncobj->lock, flags);
 }

+int
+drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file_private)
+{
+   struct drm_syncobj_wait *args = data;
+   struct dma_fence **fences;
+   struct dma_fence_array *array;
+   unsigned long timeout;
+   unsigned int count;
+   u32 *handles;
+   int ret = 0;
+   u32 i;
+
+   BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -ENODEV;
+
+   if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+   return -EINVAL;
+
+   count = args->count_handles;
+   if (!count)
+   return -EINVAL;
+
+   /* Get the handles from userspace */
+   fences = kmalloc_array(count,
+  sizeof(struct dma_fence *),
+  __GFP_NOWARN | GFP_KERNEL);
+   if (!fences)
+   return -ENOMEM;
+
+   handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles));
+   if (copy_from_user(handles,
+  u64_to_user_ptr(args->handles),
+  sizeof(u32) * count)) {
+   ret = -EFAULT;
+   goto err;
+   }
+
+   for (i = 0; i < count; i++) {
+   struct drm_syncobj *s;
+
+   ret = -ENOENT;
+   s = drm_syncobj_find(file_private, handles[i]);
+   if (s) {
+   ret = 0;
+   spin_lock_irq(&s->lock);
+   if (!s->fence) {
+   if (args->flags & 
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+   s->fence = dma_fence_create_proxy();
+   else
+   ret = -EINVAL;
+   }
+   if (s->fence)
+   fences[i] = dma_fence_get(s->fence);
+   spin_unlock_irq(&s->lock);
+   }
+   if (ret) {
+   count = i;
+   goto err_fences;
+   }
+   }
+
+   array = dma_fence_array_create(count, fences, 0, 0,
+  !(args->flags & 
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
+   if (!array) {
+   ret = -ENOMEM;
+   goto err_fences;
+   }
+
+   timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
+   timeout = dma_fence_wait_timeout(&array->base, true, timeout);
+   args->first_signaled = array->first_signaled;
+   dma_fence_put(&array->base);
+
+   return timeout < 0 ? timeout : 0;
+
+err_fences:
+   for (i = 0; i < count; i++)
+   dma_fence_put(fences[i]);
+err:
+   kfree(fences);
+   return ret;
+}

The key advantage is that this translate the ioctl into a dma-fence-array
which already has to deal with the mess, sharing the burden. (But it
does require a trivial patch to dma-fence-array to record the first
signaled fence.)

However, this installs the proxy into syncobj->fence with the result
that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour
of drm_syncobj is then quite inconsistent, sometimes it will wait for a
future fence, sometimes it will report an error.

Furthermore, do we want future-fences in the execbuf API? etc.
-Chris
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-09 Thread Jason Ekstrand
On Wed, Aug 9, 2017 at 2:31 PM, Chris Wilson 
wrote:

> Quoting Jason Ekstrand (2017-08-09 18:00:54)
> > +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj
> **syncobjs,
> > + uint32_t count,
> > + uint32_t flags,
> > + signed long timeout,
> > + uint32_t *idx)
> > +{
> > +   struct syncobj_wait_entry *entries;
> > +   struct dma_fence *fence;
> > +   signed long ret;
> > +   uint32_t signaled_count, i;
> > +
> > +   if (timeout == 0) {
> > +   signaled_count = 0;
> > +   for (i = 0; i < count; ++i) {
> > +   ret = drm_syncobj_signaled(syncobjs[i], flags);
> > +   if (ret < 0)
> > +   return ret;
> > +   if (ret == 0)
> > +   continue;
> > +   if (signaled_count == 0 && idx)
> > +   *idx = i;
> > +   signaled_count++;
> > +   }
> > +
> > +   if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
> > +   return signaled_count == count ? 1 : 0;
> > +   else
> > +   return signaled_count > 0 ? 1 : 0;
>
> There's a very annoying laxness in the dma_fence API here, in that
> backends are not required to automatically report when a fence is
> signaled prior to fence->ops->enable_signaling() being called.
> So here if we fail to match signaled_count, we need to fallthough and
> try a 0 timeout wait!
>

That is very annoying!  I'll see how bad the fall-through is...


> Christian, dma_fence_wait_any_timeout() has this same bug you told me off
> for, e.g. commit 698c0f7ff216 ("dma-buf/fence: revert "don't wait when
> specified timeout is zero" (v2)")!
> -Chris
>
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-09 Thread Chris Wilson
Quoting Jason Ekstrand (2017-08-09 18:00:54)
> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj 
> **syncobjs,
> + uint32_t count,
> + uint32_t flags,
> + signed long timeout,
> + uint32_t *idx)
> +{
> +   struct syncobj_wait_entry *entries;
> +   struct dma_fence *fence;
> +   signed long ret;
> +   uint32_t signaled_count, i;
> +
> +   if (timeout == 0) {
> +   signaled_count = 0;
> +   for (i = 0; i < count; ++i) {
> +   ret = drm_syncobj_signaled(syncobjs[i], flags);
> +   if (ret < 0)
> +   return ret;
> +   if (ret == 0)
> +   continue;
> +   if (signaled_count == 0 && idx)
> +   *idx = i;
> +   signaled_count++;
> +   }
> +
> +   if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
> +   return signaled_count == count ? 1 : 0;
> +   else
> +   return signaled_count > 0 ? 1 : 0;

There's a very annoying laxness in the dma_fence API here, in that
backends are not required to automatically report when a fence is
signaled prior to fence->ops->enable_signaling() being called.
So here if we fail to match signaled_count, we need to fallthough and
try a 0 timeout wait!

Christian, dma_fence_wait_any_timeout() has this same bug you told me off
for, e.g. commit 698c0f7ff216 ("dma-buf/fence: revert "don't wait when
specified timeout is zero" (v2)")!
-Chris
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-09 Thread Jason Ekstrand
On Wed, Aug 9, 2017 at 11:25 AM, Christian König 
wrote:

> Am 09.08.2017 um 19:57 schrieb Chris Wilson:
>
>> Quoting Jason Ekstrand (2017-08-09 18:00:54)
>>
>>> Vulkan VkFence semantics require that the application be able to perform
>>> a CPU wait on work which may not yet have been submitted.  This is
>>> perfectly safe because the CPU wait has a timeout which will get
>>> triggered eventually if no work is ever submitted.  This behavior is
>>> advantageous for multi-threaded workloads because, so long as all of the
>>> threads agree on what fences to use up-front, you don't have the extra
>>> cross-thread synchronization cost of thread A telling thread B that it
>>> has submitted its dependent work and thread B is now free to wait.
>>>
>>> Within a single process, this can be implemented in the userspace driver
>>> by doing exactly the same kind of tracking the app would have to do
>>> using posix condition variables or similar.  However, in order for this
>>> to work cross-process (as is required by VK_KHR_external_fence), we need
>>> to handle this in the kernel.
>>>
>>> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
>>> instructs the IOCTL to wait for the syncobj to have a non-null fence and
>>> then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
>>> easily get the Vulkan behavior.
>>>
>>> v2:
>>>   - Fix a bug in the invalid syncobj error path
>>>   - Unify the wait-all and wait-any cases
>>>
>>> Signed-off-by: Jason Ekstrand 
>>> Cc: Dave Airlie 
>>> ---
>>>
>>> I realized today (this is what happens when you sleep) that it takes
>>> almost
>>> no work to make the wait-any path also handle wait-all.  By unifying the
>>> two, I deleted over a hundred lines of code from the implementation.
>>>
>>>   drivers/gpu/drm/drm_syncobj.c | 243 ++
>>> 
>>>   include/uapi/drm/drm.h|   1 +
>>>   2 files changed, 199 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>> b/drivers/gpu/drm/drm_syncobj.c
>>> index 510dfc2..5e7f654 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -51,6 +51,7 @@
>>>   #include 
>>>   #include 
>>>   #include 
>>> +#include 
>>> #include "drm_internal.h"
>>>   #include 
>>> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct
>>> drm_syncobj *syncobj,
>>>
>> This is a bit of the puzzle that is missing.
>>
>
?  It's in the previous patch.


>  list_add_tail(&cb->node, &syncobj->cb_list);
>>>   }
>>>   +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj
>>> *syncobj,
>>> +struct dma_fence
>>> **fence,
>>> +struct drm_syncobj_cb
>>> *cb,
>>> +drm_syncobj_func_t func)
>>> +{
>>> +   int ret;
>>> +
>>> +   spin_lock(&syncobj->lock);
>>> +   if (syncobj->fence) {
>>> +   *fence = dma_fence_get(syncobj->fence);
>>> +   ret = 1;
>>> +   } else {
>>> +   *fence = NULL;
>>> +   drm_syncobj_add_callback_locked(syncobj, cb, func);
>>> +   ret = 0;
>>> +   }
>>> +   spin_unlock(&syncobj->lock);
>>> +
>>> +   return ret;
>>> +}
>>> +
>>>   /**
>>>* drm_syncobj_add_callback - adds a callback to syncobj::cb_list
>>>* @syncobj: Sync object to which to add the callback
>>> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device
>>> *dev, void *data,
>>>  &args->handle);
>>>   }
>>>   +static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
>>> +   uint32_t wait_flags)
>>> +{
>>> +   struct dma_fence *fence;
>>> +   int ret;
>>> +
>>> +   fence = drm_syncobj_fence_get(syncobj);
>>> +   if (!fence) {
>>> +   if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>>> +   return 0;
>>> +   else
>>> +   return -EINVAL;
>>> +   }
>>> +
>>> +   ret = dma_fence_is_signaled(fence) ? 1 : 0;
>>> +
>>> +   dma_fence_put(fence);
>>> +
>>> +   return ret;
>>> +}
>>> +
>>> +struct syncobj_wait_entry {
>>> +   struct task_struct *task;
>>> +   struct dma_fence *fence;
>>> +   struct dma_fence_cb fence_cb;
>>> +   struct drm_syncobj_cb syncobj_cb;
>>> +};
>>> +
>>> +static void syncobj_wait_fence_func(struct dma_fence *fence,
>>> +   struct dma_fence_cb *cb)
>>> +{
>>> +   struct syncobj_wait_entry *wait =
>>> +   container_of(cb, struct syncobj_wait_entry, fence_cb);
>>> +
>>> +   wake_up_process(wait->task);
>>> +}
>>> +
>>> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>>> + struct drm_syncobj_cb *cb)
>>> +{
>>> +   struct 

Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-09 Thread Christian König

Am 09.08.2017 um 19:57 schrieb Chris Wilson:

Quoting Jason Ekstrand (2017-08-09 18:00:54)

Vulkan VkFence semantics require that the application be able to perform
a CPU wait on work which may not yet have been submitted.  This is
perfectly safe because the CPU wait has a timeout which will get
triggered eventually if no work is ever submitted.  This behavior is
advantageous for multi-threaded workloads because, so long as all of the
threads agree on what fences to use up-front, you don't have the extra
cross-thread synchronization cost of thread A telling thread B that it
has submitted its dependent work and thread B is now free to wait.

Within a single process, this can be implemented in the userspace driver
by doing exactly the same kind of tracking the app would have to do
using posix condition variables or similar.  However, in order for this
to work cross-process (as is required by VK_KHR_external_fence), we need
to handle this in the kernel.

This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
instructs the IOCTL to wait for the syncobj to have a non-null fence and
then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
easily get the Vulkan behavior.

v2:
  - Fix a bug in the invalid syncobj error path
  - Unify the wait-all and wait-any cases

Signed-off-by: Jason Ekstrand 
Cc: Dave Airlie 
---

I realized today (this is what happens when you sleep) that it takes almost
no work to make the wait-any path also handle wait-all.  By unifying the
two, I deleted over a hundred lines of code from the implementation.

  drivers/gpu/drm/drm_syncobj.c | 243 ++
  include/uapi/drm/drm.h|   1 +
  2 files changed, 199 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 510dfc2..5e7f654 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -51,6 +51,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "drm_internal.h"

  #include 
@@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct 
drm_syncobj *syncobj,

This is a bit of the puzzle that is missing.


 list_add_tail(&cb->node, &syncobj->cb_list);
  }
  
+static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,

+struct dma_fence **fence,
+struct drm_syncobj_cb *cb,
+drm_syncobj_func_t func)
+{
+   int ret;
+
+   spin_lock(&syncobj->lock);
+   if (syncobj->fence) {
+   *fence = dma_fence_get(syncobj->fence);
+   ret = 1;
+   } else {
+   *fence = NULL;
+   drm_syncobj_add_callback_locked(syncobj, cb, func);
+   ret = 0;
+   }
+   spin_unlock(&syncobj->lock);
+
+   return ret;
+}
+
  /**
   * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
   * @syncobj: Sync object to which to add the callback
@@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, 
void *data,
 &args->handle);
  }
  
+static int drm_syncobj_signaled(struct drm_syncobj *syncobj,

+   uint32_t wait_flags)
+{
+   struct dma_fence *fence;
+   int ret;
+
+   fence = drm_syncobj_fence_get(syncobj);
+   if (!fence) {
+   if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+   return 0;
+   else
+   return -EINVAL;
+   }
+
+   ret = dma_fence_is_signaled(fence) ? 1 : 0;
+
+   dma_fence_put(fence);
+
+   return ret;
+}
+
+struct syncobj_wait_entry {
+   struct task_struct *task;
+   struct dma_fence *fence;
+   struct dma_fence_cb fence_cb;
+   struct drm_syncobj_cb syncobj_cb;
+};
+
+static void syncobj_wait_fence_func(struct dma_fence *fence,
+   struct dma_fence_cb *cb)
+{
+   struct syncobj_wait_entry *wait =
+   container_of(cb, struct syncobj_wait_entry, fence_cb);
+
+   wake_up_process(wait->task);
+}
+
+static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
+ struct drm_syncobj_cb *cb)
+{
+   struct syncobj_wait_entry *wait =
+   container_of(cb, struct syncobj_wait_entry, syncobj_cb);
+
+   /* This happens inside the syncobj lock */
+   wait->fence = dma_fence_get(syncobj->fence);
+   wake_up_process(wait->task);
+}
+
+static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj 
**syncobjs,
+ uint32_t count,
+ uint32_t flags,
+ signed long timeout,
+ uint32_t *idx)
+{
+   struct 

Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

2017-08-09 Thread Chris Wilson
Quoting Jason Ekstrand (2017-08-09 18:00:54)
> Vulkan VkFence semantics require that the application be able to perform
> a CPU wait on work which may not yet have been submitted.  This is
> perfectly safe because the CPU wait has a timeout which will get
> triggered eventually if no work is ever submitted.  This behavior is
> advantageous for multi-threaded workloads because, so long as all of the
> threads agree on what fences to use up-front, you don't have the extra
> cross-thread synchronization cost of thread A telling thread B that it
> has submitted its dependent work and thread B is now free to wait.
> 
> Within a single process, this can be implemented in the userspace driver
> by doing exactly the same kind of tracking the app would have to do
> using posix condition variables or similar.  However, in order for this
> to work cross-process (as is required by VK_KHR_external_fence), we need
> to handle this in the kernel.
> 
> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
> instructs the IOCTL to wait for the syncobj to have a non-null fence and
> then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
> easily get the Vulkan behavior.
> 
> v2:
>  - Fix a bug in the invalid syncobj error path
>  - Unify the wait-all and wait-any cases
> 
> Signed-off-by: Jason Ekstrand 
> Cc: Dave Airlie 
> ---
> 
> I realized today (this is what happens when you sleep) that it takes almost
> no work to make the wait-any path also handle wait-all.  By unifying the
> two, I deleted over a hundred lines of code from the implementation.
> 
>  drivers/gpu/drm/drm_syncobj.c | 243 
> ++
>  include/uapi/drm/drm.h|   1 +
>  2 files changed, 199 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 510dfc2..5e7f654 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -51,6 +51,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "drm_internal.h"
>  #include 
> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct 
> drm_syncobj *syncobj,

This is a bit of the puzzle that is missing.

> list_add_tail(&cb->node, &syncobj->cb_list);
>  }
>  
> +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
> +struct dma_fence **fence,
> +struct drm_syncobj_cb *cb,
> +drm_syncobj_func_t func)
> +{
> +   int ret;
> +
> +   spin_lock(&syncobj->lock);
> +   if (syncobj->fence) {
> +   *fence = dma_fence_get(syncobj->fence);
> +   ret = 1;
> +   } else {
> +   *fence = NULL;
> +   drm_syncobj_add_callback_locked(syncobj, cb, func);
> +   ret = 0;
> +   }
> +   spin_unlock(&syncobj->lock);
> +
> +   return ret;
> +}
> +
>  /**
>   * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
>   * @syncobj: Sync object to which to add the callback
> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, 
> void *data,
> &args->handle);
>  }
>  
> +static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
> +   uint32_t wait_flags)
> +{
> +   struct dma_fence *fence;
> +   int ret;
> +
> +   fence = drm_syncobj_fence_get(syncobj);
> +   if (!fence) {
> +   if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
> +   return 0;
> +   else
> +   return -EINVAL;
> +   }
> +
> +   ret = dma_fence_is_signaled(fence) ? 1 : 0;
> +
> +   dma_fence_put(fence);
> +
> +   return ret;
> +}
> +
> +struct syncobj_wait_entry {
> +   struct task_struct *task;
> +   struct dma_fence *fence;
> +   struct dma_fence_cb fence_cb;
> +   struct drm_syncobj_cb syncobj_cb;
> +};
> +
> +static void syncobj_wait_fence_func(struct dma_fence *fence,
> +   struct dma_fence_cb *cb)
> +{
> +   struct syncobj_wait_entry *wait =
> +   container_of(cb, struct syncobj_wait_entry, fence_cb);
> +
> +   wake_up_process(wait->task);
> +}
> +
> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
> + struct drm_syncobj_cb *cb)
> +{
> +   struct syncobj_wait_entry *wait =
> +   container_of(cb, struct syncobj_wait_entry, syncobj_cb);
> +
> +   /* This happens inside the syncobj lock */
> +   wait->fence = dma_fence_get(syncobj->fence);
> +   wake_up_process(wait->task);
> +}
> +
> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj 
> **syncobjs,
> + uint32_t count,
> +