Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
On Thu, Aug 10, 2017 at 4:00 AM, Chris Wilson wrote: > Quoting Jason Ekstrand (2017-08-10 00:53:00) > > On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson > wrote: > > However, this installs the proxy into syncobj->fence with the result > > that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour > > of drm_syncobj is then quite inconsistent, sometimes it will wait > for a > > future fence, sometimes it will report an error. > > > > > > Yeah, that's not good. I thought about a variety of solutions to try and > > re-use more core dma_fence code. Ultimately I chose the current one > because it > > takes a snapshot of the syncobjs and then, from that point forward, it's > > consistent with its snapshot. Nothing I was able to come up with based > on core > > dma_fence wait routines does that. > > So if we choose to keep the proxy local to the fence-array and not replace > syncobj->fence, we can still reduce this to a plain dma-fence-array + > wait. > > Pertinent details: > > +static void syncobj_notify(struct drm_syncobj *syncobj, struct dma_fence > *fence) > +{ > + struct drm_syncobj_cb *cb, *cn; > + unsigned long flags; > + > + /* This is just an opencoded waitqueue; FIXME! */ > + spin_lock_irqsave(&syncobj->lock, flags); > + list_for_each_entry_safe(cb, cn, &syncobj->cb_list, link) > + cb->func(cb, fence); > + INIT_LIST_HEAD(&syncobj->cb_list); > + spin_unlock_irqrestore(&syncobj->lock, flags); > +} > + > /** > * drm_syncobj_replace_fence - replace fence in a sync object. 
> * @syncobj: Sync object to replace fence in > @@ -89,7 +109,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj > *syncobj, > > if (fence) > dma_fence_get(fence); > + > old_fence = xchg(&syncobj->fence, fence); > + if (!old_fence && fence) > + syncobj_notify(syncobj, fence); > > dma_fence_put(old_fence); > } > @@ -124,6 +147,9 @@ void drm_syncobj_free(struct kref *kref) > struct drm_syncobj *syncobj = container_of(kref, > struct drm_syncobj, > refcount); > + > + syncobj_notify(syncobj, NULL); > + > dma_fence_put(syncobj->fence); > kfree(syncobj); > } > @@ -140,6 +166,8 @@ static int drm_syncobj_create(struct drm_file > *file_private, > return -ENOMEM; > > kref_init(&syncobj->refcount); > + spin_lock_init(&syncobj->lock); > + INIT_LIST_HEAD(&syncobj->cb_list); > > idr_preload(GFP_KERNEL); > spin_lock(&file_private->syncobj_table_lock); > > struct future_fence { > + struct drm_syncobj_cb base; > + struct dma_fence **slot; > +}; > + > +static void future_fence_cb(struct drm_syncobj_cb *cb, struct dma_fence > *fence) > +{ > + struct future_fence *ff = container_of(cb, typeof(*ff), base); > + > + if (fence) > + dma_fence_replace_proxy(ff->slot, fence); > Where does this come from? I can't find it on dri-devel and "proxy" doesn't show up anywhere in the dma-buf sources. What does it do? 
> + else > + dma_fence_signal(*ff->slot); > + > + kfree(ff); > +} > + > +int > +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private) > +{ > + struct drm_syncobj_wait *args = data; > + struct dma_fence_array *array; > + struct dma_fence **fences; > + unsigned int count, i; > + long timeout; > + u32 *handles; > + int ret; > + > + BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles)); > + > + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) > + return -ENODEV; > + > + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | > + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)) > + return -EINVAL; > + > + count = args->count_handles; > + if (!count) > + return -EINVAL; > + > + /* Get the handles from userspace */ > + fences = kmalloc_array(count, > + sizeof(struct dma_fence *), > + __GFP_NOWARN | GFP_KERNEL); > + if (!fences) > + return -ENOMEM; > + > + handles = (void *)fences + count * (sizeof(*fences) - > sizeof(*handles)); > + if (copy_from_user(handles, > + u64_to_user_ptr(args->handles), > + sizeof(u32) * count)) { > + ret = -EFAULT; > + goto err; > + } > + > + for (i = 0; i < count; i++) { > + struct drm_syncobj *s; > + > + ret = -ENOENT; > + s = drm_syncobj_find(file_private, handles[i]); > + if (s) { > + ret = 0; > + spin_lock_irq(>lock); > + if (s->fence) { > +
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Am 10.08.2017 um 16:32 schrieb Jason Ekstrand: On Thu, Aug 10, 2017 at 5:26 AM, Christian König> wrote: Am 10.08.2017 um 01:53 schrieb Jason Ekstrand: On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson > wrote: Quoting Chris Wilson (2017-08-09 18:57:44) > So we are taking a snapshot here. It looks like this could have been > done using a dma_fence_array + dma_fence_proxy for capturing the future > fence. A quick sketch of this idea looks like: void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence) { - struct dma_fence *old_fence; + unsigned long flags; - if (fence) - dma_fence_get(fence); - old_fence = xchg(>fence, fence); - - dma_fence_put(old_fence); + spin_lock_irqsave(>lock, flags); + dma_fence_replace_proxy(>fence, fence); + spin_unlock_irqrestore(>lock, flags); } +int +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_wait *args = data; + struct dma_fence **fences; + struct dma_fence_array *array; + unsigned long timeout; + unsigned int count; + u32 *handles; + int ret = 0; + u32 i; + + BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles)); + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + return -EINVAL; + + count = args->count_handles; + if (!count) + return -EINVAL; + + /* Get the handles from userspace */ + fences = kmalloc_array(count, + sizeof(struct dma_fence *), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return -ENOMEM; + + handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles)); + if (copy_from_user(handles, + u64_to_user_ptr(args->handles), + sizeof(u32) * count)) { + ret = -EFAULT; + goto err; + } + + for (i = 0; i < count; i++) { + struct drm_syncobj *s; + + ret = -ENOENT; + s = drm_syncobj_find(file_private, handles[i]); + if (s) { + ret = 0; + spin_lock_irq(>lock); + if (!s->fence) { + if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + 
s->fence = dma_fence_create_proxy(); + else +ret = -EINVAL; + } + if (s->fence) + fences[i] = dma_fence_get(s->fence); + spin_unlock_irq(>lock); + } + if (ret) { + count = i; + goto err_fences; + } + } + + array = dma_fence_array_create(count, fences, 0, 0, + !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)); + if (!array) { + ret = -ENOMEM; + goto err_fences; + } + + timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec); + timeout = dma_fence_wait_timeout(>base, true, timeout); + args->first_signaled = array->first_signaled; + dma_fence_put(>base); + + return timeout < 0 ? timeout : 0; + +err_fences: + for (i = 0; i < count; i++) + dma_fence_put(fences[i]); +err: + kfree(fences); + return ret; +} The key advantage is that this translate the ioctl into a dma-fence-array which already has to deal with the mess, sharing the burden. (But it does require a trivial patch to dma-fence-array to record the first signaled fence.) However, this installs the proxy into syncobj->fence with the result that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour of drm_syncobj is then
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
On Thu, Aug 10, 2017 at 5:26 AM, Christian Königwrote: > Am 10.08.2017 um 01:53 schrieb Jason Ekstrand: > > On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson > wrote: > >> Quoting Chris Wilson (2017-08-09 18:57:44) >> > So we are taking a snapshot here. It looks like this could have been >> > done using a dma_fence_array + dma_fence_proxy for capturing the future >> > fence. >> >> A quick sketch of this idea looks like: >> >> void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, >>struct dma_fence *fence) >> { >> - struct dma_fence *old_fence; >> + unsigned long flags; >> >> - if (fence) >> - dma_fence_get(fence); >> - old_fence = xchg(>fence, fence); >> - >> - dma_fence_put(old_fence); >> + spin_lock_irqsave(>lock, flags); >> + dma_fence_replace_proxy(>fence, fence); >> + spin_unlock_irqrestore(>lock, flags); >> } >> >> +int >> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, >> + struct drm_file *file_private) >> +{ >> + struct drm_syncobj_wait *args = data; >> + struct dma_fence **fences; >> + struct dma_fence_array *array; >> + unsigned long timeout; >> + unsigned int count; >> + u32 *handles; >> + int ret = 0; >> + u32 i; >> + >> + BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles)); >> + >> + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) >> + return -ENODEV; >> + >> + if (args->flags != 0 && args->flags != >> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) >> + return -EINVAL; >> + >> + count = args->count_handles; >> + if (!count) >> + return -EINVAL; >> + >> + /* Get the handles from userspace */ >> + fences = kmalloc_array(count, >> + sizeof(struct dma_fence *), >> + __GFP_NOWARN | GFP_KERNEL); >> + if (!fences) >> + return -ENOMEM; >> + >> + handles = (void *)fences + count * (sizeof(*fences) - >> sizeof(*handles)); >> + if (copy_from_user(handles, >> + u64_to_user_ptr(args->handles), >> + sizeof(u32) * count)) { >> + ret = -EFAULT; >> + goto err; >> + } >> + >> + for (i = 0; i < count; i++) { >> + struct drm_syncobj *s; >> + >> + ret = -ENOENT; >> + s = 
drm_syncobj_find(file_private, handles[i]); >> + if (s) { >> + ret = 0; >> + spin_lock_irq(>lock); >> + if (!s->fence) { >> + if (args->flags & >> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) >> + s->fence = >> dma_fence_create_proxy(); >> + else >> + ret = -EINVAL; >> + } >> + if (s->fence) >> + fences[i] = dma_fence_get(s->fence); >> + spin_unlock_irq(>lock); >> + } >> + if (ret) { >> + count = i; >> + goto err_fences; >> + } >> + } >> + >> + array = dma_fence_array_create(count, fences, 0, 0, >> + !(args->flags & >> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)); >> + if (!array) { >> + ret = -ENOMEM; >> + goto err_fences; >> + } >> + >> + timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec); >> + timeout = dma_fence_wait_timeout(>base, true, timeout); >> + args->first_signaled = array->first_signaled; >> + dma_fence_put(>base); >> + >> + return timeout < 0 ? timeout : 0; >> + >> +err_fences: >> + for (i = 0; i < count; i++) >> + dma_fence_put(fences[i]); >> +err: >> + kfree(fences); >> + return ret; >> +} >> >> The key advantage is that this translate the ioctl into a dma-fence-array >> which already has to deal with the mess, sharing the burden. (But it >> does require a trivial patch to dma-fence-array to record the first >> signaled fence.) >> >> However, this installs the proxy into syncobj->fence with the result >> that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour >> of drm_syncobj is then quite inconsistent, sometimes it will wait for a >> future fence, sometimes it will report an error. >> > > Yeah, that's not good. I thought about a variety of solutions to try and > re-use more core dma_fence code. Ultimately I chose the current one > because it takes a snapshot of the syncobjs and then, from that point > forward, it's consistent with its snapshot. Nothing I was able to come up > with based on core dma_fence wait routines does that. > > > As Chris pointed out, that's
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Am 09.08.2017 um 23:31 schrieb Chris Wilson: Quoting Jason Ekstrand (2017-08-09 18:00:54) +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, + uint32_t count, + uint32_t flags, + signed long timeout, + uint32_t *idx) +{ + struct syncobj_wait_entry *entries; + struct dma_fence *fence; + signed long ret; + uint32_t signaled_count, i; + + if (timeout == 0) { + signaled_count = 0; + for (i = 0; i < count; ++i) { + ret = drm_syncobj_signaled(syncobjs[i], flags); + if (ret < 0) + return ret; + if (ret == 0) + continue; + if (signaled_count == 0 && idx) + *idx = i; + signaled_count++; + } + + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + return signaled_count == count ? 1 : 0; + else + return signaled_count > 0 ? 1 : 0; There's a very annoying laxness in the dma_fence API here, in that backends are not required to automatically report when a fence is signaled prior to fence->ops->enable_signaling() being called. So here if we fail to match signaled_count, we need to fallthough and try a 0 timeout wait! Christian, dma_fence_wait_any_timeout() has this same bug you told me off for, e.g. commit 698c0f7ff216 ("dma-buf/fence: revert "don't wait when specified timeout is zero" (v2)")! Thanks for pointing this out, going to take care of this issue. Christian. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Am 10.08.2017 um 01:53 schrieb Jason Ekstrand: On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson> wrote: Quoting Chris Wilson (2017-08-09 18:57:44) > So we are taking a snapshot here. It looks like this could have been > done using a dma_fence_array + dma_fence_proxy for capturing the future > fence. A quick sketch of this idea looks like: void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence) { - struct dma_fence *old_fence; + unsigned long flags; - if (fence) - dma_fence_get(fence); - old_fence = xchg(>fence, fence); - - dma_fence_put(old_fence); + spin_lock_irqsave(>lock, flags); + dma_fence_replace_proxy(>fence, fence); + spin_unlock_irqrestore(>lock, flags); } +int +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_wait *args = data; + struct dma_fence **fences; + struct dma_fence_array *array; + unsigned long timeout; + unsigned int count; + u32 *handles; + int ret = 0; + u32 i; + + BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles)); + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + return -EINVAL; + + count = args->count_handles; + if (!count) + return -EINVAL; + + /* Get the handles from userspace */ + fences = kmalloc_array(count, + sizeof(struct dma_fence *), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return -ENOMEM; + + handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles)); + if (copy_from_user(handles, + u64_to_user_ptr(args->handles), + sizeof(u32) * count)) { + ret = -EFAULT; + goto err; + } + + for (i = 0; i < count; i++) { + struct drm_syncobj *s; + + ret = -ENOENT; + s = drm_syncobj_find(file_private, handles[i]); + if (s) { + ret = 0; + spin_lock_irq(>lock); + if (!s->fence) { + if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + s->fence = dma_fence_create_proxy(); + else + ret = -EINVAL; + } + if (s->fence) + fences[i] = 
dma_fence_get(s->fence); + spin_unlock_irq(>lock); + } + if (ret) { + count = i; + goto err_fences; + } + } + + array = dma_fence_array_create(count, fences, 0, 0, + !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)); + if (!array) { + ret = -ENOMEM; + goto err_fences; + } + + timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec); + timeout = dma_fence_wait_timeout(>base, true, timeout); + args->first_signaled = array->first_signaled; + dma_fence_put(>base); + + return timeout < 0 ? timeout : 0; + +err_fences: + for (i = 0; i < count; i++) + dma_fence_put(fences[i]); +err: + kfree(fences); + return ret; +} The key advantage is that this translate the ioctl into a dma-fence-array which already has to deal with the mess, sharing the burden. (But it does require a trivial patch to dma-fence-array to record the first signaled fence.) However, this installs the proxy into syncobj->fence with the result that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour of drm_syncobj is then quite inconsistent, sometimes it will wait for a future fence, sometimes it will report an error. Yeah, that's not good. I thought about a variety of solutions to try and re-use more core dma_fence code. Ultimately I chose the current one because it takes a snapshot of the syncobjs and then, from that point forward, it's consistent with its snapshot. Nothing I was able to come up with based on core dma_fence wait routines does that. As Chris
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Quoting Jason Ekstrand (2017-08-10 00:53:00) > On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilsonwrote: > However, this installs the proxy into syncobj->fence with the result > that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour > of drm_syncobj is then quite inconsistent, sometimes it will wait for a > future fence, sometimes it will report an error. > > > Yeah, that's not good. I thought about a variety of solutions to try and > re-use more core dma_fence code. Ultimately I chose the current one because > it > takes a snapshot of the syncobjs and then, from that point forward, it's > consistent with its snapshot. Nothing I was able to come up with based on > core > dma_fence wait routines does that. So if we choose to keep the proxy local to the fence-array and not replace syncobj->fence, we can still reduce this to a plain dma-fence-array + wait. Pertinent details: +static void syncobj_notify(struct drm_syncobj *syncobj, struct dma_fence *fence) +{ + struct drm_syncobj_cb *cb, *cn; + unsigned long flags; + + /* This is just an opencoded waitqueue; FIXME! */ + spin_lock_irqsave(>lock, flags); + list_for_each_entry_safe(cb, cn, >cb_list, link) + cb->func(cb, fence); + INIT_LIST_HEAD(>cb_list); + spin_unlock_irqrestore(>lock, flags); +} + /** * drm_syncobj_replace_fence - replace fence in a sync object. 
* @syncobj: Sync object to replace fence in @@ -89,7 +109,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, if (fence) dma_fence_get(fence); + old_fence = xchg(>fence, fence); + if (!old_fence && fence) + syncobj_notify(syncobj, fence); dma_fence_put(old_fence); } @@ -124,6 +147,9 @@ void drm_syncobj_free(struct kref *kref) struct drm_syncobj *syncobj = container_of(kref, struct drm_syncobj, refcount); + + syncobj_notify(syncobj, NULL); + dma_fence_put(syncobj->fence); kfree(syncobj); } @@ -140,6 +166,8 @@ static int drm_syncobj_create(struct drm_file *file_private, return -ENOMEM; kref_init(>refcount); + spin_lock_init(>lock); + INIT_LIST_HEAD(>cb_list); idr_preload(GFP_KERNEL); spin_lock(_private->syncobj_table_lock); struct future_fence { + struct drm_syncobj_cb base; + struct dma_fence **slot; +}; + +static void future_fence_cb(struct drm_syncobj_cb *cb, struct dma_fence *fence) +{ + struct future_fence *ff = container_of(cb, typeof(*ff), base); + + if (fence) + dma_fence_replace_proxy(ff->slot, fence); + else + dma_fence_signal(*ff->slot); + + kfree(ff); +} + +int +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_wait *args = data; + struct dma_fence_array *array; + struct dma_fence **fences; + unsigned int count, i; + long timeout; + u32 *handles; + int ret; + + BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles)); + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)) + return -EINVAL; + + count = args->count_handles; + if (!count) + return -EINVAL; + + /* Get the handles from userspace */ + fences = kmalloc_array(count, + sizeof(struct dma_fence *), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return -ENOMEM; + + handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles)); + if (copy_from_user(handles, + u64_to_user_ptr(args->handles), + sizeof(u32) 
* count)) { + ret = -EFAULT; + goto err; + } + + for (i = 0; i < count; i++) { + struct drm_syncobj *s; + + ret = -ENOENT; + s = drm_syncobj_find(file_private, handles[i]); + if (s) { + ret = 0; + spin_lock_irq(&s->lock); + if (s->fence) { + fences[i] = dma_fence_get(s->fence); + } else if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + struct future_fence *cb; + + cb = kmalloc(sizeof(*cb), GFP_KERNEL); + fences[i] = dma_fence_create_proxy(); + if (cb && fences[i]) { + cb->slot = &fences[i]; +
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilsonwrote: > Quoting Chris Wilson (2017-08-09 18:57:44) > > So we are taking a snapshot here. It looks like this could have been > > done using a dma_fence_array + dma_fence_proxy for capturing the future > > fence. > > A quick sketch of this idea looks like: > > void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, >struct dma_fence *fence) > { > - struct dma_fence *old_fence; > + unsigned long flags; > > - if (fence) > - dma_fence_get(fence); > - old_fence = xchg(>fence, fence); > - > - dma_fence_put(old_fence); > + spin_lock_irqsave(>lock, flags); > + dma_fence_replace_proxy(>fence, fence); > + spin_unlock_irqrestore(>lock, flags); > } > > +int > +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private) > +{ > + struct drm_syncobj_wait *args = data; > + struct dma_fence **fences; > + struct dma_fence_array *array; > + unsigned long timeout; > + unsigned int count; > + u32 *handles; > + int ret = 0; > + u32 i; > + > + BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles)); > + > + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) > + return -ENODEV; > + > + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ > ALL) > + return -EINVAL; > + > + count = args->count_handles; > + if (!count) > + return -EINVAL; > + > + /* Get the handles from userspace */ > + fences = kmalloc_array(count, > + sizeof(struct dma_fence *), > + __GFP_NOWARN | GFP_KERNEL); > + if (!fences) > + return -ENOMEM; > + > + handles = (void *)fences + count * (sizeof(*fences) - > sizeof(*handles)); > + if (copy_from_user(handles, > + u64_to_user_ptr(args->handles), > + sizeof(u32) * count)) { > + ret = -EFAULT; > + goto err; > + } > + > + for (i = 0; i < count; i++) { > + struct drm_syncobj *s; > + > + ret = -ENOENT; > + s = drm_syncobj_find(file_private, handles[i]); > + if (s) { > + ret = 0; > + spin_lock_irq(>lock); > + if (!s->fence) { > + if (args->flags & > DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) > + 
s->fence = > dma_fence_create_proxy(); > + else > + ret = -EINVAL; > + } > + if (s->fence) > + fences[i] = dma_fence_get(s->fence); > + spin_unlock_irq(>lock); > + } > + if (ret) { > + count = i; > + goto err_fences; > + } > + } > + > + array = dma_fence_array_create(count, fences, 0, 0, > + !(args->flags & > DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)); > + if (!array) { > + ret = -ENOMEM; > + goto err_fences; > + } > + > + timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec); > + timeout = dma_fence_wait_timeout(>base, true, timeout); > + args->first_signaled = array->first_signaled; > + dma_fence_put(>base); > + > + return timeout < 0 ? timeout : 0; > + > +err_fences: > + for (i = 0; i < count; i++) > + dma_fence_put(fences[i]); > +err: > + kfree(fences); > + return ret; > +} > > The key advantage is that this translate the ioctl into a dma-fence-array > which already has to deal with the mess, sharing the burden. (But it > does require a trivial patch to dma-fence-array to record the first > signaled fence.) > > However, this installs the proxy into syncobj->fence with the result > that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour > of drm_syncobj is then quite inconsistent, sometimes it will wait for a > future fence, sometimes it will report an error. > Yeah, that's not good. I thought about a variety of solutions to try and re-use more core dma_fence code. Ultimately I chose the current one because it takes a snapshot of the syncobjs and then, from that point forward, it's consistent with its snapshot. Nothing I was able to come up with based on core dma_fence wait routines does that. --Jason ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Quoting Chris Wilson (2017-08-09 18:57:44) > So we are taking a snapshot here. It looks like this could have been > done using a dma_fence_array + dma_fence_proxy for capturing the future > fence. A quick sketch of this idea looks like: void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence) { - struct dma_fence *old_fence; + unsigned long flags; - if (fence) - dma_fence_get(fence); - old_fence = xchg(>fence, fence); - - dma_fence_put(old_fence); + spin_lock_irqsave(>lock, flags); + dma_fence_replace_proxy(>fence, fence); + spin_unlock_irqrestore(>lock, flags); } +int +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_wait *args = data; + struct dma_fence **fences; + struct dma_fence_array *array; + unsigned long timeout; + unsigned int count; + u32 *handles; + int ret = 0; + u32 i; + + BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles)); + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + return -EINVAL; + + count = args->count_handles; + if (!count) + return -EINVAL; + + /* Get the handles from userspace */ + fences = kmalloc_array(count, + sizeof(struct dma_fence *), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return -ENOMEM; + + handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles)); + if (copy_from_user(handles, + u64_to_user_ptr(args->handles), + sizeof(u32) * count)) { + ret = -EFAULT; + goto err; + } + + for (i = 0; i < count; i++) { + struct drm_syncobj *s; + + ret = -ENOENT; + s = drm_syncobj_find(file_private, handles[i]); + if (s) { + ret = 0; + spin_lock_irq(>lock); + if (!s->fence) { + if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + s->fence = dma_fence_create_proxy(); + else + ret = -EINVAL; + } + if (s->fence) + fences[i] = dma_fence_get(s->fence); + spin_unlock_irq(>lock); + } + if (ret) { + count = i; + goto err_fences; + } + } + 
+ array = dma_fence_array_create(count, fences, 0, 0, + !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)); + if (!array) { + ret = -ENOMEM; + goto err_fences; + } + + timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec); + timeout = dma_fence_wait_timeout(>base, true, timeout); + args->first_signaled = array->first_signaled; + dma_fence_put(>base); + + return timeout < 0 ? timeout : 0; + +err_fences: + for (i = 0; i < count; i++) + dma_fence_put(fences[i]); +err: + kfree(fences); + return ret; +} The key advantage is that this translate the ioctl into a dma-fence-array which already has to deal with the mess, sharing the burden. (But it does require a trivial patch to dma-fence-array to record the first signaled fence.) However, this installs the proxy into syncobj->fence with the result that any concurrent wait also become a WAIT_FOR_SUBMIT. The behaviour of drm_syncobj is then quite inconsistent, sometimes it will wait for a future fence, sometimes it will report an error. Furthermore, do we want future-fences in the execbuf API? etc. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
On Wed, Aug 9, 2017 at 2:31 PM, Chris Wilsonwrote: > Quoting Jason Ekstrand (2017-08-09 18:00:54) > > +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj > **syncobjs, > > + uint32_t count, > > + uint32_t flags, > > + signed long timeout, > > + uint32_t *idx) > > +{ > > + struct syncobj_wait_entry *entries; > > + struct dma_fence *fence; > > + signed long ret; > > + uint32_t signaled_count, i; > > + > > + if (timeout == 0) { > > + signaled_count = 0; > > + for (i = 0; i < count; ++i) { > > + ret = drm_syncobj_signaled(syncobjs[i], flags); > > + if (ret < 0) > > + return ret; > > + if (ret == 0) > > + continue; > > + if (signaled_count == 0 && idx) > > + *idx = i; > > + signaled_count++; > > + } > > + > > + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) > > + return signaled_count == count ? 1 : 0; > > + else > > + return signaled_count > 0 ? 1 : 0; > > There's a very annoying laxness in the dma_fence API here, in that > backends are not required to automatically report when a fence is > signaled prior to fence->ops->enable_signaling() being called. > So here if we fail to match signaled_count, we need to fallthough and > try a 0 timeout wait! > That is very annoying! I'll see how bad the fall-through is... > Christian, dma_fence_wait_any_timeout() has this same bug you told me off > for, e.g. commit 698c0f7ff216 ("dma-buf/fence: revert "don't wait when > specified timeout is zero" (v2)")! > -Chris > ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Quoting Jason Ekstrand (2017-08-09 18:00:54) > +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj > **syncobjs, > + uint32_t count, > + uint32_t flags, > + signed long timeout, > + uint32_t *idx) > +{ > + struct syncobj_wait_entry *entries; > + struct dma_fence *fence; > + signed long ret; > + uint32_t signaled_count, i; > + > + if (timeout == 0) { > + signaled_count = 0; > + for (i = 0; i < count; ++i) { > + ret = drm_syncobj_signaled(syncobjs[i], flags); > + if (ret < 0) > + return ret; > + if (ret == 0) > + continue; > + if (signaled_count == 0 && idx) > + *idx = i; > + signaled_count++; > + } > + > + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) > + return signaled_count == count ? 1 : 0; > + else > + return signaled_count > 0 ? 1 : 0; There's a very annoying laxness in the dma_fence API here, in that backends are not required to automatically report when a fence is signaled prior to fence->ops->enable_signaling() being called. So here if we fail to match signaled_count, we need to fallthough and try a 0 timeout wait! Christian, dma_fence_wait_any_timeout() has this same bug you told me off for, e.g. commit 698c0f7ff216 ("dma-buf/fence: revert "don't wait when specified timeout is zero" (v2)")! -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
On Wed, Aug 9, 2017 at 11:25 AM, Christian Königwrote: > Am 09.08.2017 um 19:57 schrieb Chris Wilson: > >> Quoting Jason Ekstrand (2017-08-09 18:00:54) >> >>> Vulkan VkFence semantics require that the application be able to perform >>> a CPU wait on work which may not yet have been submitted. This is >>> perfectly safe because the CPU wait has a timeout which will get >>> triggered eventually if no work is ever submitted. This behavior is >>> advantageous for multi-threaded workloads because, so long as all of the >>> threads agree on what fences to use up-front, you don't have the extra >>> cross-thread synchronization cost of thread A telling thread B that it >>> has submitted its dependent work and thread B is now free to wait. >>> >>> Within a single process, this can be implemented in the userspace driver >>> by doing exactly the same kind of tracking the app would have to do >>> using posix condition variables or similar. However, in order for this >>> to work cross-process (as is required by VK_KHR_external_fence), we need >>> to handle this in the kernel. >>> >>> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which >>> instructs the IOCTL to wait for the syncobj to have a non-null fence and >>> then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can >>> easily get the Vulkan behavior. >>> >>> v2: >>> - Fix a bug in the invalid syncobj error path >>> - Unify the wait-all and wait-any cases >>> >>> Signed-off-by: Jason Ekstrand >>> Cc: Dave Airlie >>> --- >>> >>> I realized today (this is what happens when you sleep) that it takes >>> almost >>> no work to make the wait-any path also handle wait-all. By unifying the >>> two, I deleted over a hundred lines of code from the implementation. 
>>> >>> drivers/gpu/drm/drm_syncobj.c | 243 ++ >>> >>> include/uapi/drm/drm.h| 1 + >>> 2 files changed, 199 insertions(+), 45 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/drm_syncobj.c >>> b/drivers/gpu/drm/drm_syncobj.c >>> index 510dfc2..5e7f654 100644 >>> --- a/drivers/gpu/drm/drm_syncobj.c >>> +++ b/drivers/gpu/drm/drm_syncobj.c >>> @@ -51,6 +51,7 @@ >>> #include >>> #include >>> #include >>> +#include >>> #include "drm_internal.h" >>> #include >>> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct >>> drm_syncobj *syncobj, >>> >> This is a bit of the puzzle that is missing. >> > ? It's in the previous patch. > list_add_tail(>node, >cb_list); >>> } >>> +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj >>> *syncobj, >>> +struct dma_fence >>> **fence, >>> +struct drm_syncobj_cb >>> *cb, >>> +drm_syncobj_func_t func) >>> +{ >>> + int ret; >>> + >>> + spin_lock(>lock); >>> + if (syncobj->fence) { >>> + *fence = dma_fence_get(syncobj->fence); >>> + ret = 1; >>> + } else { >>> + *fence = NULL; >>> + drm_syncobj_add_callback_locked(syncobj, cb, func); >>> + ret = 0; >>> + } >>> + spin_unlock(>lock); >>> + >>> + return ret; >>> +} >>> + >>> /** >>>* drm_syncobj_add_callback - adds a callback to syncobj::cb_list >>>* @syncobj: Sync object to which to add the callback >>> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device >>> *dev, void *data, >>> >handle); >>> } >>> +static int drm_syncobj_signaled(struct drm_syncobj *syncobj, >>> + uint32_t wait_flags) >>> +{ >>> + struct dma_fence *fence; >>> + int ret; >>> + >>> + fence = drm_syncobj_fence_get(syncobj); >>> + if (!fence) { >>> + if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) >>> + return 0; >>> + else >>> + return -EINVAL; >>> + } >>> + >>> + ret = dma_fence_is_signaled(fence) ? 
1 : 0; >>> + >>> + dma_fence_put(fence); >>> + >>> + return ret; >>> +} >>> + >>> +struct syncobj_wait_entry { >>> + struct task_struct *task; >>> + struct dma_fence *fence; >>> + struct dma_fence_cb fence_cb; >>> + struct drm_syncobj_cb syncobj_cb; >>> +}; >>> + >>> +static void syncobj_wait_fence_func(struct dma_fence *fence, >>> + struct dma_fence_cb *cb) >>> +{ >>> + struct syncobj_wait_entry *wait = >>> + container_of(cb, struct syncobj_wait_entry, fence_cb); >>> + >>> + wake_up_process(wait->task); >>> +} >>> + >>> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, >>> + struct drm_syncobj_cb *cb) >>> +{ >>> + struct
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Am 09.08.2017 um 19:57 schrieb Chris Wilson: Quoting Jason Ekstrand (2017-08-09 18:00:54) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases Signed-off-by: Jason Ekstrand Cc: Dave Airlie --- I realized today (this is what happens when you sleep) that it takes almost no work to make the wait-any path also handle wait-all. By unifying the two, I deleted over a hundred lines of code from the implementation. 
drivers/gpu/drm/drm_syncobj.c | 243 ++ include/uapi/drm/drm.h| 1 + 2 files changed, 199 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 510dfc2..5e7f654 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "drm_internal.h" #include @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, This is a bit of the puzzle that is missing. list_add_tail(>node, >cb_list); } +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, +struct dma_fence **fence, +struct drm_syncobj_cb *cb, +drm_syncobj_func_t func) +{ + int ret; + + spin_lock(>lock); + if (syncobj->fence) { + *fence = dma_fence_get(syncobj->fence); + ret = 1; + } else { + *fence = NULL; + drm_syncobj_add_callback_locked(syncobj, cb, func); + ret = 0; + } + spin_unlock(>lock); + + return ret; +} + /** * drm_syncobj_add_callback - adds a callback to syncobj::cb_list * @syncobj: Sync object to which to add the callback @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, >handle); } +static int drm_syncobj_signaled(struct drm_syncobj *syncobj, + uint32_t wait_flags) +{ + struct dma_fence *fence; + int ret; + + fence = drm_syncobj_fence_get(syncobj); + if (!fence) { + if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + return 0; + else + return -EINVAL; + } + + ret = dma_fence_is_signaled(fence) ? 
1 : 0; + + dma_fence_put(fence); + + return ret; +} + +struct syncobj_wait_entry { + struct task_struct *task; + struct dma_fence *fence; + struct dma_fence_cb fence_cb; + struct drm_syncobj_cb syncobj_cb; +}; + +static void syncobj_wait_fence_func(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct syncobj_wait_entry *wait = + container_of(cb, struct syncobj_wait_entry, fence_cb); + + wake_up_process(wait->task); +} + +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb) +{ + struct syncobj_wait_entry *wait = + container_of(cb, struct syncobj_wait_entry, syncobj_cb); + + /* This happens inside the syncobj lock */ + wait->fence = dma_fence_get(syncobj->fence); + wake_up_process(wait->task); +} + +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, + uint32_t count, + uint32_t flags, + signed long timeout, + uint32_t *idx) +{ + struct
Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Quoting Jason Ekstrand (2017-08-09 18:00:54) > Vulkan VkFence semantics require that the application be able to perform > a CPU wait on work which may not yet have been submitted. This is > perfectly safe because the CPU wait has a timeout which will get > triggered eventually if no work is ever submitted. This behavior is > advantageous for multi-threaded workloads because, so long as all of the > threads agree on what fences to use up-front, you don't have the extra > cross-thread synchronization cost of thread A telling thread B that it > has submitted its dependent work and thread B is now free to wait. > > Within a single process, this can be implemented in the userspace driver > by doing exactly the same kind of tracking the app would have to do > using posix condition variables or similar. However, in order for this > to work cross-process (as is required by VK_KHR_external_fence), we need > to handle this in the kernel. > > This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which > instructs the IOCTL to wait for the syncobj to have a non-null fence and > then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can > easily get the Vulkan behavior. > > v2: > - Fix a bug in the invalid syncobj error path > - Unify the wait-all and wait-any cases > > Signed-off-by: Jason Ekstrand> Cc: Dave Airlie > --- > > I realized today (this is what happens when you sleep) that it takes almost > no work to make the wait-any path also handle wait-all. By unifying the > two, I deleted over a hundred lines of code from the implementation. 
> > drivers/gpu/drm/drm_syncobj.c | 243 > ++ > include/uapi/drm/drm.h| 1 + > 2 files changed, 199 insertions(+), 45 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 510dfc2..5e7f654 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -51,6 +51,7 @@ > #include > #include > #include > +#include > > #include "drm_internal.h" > #include > @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct > drm_syncobj *syncobj, This is a bit of the puzzle that is missing. > list_add_tail(>node, >cb_list); > } > > +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, > +struct dma_fence **fence, > +struct drm_syncobj_cb *cb, > +drm_syncobj_func_t func) > +{ > + int ret; > + > + spin_lock(>lock); > + if (syncobj->fence) { > + *fence = dma_fence_get(syncobj->fence); > + ret = 1; > + } else { > + *fence = NULL; > + drm_syncobj_add_callback_locked(syncobj, cb, func); > + ret = 0; > + } > + spin_unlock(>lock); > + > + return ret; > +} > + > /** > * drm_syncobj_add_callback - adds a callback to syncobj::cb_list > * @syncobj: Sync object to which to add the callback > @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, > void *data, > >handle); > } > > +static int drm_syncobj_signaled(struct drm_syncobj *syncobj, > + uint32_t wait_flags) > +{ > + struct dma_fence *fence; > + int ret; > + > + fence = drm_syncobj_fence_get(syncobj); > + if (!fence) { > + if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) > + return 0; > + else > + return -EINVAL; > + } > + > + ret = dma_fence_is_signaled(fence) ? 
1 : 0; > + > + dma_fence_put(fence); > + > + return ret; > +} > + > +struct syncobj_wait_entry { > + struct task_struct *task; > + struct dma_fence *fence; > + struct dma_fence_cb fence_cb; > + struct drm_syncobj_cb syncobj_cb; > +}; > + > +static void syncobj_wait_fence_func(struct dma_fence *fence, > + struct dma_fence_cb *cb) > +{ > + struct syncobj_wait_entry *wait = > + container_of(cb, struct syncobj_wait_entry, fence_cb); > + > + wake_up_process(wait->task); > +} > + > +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, > + struct drm_syncobj_cb *cb) > +{ > + struct syncobj_wait_entry *wait = > + container_of(cb, struct syncobj_wait_entry, syncobj_cb); > + > + /* This happens inside the syncobj lock */ > + wait->fence = dma_fence_get(syncobj->fence); > + wake_up_process(wait->task); > +} > + > +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj > **syncobjs, > + uint32_t count, > +