Am 17.08.19 um 16:47 schrieb Chris Wilson:
> The timestamp and the cb_list are mutually exclusive, the cb_list can
> only be added to prior to being signaled (and once signaled we drain),
> while the timestamp is only valid upon being signaled. Both the
> timestamp and the cb_list are only valid while the fence is alive, and
> as soon as no references are held can be replaced by the rcu_head.
>
> By reusing the union for the timestamp, we squeeze the base dma_fence
> struct to 64 bytes on x86-64.
>
> Suggested-by: Christian König <christian.koe...@amd.com>
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Christian König <christian.koe...@amd.com>
> ---
>   drivers/dma-buf/dma-fence.c                 | 16 +++++++++-------
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 +++++++------
>   drivers/gpu/drm/vmwgfx/vmwgfx_fence.c       |  3 +++
>   include/linux/dma-fence.h                   | 17 +++++++++++++----
>   4 files changed, 32 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 89d96e3e6df6..2c21115b1a37 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -129,6 +129,7 @@ EXPORT_SYMBOL(dma_fence_context_alloc);
>   int dma_fence_signal_locked(struct dma_fence *fence)
>   {
>       struct dma_fence_cb *cur, *tmp;
> +     struct list_head cb_list;
>   
>       lockdep_assert_held(fence->lock);
>   
> @@ -136,16 +137,16 @@ int dma_fence_signal_locked(struct dma_fence *fence)
>                                     &fence->flags)))
>               return -EINVAL;
>   
> +     /* Stash the cb_list before replacing it with the timestamp */
> +     list_replace(&fence->cb_list, &cb_list);

Stashing the timestamp instead would probably mean fewer bytes to modify.

> +
>       fence->timestamp = ktime_get();
>       set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
>       trace_dma_fence_signaled(fence);
>   
> -     if (!list_empty(&fence->cb_list)) {
> -             list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> -                     INIT_LIST_HEAD(&cur->node);
> -                     cur->func(fence, cur);
> -             }
> -             INIT_LIST_HEAD(&fence->cb_list);
> +     list_for_each_entry_safe(cur, tmp, &cb_list, node) {
> +             INIT_LIST_HEAD(&cur->node);
> +             cur->func(fence, cur);
>       }
>   
>       return 0;
> @@ -234,7 +235,8 @@ void dma_fence_release(struct kref *kref)
>   
>       trace_dma_fence_destroy(fence);
>   
> -     if (WARN(!list_empty(&fence->cb_list),
> +     if (WARN(!list_empty(&fence->cb_list) &&
> +              !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
>                "Fence %s:%s:%llx:%llx released with pending signals!\n",
>                fence->ops->get_driver_name(fence),
>                fence->ops->get_timeline_name(fence),
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c 
> b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 2bc9c460e78d..09c68dda2098 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -114,18 +114,18 @@ __dma_fence_signal__timestamp(struct dma_fence *fence, 
> ktime_t timestamp)
>   }
>   
>   static void
> -__dma_fence_signal__notify(struct dma_fence *fence)
> +__dma_fence_signal__notify(struct dma_fence *fence,
> +                        const struct list_head *list)
>   {
>       struct dma_fence_cb *cur, *tmp;
>   
>       lockdep_assert_held(fence->lock);
>       lockdep_assert_irqs_disabled();
>   
> -     list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> +     list_for_each_entry_safe(cur, tmp, list, node) {
>               INIT_LIST_HEAD(&cur->node);
>               cur->func(fence, cur);
>       }
> -     INIT_LIST_HEAD(&fence->cb_list);
>   }
>   
>   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
> @@ -187,11 +187,12 @@ void intel_engine_breadcrumbs_irq(struct 
> intel_engine_cs *engine)
>       list_for_each_safe(pos, next, &signal) {
>               struct i915_request *rq =
>                       list_entry(pos, typeof(*rq), signal_link);
> -
> -             __dma_fence_signal__timestamp(&rq->fence, timestamp);
> +             struct list_head cb_list;
>   
>               spin_lock(&rq->lock);
> -             __dma_fence_signal__notify(&rq->fence);
> +             list_replace(&rq->fence.cb_list, &cb_list);
> +             __dma_fence_signal__timestamp(&rq->fence, timestamp);
> +             __dma_fence_signal__notify(&rq->fence, &cb_list);
>               spin_unlock(&rq->lock);
>   
>               i915_request_put(rq);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c 
> b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> index 434dfadb0e52..178a6cd1a06f 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> @@ -185,6 +185,9 @@ static long vmw_fence_wait(struct dma_fence *f, bool 
> intr, signed long timeout)
>   
>       spin_lock(f->lock);
>   
> +     if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags))
> +             goto out;
> +
>       if (intr && signal_pending(current)) {
>               ret = -ERESTARTSYS;
>               goto out;
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index 2ce4d877d33e..6c36502a0562 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -65,17 +65,26 @@ struct dma_fence_cb;
>   struct dma_fence {
>       spinlock_t *lock;
>       const struct dma_fence_ops *ops;
> -     /* We clear the callback list on kref_put so that by the time we
> -      * release the fence it is unused. No one should be adding to the cb_list
> -      * that they don't themselves hold a reference for.
> +     /*
> +      * We clear the callback list on kref_put so that by the time we
> +      * release the fence it is unused. No one should be adding to the
> +      * cb_list that they don't themselves hold a reference for.
> +      *
> +      * The lifetime of the timestamp is similarly tied to both the
> +      * rcu freelist and the cb_list. The timestamp is only set upon
> +      * signaling while simultaneously notifying the cb_list. Ergo, we
> +      * only use either the cb_list or timestamp. Upon destruction,
> +      * neither are accessible, and so we can use the rcu. This means
> +      * that the cb_list is *only* valid until the signal bit is set,
> +      * and to read either you *must* hold a reference to the fence.
>        */
>       union {
>               struct rcu_head rcu;
>               struct list_head cb_list;
> +             ktime_t timestamp;

Maybe order these as cb_list, timestamp, rcu to better document the order
in which they are used during the lifetime of a fence.

Apart from that looks good to me,
Christian.

>       };
>       u64 context;
>       u64 seqno;
> -     ktime_t timestamp;
>       unsigned long flags;
>       struct kref refcount;
>       int error;

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to