Re: [PATCH v2] dma-fence: Store the timestamp in the same union as the cb_list

2019-08-17 Thread Koenig, Christian
Am 17.08.19 um 17:30 schrieb Chris Wilson:
> The timestamp and the cb_list are mutually exclusive, the cb_list can
> only be added to prior to being signaled (and once signaled we drain),
> while the timestamp is only valid upon being signaled. Both the
> timestamp and the cb_list are only valid while the fence is alive, and
> as soon as no references are held can be replaced by the rcu_head.
>
> By reusing the union for the timestamp, we squeeze the base dma_fence
> struct to 64 bytes on x86-64.
>
> v2: Sort the union chronologically
>
> Suggested-by: Christian König 
> Signed-off-by: Chris Wilson 
> Cc: Christian König 

I can't judge the correctness of the vmw and Intel stuff, so only
Acked-by: Christian König.

> ---
>   drivers/dma-buf/dma-fence.c | 16 +++---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 ++--
>   drivers/gpu/drm/vmwgfx/vmwgfx_fence.c   |  3 +++
>   include/linux/dma-fence.h   | 23 -
>   4 files changed, 37 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 8a6d0250285d..2c136aee3e79 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -129,6 +129,7 @@ EXPORT_SYMBOL(dma_fence_context_alloc);
>   int dma_fence_signal_locked(struct dma_fence *fence)
>   {
>   struct dma_fence_cb *cur, *tmp;
> + struct list_head cb_list;
>   
>   lockdep_assert_held(fence->lock);
>   
> @@ -136,16 +137,16 @@ int dma_fence_signal_locked(struct dma_fence *fence)
>   &fence->flags)))
>   return -EINVAL;
>   
> + /* Stash the cb_list before replacing it with the timestamp */
> + list_replace(&fence->cb_list, &cb_list);
> +
>   fence->timestamp = ktime_get();
>   set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
>   trace_dma_fence_signaled(fence);
>   
> - if (!list_empty(&fence->cb_list)) {
> - list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> - INIT_LIST_HEAD(&cur->node);
> - cur->func(fence, cur);
> - }
> - INIT_LIST_HEAD(&fence->cb_list);
> + list_for_each_entry_safe(cur, tmp, &cb_list, node) {
> + INIT_LIST_HEAD(&cur->node);
> + cur->func(fence, cur);
>   }
>   
>   return 0;
> @@ -231,7 +232,8 @@ void dma_fence_release(struct kref *kref)
>   
>   trace_dma_fence_destroy(fence);
>   
> - if (WARN(!list_empty(&fence->cb_list),
> + if (WARN(!list_empty(&fence->cb_list) &&
> +  !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
>"Fence %s:%s:%llx:%llx released with pending signals!\n",
>fence->ops->get_driver_name(fence),
>fence->ops->get_timeline_name(fence),
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c 
> b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 2bc9c460e78d..09c68dda2098 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -114,18 +114,18 @@ __dma_fence_signal__timestamp(struct dma_fence *fence, 
> ktime_t timestamp)
>   }
>   
>   static void
> -__dma_fence_signal__notify(struct dma_fence *fence)
> +__dma_fence_signal__notify(struct dma_fence *fence,
> +const struct list_head *list)
>   {
>   struct dma_fence_cb *cur, *tmp;
>   
>   lockdep_assert_held(fence->lock);
>   lockdep_assert_irqs_disabled();
>   
> - list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
> + list_for_each_entry_safe(cur, tmp, list, node) {
>   INIT_LIST_HEAD(&cur->node);
>   cur->func(fence, cur);
>   }
> - INIT_LIST_HEAD(&fence->cb_list);
>   }
>   
>   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
> @@ -187,11 +187,12 @@ void intel_engine_breadcrumbs_irq(struct 
> intel_engine_cs *engine)
>   list_for_each_safe(pos, next, &signal) {
>   struct i915_request *rq =
>   list_entry(pos, typeof(*rq), signal_link);
> -
> - __dma_fence_signal__timestamp(&rq->fence, timestamp);
> + struct list_head cb_list;
>   
>   spin_lock(&rq->lock);
> - __dma_fence_signal__notify(&rq->fence);
> + list_replace(&rq->fence.cb_list, &cb_list);
> + __dma_fence_signal__timestamp(&rq->fence, timestamp);
> + __dma_fence_signal__notify(&rq->fence, &cb_list);
>   spin_unlock(&rq->lock);
>   
>   i915_request_put(rq);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c 
> b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> index 434dfadb0e52..178a6cd1a06f 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> @@ -185,6 +185,9 @@ static long vmw_fence_wait(struct dma_fence *f, bool 
> intr, signed long timeout)
>   
>   spin_lock(f->lock);
>   
> + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags))
> + goto out;
> +
>   if (intr && 

[PATCH v2] dma-fence: Store the timestamp in the same union as the cb_list

2019-08-17 Thread Chris Wilson
The timestamp and the cb_list are mutually exclusive, the cb_list can
only be added to prior to being signaled (and once signaled we drain),
while the timestamp is only valid upon being signaled. Both the
timestamp and the cb_list are only valid while the fence is alive, and
as soon as no references are held can be replaced by the rcu_head.

By reusing the union for the timestamp, we squeeze the base dma_fence
struct to 64 bytes on x86-64.

v2: Sort the union chronologically

Suggested-by: Christian König 
Signed-off-by: Chris Wilson 
Cc: Christian König 
---
 drivers/dma-buf/dma-fence.c | 16 +++---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c   |  3 +++
 include/linux/dma-fence.h   | 23 -
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 8a6d0250285d..2c136aee3e79 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -129,6 +129,7 @@ EXPORT_SYMBOL(dma_fence_context_alloc);
 int dma_fence_signal_locked(struct dma_fence *fence)
 {
struct dma_fence_cb *cur, *tmp;
+   struct list_head cb_list;
 
lockdep_assert_held(fence->lock);
 
@@ -136,16 +137,16 @@ int dma_fence_signal_locked(struct dma_fence *fence)
  &fence->flags)))
return -EINVAL;
 
+   /* Stash the cb_list before replacing it with the timestamp */
+   list_replace(&fence->cb_list, &cb_list);
+
fence->timestamp = ktime_get();
set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
trace_dma_fence_signaled(fence);
 
-   if (!list_empty(&fence->cb_list)) {
-   list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-   INIT_LIST_HEAD(&cur->node);
-   cur->func(fence, cur);
-   }
-   INIT_LIST_HEAD(&fence->cb_list);
+   list_for_each_entry_safe(cur, tmp, &cb_list, node) {
+   INIT_LIST_HEAD(&cur->node);
+   cur->func(fence, cur);
}
 
return 0;
@@ -231,7 +232,8 @@ void dma_fence_release(struct kref *kref)
 
trace_dma_fence_destroy(fence);
 
-   if (WARN(!list_empty(&fence->cb_list),
+   if (WARN(!list_empty(&fence->cb_list) &&
+!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
 "Fence %s:%s:%llx:%llx released with pending signals!\n",
 fence->ops->get_driver_name(fence),
 fence->ops->get_timeline_name(fence),
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 2bc9c460e78d..09c68dda2098 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -114,18 +114,18 @@ __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
 }
 
 static void
-__dma_fence_signal__notify(struct dma_fence *fence)
+__dma_fence_signal__notify(struct dma_fence *fence,
+  const struct list_head *list)
 {
struct dma_fence_cb *cur, *tmp;
 
lockdep_assert_held(fence->lock);
lockdep_assert_irqs_disabled();
 
-   list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
+   list_for_each_entry_safe(cur, tmp, list, node) {
INIT_LIST_HEAD(&cur->node);
cur->func(fence, cur);
}
-   INIT_LIST_HEAD(&fence->cb_list);
 }
 
 void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
@@ -187,11 +187,12 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
list_for_each_safe(pos, next, &signal) {
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
-
-   __dma_fence_signal__timestamp(&rq->fence, timestamp);
+   struct list_head cb_list;
 
spin_lock(&rq->lock);
-   __dma_fence_signal__notify(&rq->fence);
+   list_replace(&rq->fence.cb_list, &cb_list);
+   __dma_fence_signal__timestamp(&rq->fence, timestamp);
+   __dma_fence_signal__notify(&rq->fence, &cb_list);
spin_unlock(&rq->lock);
 
i915_request_put(rq);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 434dfadb0e52..178a6cd1a06f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -185,6 +185,9 @@ static long vmw_fence_wait(struct dma_fence *f, bool intr, signed long timeout)
 
spin_lock(f->lock);
 
+   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags))
+   goto out;
+
if (intr && signal_pending(current)) {
ret = -ERESTARTSYS;
goto out;
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 2ce4d877d33e..8b4a5aaa6848 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -65,17 +65,30 @@ struct dma_fence_cb;
 struct dma_fence {