Re: [PATCH 5/5] drm: add syncobj timeline support v2

2018-08-23 Thread Christian König

Am 23.08.2018 um 11:58 schrieb zhoucm1:



On 2018年08月23日 17:15, Christian König wrote:

Am 23.08.2018 um 10:25 schrieb Chunming Zhou:

VK_KHR_timeline_semaphore:
This extension introduces a new type of semaphore that has an 
integer payload

identifying a point in a timeline. Such timeline semaphores support the
following operations:
    * Host query - A host operation that allows querying the payload 
of the

  timeline semaphore.
    * Host wait - A host operation that allows a blocking wait for a
  timeline semaphore to reach a specified value.


I think I have a idea what "Host" means in this context, but it would 
probably be better to describe it.


How about "CPU"?



Yeah, that's probably better.




    * Device wait - A device operation that allows waiting for a
  timeline semaphore to reach a specified value.
    * Device signal - A device operation that allows advancing the
  timeline semaphore to a specified value.

Since it's a timeline, that means the front time point(PT) always is 
signaled before the late PT.

a. signal PT design:
Signal PT fence N depends on PT[N-1] fence and signal opertion 
fence, when PT[N] fence is signaled,

the timeline will increase to value of PT[N].
b. wait PT design:
Wait PT fence is signaled by reaching timeline point value, when 
timeline is increasing, will compare
wait PTs value with new timeline value, if PT value is lower than 
timeline value, then wait PT will be
signaled, otherwise keep in list. semaphore wait operation can wait 
on any point of timeline,
so need a RB tree to order them. And wait PT could ahead of signal 
PT, we need a sumission fence to

perform that.

v2:
1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
2. move unexposed denitions to .c file. (Daniel Vetter)
3. split up the change to drm_syncobj_find_fence() in a separate 
patch. (Christian)
4. split up the change to drm_syncobj_replace_fence() in a separate 
patch.
5. drop the submission_fence implementation and instead use 
wait_event() for that. (Christian)

6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)


I really liked Daniels idea to handle the classic syncobj like a 
timeline synobj with just 1 entry. That can probably simplify the 
implementation quite a bit.
Yeah, after timeline, seems we can remove old syncobj->fence, right? 
will try to unify them in additional patch.


I think we could do something like the following:

1. When drm_syncobj_find_fence is called with point zero then we return 
the last added one.
2. When drm_syncobj_find_fence is called with point zero we add the new 
fence as with N+1 to the list.


This way syncobj should keep it's functionality as is with the timeline 
support added on top.


Regards,
Christian.



Thanks,
David Zhou


Additional to that an amdgpu patch which shows how the interface is 
to be used is probably something Daniel will want to see as well.


Christian.



TODO:
1. CPU query and wait on timeline semaphore.
2. test application (Daniel Vetter)

Signed-off-by: Chunming Zhou 
Cc: Christian Konig 
Cc: Dave Airlie 
Cc: Daniel Rakos 
Cc: Daniel Vetter 
---
  drivers/gpu/drm/drm_syncobj.c | 383 
+++---

  include/drm/drm_syncobj.h |  28 +++
  include/uapi/drm/drm.h    |   1 +
  3 files changed, 389 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c 
b/drivers/gpu/drm/drm_syncobj.c

index 6227df2cc0a4..f738d78edf65 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -56,6 +56,44 @@
  #include "drm_internal.h"
  #include 
  +struct drm_syncobj_stub_fence {
+    struct dma_fence base;
+    spinlock_t lock;
+};
+
+static const char *drm_syncobj_stub_fence_get_name(struct dma_fence 
*fence)

+{
+    return "syncobjstub";
+}
+
+static bool drm_syncobj_stub_fence_enable_signaling(struct 
dma_fence *fence)

+{
+    return !dma_fence_is_signaled(fence);
+}
+
+static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
+    .get_driver_name = drm_syncobj_stub_fence_get_name,
+    .get_timeline_name = drm_syncobj_stub_fence_get_name,
+    .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
+    .release = NULL,
+};
+
+struct drm_syncobj_wait_pt {
+    struct drm_syncobj_stub_fence base;
+    u64    value;
+    struct rb_node   node;
+};
+struct drm_syncobj_signal_pt {
+    struct drm_syncobj_stub_fence base;
+    struct dma_fence *signal_fence;
+    struct dma_fence *pre_pt_base;
+    struct dma_fence_cb signal_cb;
+    struct dma_fence_cb pre_pt_cb;
+    struct drm_syncobj *syncobj;
+    u64    value;
+    struct list_head list;
+};
+
  /**
   * drm_syncobj_find - lookup and reference a sync object.
   * @file_private: drm file private pointer
@@ -137,6 +175,150 @@ void drm_syncobj_remove_callback(struct 
drm_syncobj *syncobj,

  spin_unlock(&syncobj->lock);
  }
  +static void drm_syncobj_timeline_signal_wait_pts(struct 
drm_syncobj *syncobj)

+{
+    struct rb_node *node = NULL

Re: [PATCH 5/5] drm: add syncobj timeline support v2

2018-08-23 Thread zhoucm1



On 2018年08月23日 17:15, Christian König wrote:

Am 23.08.2018 um 10:25 schrieb Chunming Zhou:

VK_KHR_timeline_semaphore:
This extension introduces a new type of semaphore that has an integer 
payload

identifying a point in a timeline. Such timeline semaphores support the
following operations:
    * Host query - A host operation that allows querying the payload 
of the

  timeline semaphore.
    * Host wait - A host operation that allows a blocking wait for a
  timeline semaphore to reach a specified value.


I think I have a idea what "Host" means in this context, but it would 
probably be better to describe it.


How about "CPU"?


    * Device wait - A device operation that allows waiting for a
  timeline semaphore to reach a specified value.
    * Device signal - A device operation that allows advancing the
  timeline semaphore to a specified value.

Since it's a timeline, that means the front time point(PT) always is 
signaled before the late PT.

a. signal PT design:
Signal PT fence N depends on PT[N-1] fence and signal opertion fence, 
when PT[N] fence is signaled,

the timeline will increase to value of PT[N].
b. wait PT design:
Wait PT fence is signaled by reaching timeline point value, when 
timeline is increasing, will compare
wait PTs value with new timeline value, if PT value is lower than 
timeline value, then wait PT will be
signaled, otherwise keep in list. semaphore wait operation can wait 
on any point of timeline,
so need a RB tree to order them. And wait PT could ahead of signal 
PT, we need a sumission fence to

perform that.

v2:
1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
2. move unexposed denitions to .c file. (Daniel Vetter)
3. split up the change to drm_syncobj_find_fence() in a separate 
patch. (Christian)
4. split up the change to drm_syncobj_replace_fence() in a separate 
patch.
5. drop the submission_fence implementation and instead use 
wait_event() for that. (Christian)

6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)


I really liked Daniels idea to handle the classic syncobj like a 
timeline synobj with just 1 entry. That can probably simplify the 
implementation quite a bit.
Yeah, after timeline, seems we can remove old syncobj->fence, right? 
will try to unify them in additional patch.


Thanks,
David Zhou


Additional to that an amdgpu patch which shows how the interface is to 
be used is probably something Daniel will want to see as well.


Christian.



TODO:
1. CPU query and wait on timeline semaphore.
2. test application (Daniel Vetter)

Signed-off-by: Chunming Zhou 
Cc: Christian Konig 
Cc: Dave Airlie 
Cc: Daniel Rakos 
Cc: Daniel Vetter 
---
  drivers/gpu/drm/drm_syncobj.c | 383 
+++---

  include/drm/drm_syncobj.h |  28 +++
  include/uapi/drm/drm.h    |   1 +
  3 files changed, 389 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c 
b/drivers/gpu/drm/drm_syncobj.c

index 6227df2cc0a4..f738d78edf65 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -56,6 +56,44 @@
  #include "drm_internal.h"
  #include 
  +struct drm_syncobj_stub_fence {
+    struct dma_fence base;
+    spinlock_t lock;
+};
+
+static const char *drm_syncobj_stub_fence_get_name(struct dma_fence 
*fence)

+{
+    return "syncobjstub";
+}
+
+static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence 
*fence)

+{
+    return !dma_fence_is_signaled(fence);
+}
+
+static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
+    .get_driver_name = drm_syncobj_stub_fence_get_name,
+    .get_timeline_name = drm_syncobj_stub_fence_get_name,
+    .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
+    .release = NULL,
+};
+
+struct drm_syncobj_wait_pt {
+    struct drm_syncobj_stub_fence base;
+    u64    value;
+    struct rb_node   node;
+};
+struct drm_syncobj_signal_pt {
+    struct drm_syncobj_stub_fence base;
+    struct dma_fence *signal_fence;
+    struct dma_fence *pre_pt_base;
+    struct dma_fence_cb signal_cb;
+    struct dma_fence_cb pre_pt_cb;
+    struct drm_syncobj *syncobj;
+    u64    value;
+    struct list_head list;
+};
+
  /**
   * drm_syncobj_find - lookup and reference a sync object.
   * @file_private: drm file private pointer
@@ -137,6 +175,150 @@ void drm_syncobj_remove_callback(struct 
drm_syncobj *syncobj,

  spin_unlock(&syncobj->lock);
  }
  +static void drm_syncobj_timeline_signal_wait_pts(struct 
drm_syncobj *syncobj)

+{
+    struct rb_node *node = NULL;
+    struct drm_syncobj_wait_pt *wait_pt = NULL;
+
+    spin_lock(&syncobj->lock);
+    for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
+    node != NULL; ) {
+    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
+    node = rb_next(node);
+    if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
+    dma_fence_signal(&wait_pt->base.base);
+    rb_erase(&wait_pt->node,
+ &synco

Re: [PATCH 5/5] drm: add syncobj timeline support v2

2018-08-23 Thread zhoucm1



On 2018年08月23日 17:08, Daniel Vetter wrote:

On Thu, Aug 23, 2018 at 04:25:42PM +0800, Chunming Zhou wrote:

VK_KHR_timeline_semaphore:
This extension introduces a new type of semaphore that has an integer payload
identifying a point in a timeline. Such timeline semaphores support the
following operations:
* Host query - A host operation that allows querying the payload of the
  timeline semaphore.
* Host wait - A host operation that allows a blocking wait for a
  timeline semaphore to reach a specified value.
* Device wait - A device operation that allows waiting for a
  timeline semaphore to reach a specified value.
* Device signal - A device operation that allows advancing the
  timeline semaphore to a specified value.

Since it's a timeline, that means the front time point(PT) always is signaled 
before the late PT.
a. signal PT design:
Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when 
PT[N] fence is signaled,
the timeline will increase to value of PT[N].
b. wait PT design:
Wait PT fence is signaled by reaching timeline point value, when timeline is 
increasing, will compare
wait PTs value with new timeline value, if PT value is lower than timeline 
value, then wait PT will be
signaled, otherwise keep in list. semaphore wait operation can wait on any 
point of timeline,
so need a RB tree to order them. And wait PT could ahead of signal PT, we need 
a sumission fence to
perform that.

v2:
1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
2. move unexposed denitions to .c file. (Daniel Vetter)
3. split up the change to drm_syncobj_find_fence() in a separate patch. 
(Christian)
4. split up the change to drm_syncobj_replace_fence() in a separate patch.
5. drop the submission_fence implementation and instead use wait_event() for 
that. (Christian)
6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)

Depending upon how it's going to be used, this is the wrong thing to do.


TODO:
1. CPU query and wait on timeline semaphore.
2. test application (Daniel Vetter)

I also had some more suggestions, around aligning the two concepts of
future fences
submission fence is replaced by wait_event, so I don't address your 
future fence suggestion. And welcome to explain future fence status.

and at least trying to merge the timeline and the other
fence (which really is just a special case of a timeline with only 1
slot).

Could you detail that? Do you mean merge syncobj->fence to timeline point?

Thanks,
David Zhou

-Daniel


Signed-off-by: Chunming Zhou 
Cc: Christian Konig 
Cc: Dave Airlie 
Cc: Daniel Rakos 
Cc: Daniel Vetter 
---
  drivers/gpu/drm/drm_syncobj.c | 383 +++---
  include/drm/drm_syncobj.h |  28 +++
  include/uapi/drm/drm.h|   1 +
  3 files changed, 389 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 6227df2cc0a4..f738d78edf65 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -56,6 +56,44 @@
  #include "drm_internal.h"
  #include 
  
+struct drm_syncobj_stub_fence {

+   struct dma_fence base;
+   spinlock_t lock;
+};
+
+static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
+{
+return "syncobjstub";
+}
+
+static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
+{
+return !dma_fence_is_signaled(fence);
+}
+
+static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
+   .get_driver_name = drm_syncobj_stub_fence_get_name,
+   .get_timeline_name = drm_syncobj_stub_fence_get_name,
+   .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
+   .release = NULL,
+};
+
+struct drm_syncobj_wait_pt {
+   struct drm_syncobj_stub_fence base;
+   u64value;
+   struct rb_node   node;
+};
+struct drm_syncobj_signal_pt {
+   struct drm_syncobj_stub_fence base;
+   struct dma_fence *signal_fence;
+   struct dma_fence *pre_pt_base;
+   struct dma_fence_cb signal_cb;
+   struct dma_fence_cb pre_pt_cb;
+   struct drm_syncobj *syncobj;
+   u64value;
+   struct list_head list;
+};
+
  /**
   * drm_syncobj_find - lookup and reference a sync object.
   * @file_private: drm file private pointer
@@ -137,6 +175,150 @@ void drm_syncobj_remove_callback(struct drm_syncobj 
*syncobj,
spin_unlock(&syncobj->lock);
  }
  
+static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)

+{
+   struct rb_node *node = NULL;
+   struct drm_syncobj_wait_pt *wait_pt = NULL;
+
+   spin_lock(&syncobj->lock);
+   for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
+   node != NULL; ) {
+   wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
+   node = rb_next(node);
+   if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
+   dma_fence_signal(&wait_pt->b

Re: [PATCH 5/5] drm: add syncobj timeline support v2

2018-08-23 Thread Christian König

Am 23.08.2018 um 10:25 schrieb Chunming Zhou:

VK_KHR_timeline_semaphore:
This extension introduces a new type of semaphore that has an integer payload
identifying a point in a timeline. Such timeline semaphores support the
following operations:
* Host query - A host operation that allows querying the payload of the
  timeline semaphore.
* Host wait - A host operation that allows a blocking wait for a
  timeline semaphore to reach a specified value.


I think I have a idea what "Host" means in this context, but it would 
probably be better to describe it.



* Device wait - A device operation that allows waiting for a
  timeline semaphore to reach a specified value.
* Device signal - A device operation that allows advancing the
  timeline semaphore to a specified value.

Since it's a timeline, that means the front time point(PT) always is signaled 
before the late PT.
a. signal PT design:
Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when 
PT[N] fence is signaled,
the timeline will increase to value of PT[N].
b. wait PT design:
Wait PT fence is signaled by reaching timeline point value, when timeline is 
increasing, will compare
wait PTs value with new timeline value, if PT value is lower than timeline 
value, then wait PT will be
signaled, otherwise keep in list. semaphore wait operation can wait on any 
point of timeline,
so need a RB tree to order them. And wait PT could ahead of signal PT, we need 
a sumission fence to
perform that.

v2:
1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
2. move unexposed denitions to .c file. (Daniel Vetter)
3. split up the change to drm_syncobj_find_fence() in a separate patch. 
(Christian)
4. split up the change to drm_syncobj_replace_fence() in a separate patch.
5. drop the submission_fence implementation and instead use wait_event() for 
that. (Christian)
6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)


I really liked Daniels idea to handle the classic syncobj like a 
timeline synobj with just 1 entry. That can probably simplify the 
implementation quite a bit.


Additional to that an amdgpu patch which shows how the interface is to 
be used is probably something Daniel will want to see as well.


Christian.



TODO:
1. CPU query and wait on timeline semaphore.
2. test application (Daniel Vetter)

Signed-off-by: Chunming Zhou 
Cc: Christian Konig 
Cc: Dave Airlie 
Cc: Daniel Rakos 
Cc: Daniel Vetter 
---
  drivers/gpu/drm/drm_syncobj.c | 383 +++---
  include/drm/drm_syncobj.h |  28 +++
  include/uapi/drm/drm.h|   1 +
  3 files changed, 389 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 6227df2cc0a4..f738d78edf65 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -56,6 +56,44 @@
  #include "drm_internal.h"
  #include 
  
+struct drm_syncobj_stub_fence {

+   struct dma_fence base;
+   spinlock_t lock;
+};
+
+static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
+{
+return "syncobjstub";
+}
+
+static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
+{
+return !dma_fence_is_signaled(fence);
+}
+
+static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
+   .get_driver_name = drm_syncobj_stub_fence_get_name,
+   .get_timeline_name = drm_syncobj_stub_fence_get_name,
+   .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
+   .release = NULL,
+};
+
+struct drm_syncobj_wait_pt {
+   struct drm_syncobj_stub_fence base;
+   u64value;
+   struct rb_node   node;
+};
+struct drm_syncobj_signal_pt {
+   struct drm_syncobj_stub_fence base;
+   struct dma_fence *signal_fence;
+   struct dma_fence *pre_pt_base;
+   struct dma_fence_cb signal_cb;
+   struct dma_fence_cb pre_pt_cb;
+   struct drm_syncobj *syncobj;
+   u64value;
+   struct list_head list;
+};
+
  /**
   * drm_syncobj_find - lookup and reference a sync object.
   * @file_private: drm file private pointer
@@ -137,6 +175,150 @@ void drm_syncobj_remove_callback(struct drm_syncobj 
*syncobj,
spin_unlock(&syncobj->lock);
  }
  
+static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)

+{
+   struct rb_node *node = NULL;
+   struct drm_syncobj_wait_pt *wait_pt = NULL;
+
+   spin_lock(&syncobj->lock);
+   for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
+   node != NULL; ) {
+   wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
+   node = rb_next(node);
+   if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
+   dma_fence_signal(&wait_pt->base.base);
+   rb_erase(&wait_pt->node,
+&syncobj->syncobj_timeline.wait_pt_tree);
+   RB_CLEAR_NODE(&

Re: [PATCH 5/5] drm: add syncobj timeline support v2

2018-08-23 Thread Daniel Vetter
On Thu, Aug 23, 2018 at 04:25:42PM +0800, Chunming Zhou wrote:
> VK_KHR_timeline_semaphore:
> This extension introduces a new type of semaphore that has an integer payload
> identifying a point in a timeline. Such timeline semaphores support the
> following operations:
>* Host query - A host operation that allows querying the payload of the
>  timeline semaphore.
>* Host wait - A host operation that allows a blocking wait for a
>  timeline semaphore to reach a specified value.
>* Device wait - A device operation that allows waiting for a
>  timeline semaphore to reach a specified value.
>* Device signal - A device operation that allows advancing the
>  timeline semaphore to a specified value.
> 
> Since it's a timeline, that means the front time point(PT) always is signaled 
> before the late PT.
> a. signal PT design:
> Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when 
> PT[N] fence is signaled,
> the timeline will increase to value of PT[N].
> b. wait PT design:
> Wait PT fence is signaled by reaching timeline point value, when timeline is 
> increasing, will compare
> wait PTs value with new timeline value, if PT value is lower than timeline 
> value, then wait PT will be
> signaled, otherwise keep in list. semaphore wait operation can wait on any 
> point of timeline,
> so need a RB tree to order them. And wait PT could ahead of signal PT, we 
> need a sumission fence to
> perform that.
> 
> v2:
> 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
> 2. move unexposed denitions to .c file. (Daniel Vetter)
> 3. split up the change to drm_syncobj_find_fence() in a separate patch. 
> (Christian)
> 4. split up the change to drm_syncobj_replace_fence() in a separate patch.
> 5. drop the submission_fence implementation and instead use wait_event() for 
> that. (Christian)
> 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)

Depending upon how it's going to be used, this is the wrong thing to do.

> TODO:
> 1. CPU query and wait on timeline semaphore.
> 2. test application (Daniel Vetter)

I also had some more suggestions, around aligning the two concepts of
future fences and at least trying to merge the timeline and the other
fence (which really is just a special case of a timeline with only 1
slot).
-Daniel

> 
> Signed-off-by: Chunming Zhou 
> Cc: Christian Konig 
> Cc: Dave Airlie 
> Cc: Daniel Rakos 
> Cc: Daniel Vetter 
> ---
>  drivers/gpu/drm/drm_syncobj.c | 383 
> +++---
>  include/drm/drm_syncobj.h |  28 +++
>  include/uapi/drm/drm.h|   1 +
>  3 files changed, 389 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 6227df2cc0a4..f738d78edf65 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -56,6 +56,44 @@
>  #include "drm_internal.h"
>  #include 
>  
> +struct drm_syncobj_stub_fence {
> + struct dma_fence base;
> + spinlock_t lock;
> +};
> +
> +static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
> +{
> +return "syncobjstub";
> +}
> +
> +static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
> +{
> +return !dma_fence_is_signaled(fence);
> +}
> +
> +static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
> + .get_driver_name = drm_syncobj_stub_fence_get_name,
> + .get_timeline_name = drm_syncobj_stub_fence_get_name,
> + .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
> + .release = NULL,
> +};
> +
> +struct drm_syncobj_wait_pt {
> + struct drm_syncobj_stub_fence base;
> + u64value;
> + struct rb_node   node;
> +};
> +struct drm_syncobj_signal_pt {
> + struct drm_syncobj_stub_fence base;
> + struct dma_fence *signal_fence;
> + struct dma_fence *pre_pt_base;
> + struct dma_fence_cb signal_cb;
> + struct dma_fence_cb pre_pt_cb;
> + struct drm_syncobj *syncobj;
> + u64value;
> + struct list_head list;
> +};
> +
>  /**
>   * drm_syncobj_find - lookup and reference a sync object.
>   * @file_private: drm file private pointer
> @@ -137,6 +175,150 @@ void drm_syncobj_remove_callback(struct drm_syncobj 
> *syncobj,
>   spin_unlock(&syncobj->lock);
>  }
>  
> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
> +{
> + struct rb_node *node = NULL;
> + struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> + spin_lock(&syncobj->lock);
> + for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
> + node != NULL; ) {
> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> + node = rb_next(node);
> + if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
> + dma_fence_signal(&wait_pt->base.base);
> + rb_erase(&wait_pt->node,
> +  &syncobj->syncobj_timelin

[PATCH 5/5] drm: add syncobj timeline support v2

2018-08-23 Thread Chunming Zhou
VK_KHR_timeline_semaphore:
This extension introduces a new type of semaphore that has an integer payload
identifying a point in a timeline. Such timeline semaphores support the
following operations:
   * Host query - A host operation that allows querying the payload of the
 timeline semaphore.
   * Host wait - A host operation that allows a blocking wait for a
 timeline semaphore to reach a specified value.
   * Device wait - A device operation that allows waiting for a
 timeline semaphore to reach a specified value.
   * Device signal - A device operation that allows advancing the
 timeline semaphore to a specified value.

Since it's a timeline, that means the front time point(PT) always is signaled 
before the late PT.
a. signal PT design:
Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when 
PT[N] fence is signaled,
the timeline will increase to value of PT[N].
b. wait PT design:
Wait PT fence is signaled by reaching timeline point value, when timeline is 
increasing, will compare
wait PTs value with new timeline value, if PT value is lower than timeline 
value, then wait PT will be
signaled, otherwise keep in list. semaphore wait operation can wait on any 
point of timeline,
so need a RB tree to order them. And wait PT could ahead of signal PT, we need 
a sumission fence to
perform that.

v2:
1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
2. move unexposed denitions to .c file. (Daniel Vetter)
3. split up the change to drm_syncobj_find_fence() in a separate patch. 
(Christian)
4. split up the change to drm_syncobj_replace_fence() in a separate patch.
5. drop the submission_fence implementation and instead use wait_event() for 
that. (Christian)
6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)

TODO:
1. CPU query and wait on timeline semaphore.
2. test application (Daniel Vetter)

Signed-off-by: Chunming Zhou 
Cc: Christian Konig 
Cc: Dave Airlie 
Cc: Daniel Rakos 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/drm_syncobj.c | 383 +++---
 include/drm/drm_syncobj.h |  28 +++
 include/uapi/drm/drm.h|   1 +
 3 files changed, 389 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 6227df2cc0a4..f738d78edf65 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -56,6 +56,44 @@
 #include "drm_internal.h"
 #include 
 
+struct drm_syncobj_stub_fence {
+   struct dma_fence base;
+   spinlock_t lock;
+};
+
+static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
+{
+return "syncobjstub";
+}
+
+static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
+{
+return !dma_fence_is_signaled(fence);
+}
+
+static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
+   .get_driver_name = drm_syncobj_stub_fence_get_name,
+   .get_timeline_name = drm_syncobj_stub_fence_get_name,
+   .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
+   .release = NULL,
+};
+
+struct drm_syncobj_wait_pt {
+   struct drm_syncobj_stub_fence base;
+   u64value;
+   struct rb_node   node;
+};
+struct drm_syncobj_signal_pt {
+   struct drm_syncobj_stub_fence base;
+   struct dma_fence *signal_fence;
+   struct dma_fence *pre_pt_base;
+   struct dma_fence_cb signal_cb;
+   struct dma_fence_cb pre_pt_cb;
+   struct drm_syncobj *syncobj;
+   u64value;
+   struct list_head list;
+};
+
 /**
  * drm_syncobj_find - lookup and reference a sync object.
  * @file_private: drm file private pointer
@@ -137,6 +175,150 @@ void drm_syncobj_remove_callback(struct drm_syncobj 
*syncobj,
spin_unlock(&syncobj->lock);
 }
 
+static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
+{
+   struct rb_node *node = NULL;
+   struct drm_syncobj_wait_pt *wait_pt = NULL;
+
+   spin_lock(&syncobj->lock);
+   for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
+   node != NULL; ) {
+   wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
+   node = rb_next(node);
+   if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
+   dma_fence_signal(&wait_pt->base.base);
+   rb_erase(&wait_pt->node,
+&syncobj->syncobj_timeline.wait_pt_tree);
+   RB_CLEAR_NODE(&wait_pt->node);
+   /* kfree(wait_pt) is excuted by fence put */
+   dma_fence_put(&wait_pt->base.base);
+   } else {
+   /* the loop is from left to right, the later entry 
value is
+* bigger, so don't need to check any more */
+   break;
+   }
+   }
+   spin_unlock(&syncobj->lock);
+}
+
+
+static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
+{