Syncobj wait/signal operations are appended during command submission; the new timeline chunks additionally let a CS wait on and signal specific points of a timeline syncobj.
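
As an illustration only (not part of this patch), a userspace caller could describe a timeline wait roughly as sketched below, assuming a uapi header that already carries the new chunk IDs; the helper name and calling convention are made up for this sketch:

  #include <stdint.h>
  #include <string.h>
  #include <drm/amdgpu_drm.h>

  /* Hypothetical helper: describe a wait on timeline point `point` of
   * syncobj `handle` and wire it up as one CS chunk.  The chunk is later
   * passed to the CS ioctl together with the IB chunk(s).
   */
  static void fill_timeline_wait_chunk(struct drm_amdgpu_cs_chunk *chunk,
                                       struct drm_amdgpu_cs_chunk_syncobj *dep,
                                       uint32_t handle, uint64_t point)
  {
          memset(dep, 0, sizeof(*dep));
          dep->handle = handle;
          dep->flags  = 0;   /* e.g. DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT */
          dep->point  = point;

          chunk->chunk_id   = AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT;
          /* length_dw is in dwords; the kernel computes the entry count as
           * length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_syncobj). */
          chunk->length_dw  = sizeof(*dep) / 4;
          chunk->chunk_data = (uintptr_t)dep;
  }

Signalling uses the same struct with AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: once the job fence is installed, amdgpu_cs_post_dependencies() attaches it to the requested point via drm_syncobj_add_point(), falling back to drm_syncobj_replace_fence() when the point is 0.
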
v2: split into two kinds of in/out_deps functions (binary and timeline)
v3: fix the check for timeline syncobjs

Signed-off-by: Chunming Zhou <david1.z...@amd.com>
Cc: Tobias Hector <tobias.hec...@amd.com>
Cc: Jason Ekstrand <ja...@jlekstrand.net>
Cc: Dave Airlie <airl...@redhat.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 152 +++++++++++++++++++++----
 include/uapi/drm/amdgpu_drm.h          |   8 ++
 3 files changed, 144 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8d0d7f3dd5fb..deec2c796253 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -433,6 +433,12 @@ struct amdgpu_cs_chunk {
        void                    *kdata;
 };
 
+struct amdgpu_cs_post_dep {
+       struct drm_syncobj *syncobj;
+       struct dma_fence_chain *chain;
+       u64 point;
+};
+
 struct amdgpu_cs_parser {
        struct amdgpu_device    *adev;
        struct drm_file         *filp;
@@ -462,8 +468,8 @@ struct amdgpu_cs_parser {
        /* user fence */
        struct amdgpu_bo_list_entry     uf_entry;
 
-       unsigned num_post_dep_syncobjs;
-       struct drm_syncobj **post_dep_syncobjs;
+       unsigned                        num_post_deps;
+       struct amdgpu_cs_post_dep       *post_deps;
 };
 
 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 52a5e4fdc95b..2f6239b6be6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -215,6 +215,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+               case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+               case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
                        break;
 
                default:
@@ -804,9 +806,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
 
-       for (i = 0; i < parser->num_post_dep_syncobjs; i++)
-               drm_syncobj_put(parser->post_dep_syncobjs[i]);
-       kfree(parser->post_dep_syncobjs);
+       for (i = 0; i < parser->num_post_deps; i++) {
+               drm_syncobj_put(parser->post_deps[i].syncobj);
+               kfree(parser->post_deps[i].chain);
+       }
+       kfree(parser->post_deps);
 
        dma_fence_put(parser->fence);
 
@@ -1117,13 +1121,18 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 }
 
 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
-                                                uint32_t handle)
+                                                uint32_t handle, u64 point,
+                                                u64 flags)
 {
-       int r;
        struct dma_fence *fence;
-       r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
-       if (r)
+       int r;
+
+       r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+       if (r) {
+               DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+                         handle, point, r);
                return r;
+       }
 
        r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
        dma_fence_put(fence);
@@ -1134,46 +1143,118 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
                                            struct amdgpu_cs_chunk *chunk)
 {
+       struct drm_amdgpu_cs_chunk_sem *deps;
        unsigned num_deps;
        int i, r;
-       struct drm_amdgpu_cs_chunk_sem *deps;
 
        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_sem);
+       for (i = 0; i < num_deps; ++i) {
+               r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
+                                                         0, 0);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
+
 
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
+                                                    struct amdgpu_cs_chunk *chunk)
+{
+       struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+       unsigned num_deps;
+       int i, r;
+
+       syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+       num_deps = chunk->length_dw * 4 /
+               sizeof(struct drm_amdgpu_cs_chunk_syncobj);
        for (i = 0; i < num_deps; ++i) {
-               r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+               r = amdgpu_syncobj_lookup_and_add_to_sync(p,
+                                                         syncobj_deps[i].handle,
+                                                         syncobj_deps[i].point,
+                                                         syncobj_deps[i].flags);
                if (r)
                        return r;
        }
+
        return 0;
 }
 
 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
                                             struct amdgpu_cs_chunk *chunk)
 {
+       struct drm_amdgpu_cs_chunk_sem *deps;
        unsigned num_deps;
        int i;
-       struct drm_amdgpu_cs_chunk_sem *deps;
+
        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_sem);
 
-       p->post_dep_syncobjs = kmalloc_array(num_deps,
-                                            sizeof(struct drm_syncobj *),
-                                            GFP_KERNEL);
-       p->num_post_dep_syncobjs = 0;
+       p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+                                    GFP_KERNEL);
+       p->num_post_deps = 0;
 
-       if (!p->post_dep_syncobjs)
+       if (!p->post_deps)
                return -ENOMEM;
 
+
        for (i = 0; i < num_deps; ++i) {
-               p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
-               if (!p->post_dep_syncobjs[i])
+               p->post_deps[i].syncobj =
+                       drm_syncobj_find(p->filp, deps[i].handle);
+               if (!p->post_deps[i].syncobj)
                        return -EINVAL;
-               p->num_post_dep_syncobjs++;
+               p->post_deps[i].chain = NULL;
+               p->post_deps[i].point = 0;
+               p->num_post_deps++;
        }
+
+       return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
+                                                     struct amdgpu_cs_chunk
+                                                     *chunk)
+{
+       struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+       unsigned num_deps;
+       int i;
+
+       syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+       num_deps = chunk->length_dw * 4 /
+               sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+       p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+                                    GFP_KERNEL);
+       p->num_post_deps = 0;
+
+       if (!p->post_deps)
+               return -ENOMEM;
+
+       for (i = 0; i < num_deps; ++i) {
+               struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+               dep->chain = NULL;
+               if (syncobj_deps[i].point) {
+                       dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
+                       if (!dep->chain)
+                               return -ENOMEM;
+               }
+
+               dep->syncobj = drm_syncobj_find(p->filp,
+                                               syncobj_deps[i].handle);
+               if (!dep->syncobj) {
+                       kfree(dep->chain);
+                       return -EINVAL;
+               }
+               dep->point = syncobj_deps[i].point;
+               p->num_post_deps++;
+       }
+
        return 0;
 }
 
@@ -1187,19 +1268,33 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 
                chunk = &p->chunks[i];
 
-               if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
-                   chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+               switch (chunk->chunk_id) {
+               case AMDGPU_CHUNK_ID_DEPENDENCIES:
+               case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
                        r = amdgpu_cs_process_fence_dep(p, chunk);
                        if (r)
                                return r;
-               } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+                       break;
+               case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
                        r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
                        if (r)
                                return r;
-               } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+                       break;
+               case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                        r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
                        if (r)
                                return r;
+                       break;
+               case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+                       r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
+                       if (r)
+                               return r;
+                       break;
+               case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+                       r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
+                       if (r)
+                               return r;
+                       break;
                }
        }
 
@@ -1210,8 +1305,17 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 {
        int i;
 
-       for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-               drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+       for (i = 0; i < p->num_post_deps; ++i) {
+               if (p->post_deps[i].chain && p->post_deps[i].point) {
+                       drm_syncobj_add_point(p->post_deps[i].syncobj,
+                                             p->post_deps[i].chain,
+                                             p->fence, p->post_deps[i].point);
+                       p->post_deps[i].chain = NULL;
+               } else {
+                       drm_syncobj_replace_fence(p->post_deps[i].syncobj,
+                                                 p->fence);
+               }
+       }
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4a53f6cfa034..e928760c4c1a 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -525,6 +525,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
 #define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
 #define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT    0x08
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL  0x09
 
 struct drm_amdgpu_cs_chunk {
        __u32           chunk_id;
@@ -605,6 +607,12 @@ struct drm_amdgpu_cs_chunk_sem {
        __u32 handle;
 };
 
+struct drm_amdgpu_cs_chunk_syncobj {
+       __u32 handle;
+       __u32 flags;
+       __u64 point;
+};
+
 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ     0
 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD  1
 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD        2
-- 
2.17.1
