On 05/09/2017 04:14 PM, Chunming Zhou wrote:
The problem is that executing the jobs in the right order doesn't give you
the right result, because consecutive jobs executed on the same engine are
pipelined. In other words, job B does its buffer read before job A has
written its result.
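
(As an illustration only: a minimal user-space sketch of the same hazard and
its fix. The names job_a/job_b, the shared buffer, and the pthread barrier
standing in for the ring's pipeline sync are all made up for this example --
this is not amdgpu code.)

#include <pthread.h>
#include <stdio.h>

static int buffer;                      /* result that "job A" must write first */
static pthread_barrier_t pipeline_sync; /* stand-in for emit_pipeline_sync() */

static void *job_a(void *arg)
{
        buffer = 42;                    /* job A writes its result */
        pthread_barrier_wait(&pipeline_sync);
        return NULL;
}

static void *job_b(void *arg)
{
        /* Without this wait, job B could read the buffer before job A
         * has written it -- exactly the pipelining problem above. */
        pthread_barrier_wait(&pipeline_sync);
        printf("job B reads %d\n", buffer);
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_barrier_init(&pipeline_sync, NULL, 2);
        pthread_create(&a, NULL, job_a, NULL);
        pthread_create(&b, NULL, job_b, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        pthread_barrier_destroy(&pipeline_sync);
        return 0;
}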

Change-Id: I9065abf5bafbda7a92702ca11477315d25c9a347
Signed-off-by: Chunming Zhou <david1.z...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c        |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  4 ++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  2 +-
  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 17 +++++++++++++++++
  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 ++
  6 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dd2bd9a..787acd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1169,6 +1169,7 @@ struct amdgpu_job {
        void                    *owner;
        uint64_t                fence_ctx; /* the fence_context this job uses */
        bool                    vm_needs_flush;
+       bool                    need_pipeline_sync;
        unsigned                vm_id;
        uint64_t                vm_pd_addr;
        uint32_t                gds_base, gds_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 06202e2..2c6624d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -167,6 +167,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                return r;
        }

+       if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
+               amdgpu_ring_emit_pipeline_sync(ring);
        if (vm) {
                amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast than DE */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 690ef3d..cfa97ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
        (*job)->vm = vm;
        (*job)->ibs = (void *)&(*job)[1];
        (*job)->num_ibs = num_ibs;
+       (*job)->need_pipeline_sync = false;

        amdgpu_sync_create(&(*job)->sync);

@@ -152,6 +153,9 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
                fence = amdgpu_sync_get_fence(&job->sync);
        }

+       if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
+               job->need_pipeline_sync = true;
+
        return fence;
  }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b35b853..b4f83fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -738,7 +738,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
        if (ring->funcs->init_cond_exec)
                patch_offset = amdgpu_ring_init_cond_exec(ring);

-       if (ring->funcs->emit_pipeline_sync)
+       if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)

Perhaps we could emit the pipeline sync regardless of the need_pipeline_sync flag.
Could you explain the reason for this change?

Jerry

                amdgpu_ring_emit_pipeline_sync(ring);

        if (ring->funcs->emit_vm_flush && vm_flush_needed) {
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 3c258c3..fec870a 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -235,6 +235,23 @@ static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb)
        fence_put(f);
  }

+bool amd_sched_dependency_optimized(struct fence* fence,
+                                   struct amd_sched_entity *entity)
+{
+       struct amd_gpu_scheduler *sched = entity->sched;
+       struct amd_sched_fence *s_fence;
+
+       if (!fence || fence_is_signaled(fence))
+               return false;
+       if (fence->context == entity->fence_context)
+               return true;
+       s_fence = to_amd_sched_fence(fence);
+       if (s_fence && s_fence->sched == sched)
+               return true;
+
+       return false;
+}
+
  static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
  {
        struct amd_gpu_scheduler *sched = entity->sched;
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 2531b41..bfda90f 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -162,4 +162,6 @@ int amd_sched_job_init(struct amd_sched_job *job,
                       void *owner);
  void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
  void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
+bool amd_sched_dependency_optimized(struct fence* fence,
+                                   struct amd_sched_entity *entity);
  #endif
