After the ctx mutex is added, the pthread_mutex in libdrm can be removed.

David Zhou


On 2017-10-11 15:25, Christian König wrote:
Yes, the mutex is mandatory.

As I explained before, it doesn't matter what userspace is doing; the kernel IOCTL must always be thread safe.

Otherwise userspace could force the kernel to run into a BUG_ON() or worse.
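
To illustrate the point: nothing forces a process to go through libdrm at all, so two threads can enter the CS IOCTL concurrently with no userspace lock involved. A minimal userspace sketch of that failure mode (the device path and request number below are placeholders, not the real DRM_IOCTL_AMDGPU_CS):

#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/ioctl.h>

#define HYPOTHETICAL_CS_IOCTL 0xC0104404 /* placeholder request number */

static void *hammer(void *arg)
{
    int fd = *(int *)arg;

    /* Both threads enter the kernel concurrently; no library lock is
     * taken anywhere on this path. */
    for (int i = 0; i < 1000; i++)
        ioctl(fd, HYPOTHETICAL_CS_IOCTL, NULL);
    return NULL;
}

int main(void)
{
    int fd = open("/dev/dri/renderD128", O_RDWR); /* typical render node */
    pthread_t a, b;

    if (fd < 0) {
        perror("open");
        return 1;
    }
    pthread_create(&a, NULL, hammer, &fd);
    pthread_create(&b, NULL, hammer, &fd);
    pthread_join(a, NULL);
    pthread_join(b, NULL);
    return 0;
}

Since the kernel cannot distinguish such callers from well-behaved libdrm users, the locking has to live on the kernel side.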

In addition to that, we already use a CS interface upstream which doesn't have a pthread_mutex anymore.

Regards,
Christian.

On 2017-10-11 05:28, Liu, Monk wrote:
Hi Andrey & Christian

Do we really need the mutex lock here?
libdrm_amdgpu already has a pthread_mutex to protect against multi-thread races, so the kernel side should be safe with that.
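
(For reference, the userspace locking referred to here looks roughly like the following sketch; the names are illustrative stand-ins, not the actual libdrm_amdgpu code.)

#include <pthread.h>
#include <stdio.h>

/* Hypothetical stand-ins; not the actual libdrm_amdgpu types or functions. */
struct amdgpu_context {
    int fd;
    pthread_mutex_t sequence_mutex;
};

static int cs_ioctl(int fd, void *chunks)
{
    /* stub; the real code would call ioctl(fd, DRM_IOCTL_AMDGPU_CS, ...) */
    (void)fd; (void)chunks;
    return 0;
}

/* The mutex serializes only callers that go through this wrapper. A thread
 * (or another process) issuing the IOCTL directly is not covered, so the
 * kernel still has to be safe on its own. */
static int cs_submit(struct amdgpu_context *ctx, void *chunks)
{
    int r;

    pthread_mutex_lock(&ctx->sequence_mutex);
    r = cs_ioctl(ctx->fd, chunks);
    pthread_mutex_unlock(&ctx->sequence_mutex);
    return r;
}

int main(void)
{
    struct amdgpu_context ctx = { .fd = -1 };

    pthread_mutex_init(&ctx.sequence_mutex, NULL);
    printf("submit: %d\n", cs_submit(&ctx, NULL));
    pthread_mutex_destroy(&ctx.sequence_mutex);
    return 0;
}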

BR Monk

-----Original Message-----
From: Andrey Grodzovsky [mailto:[email protected]]
Sent: Wednesday, October 11, 2017 4:50 AM
To: Koenig, Christian <[email protected]>; Liu, Monk <[email protected]>; [email protected]
Cc: Grodzovsky, Andrey <[email protected]>
Subject: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.

Helps avoid deadlock during GPU reset.
Added a mutex to amdgpu_ctx to preserve the order of fences on a ring.

v2:
Put the waiting logic in a separate function in amdgpu_ctx.c.

Signed-off-by: Andrey Grodzovsky <[email protected]>
---
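(Illustration, not part of the patch: the reordering can be summarized by the self-contained mock below. All types and helpers are stand-ins for the kernel code; only ctx->lock and the wait-before-reserve ordering correspond to what the patch actually adds.)

#include <pthread.h>
#include <stdio.h>

struct ctx {
    pthread_mutex_t lock;       /* stands in for amdgpu_ctx::lock */
};

static int wait_prev_fence(struct ctx *c, unsigned ring_id)
{
    /* stands in for amdgpu_ctx_wait_prev_fence(): may block for a long
     * time, but no reservation lock is held yet, so a GPU reset that
     * needs those reservations can still make progress */
    (void)c; (void)ring_id;
    return 0;
}

static int reserve_buffers_and_submit(struct ctx *c)
{
    /* stands in for the reservation + submission part of amdgpu_cs_ioctl() */
    (void)c;
    return 0;
}

static int cs_submit(struct ctx *c, unsigned ring_id)
{
    int r;

    pthread_mutex_lock(&c->lock);   /* taken in amdgpu_cs_parser_init();
                                     * keeps fences on the ring in order */
    r = wait_prev_fence(c, ring_id);
    if (!r)
        r = reserve_buffers_and_submit(c);
    pthread_mutex_unlock(&c->lock); /* dropped in amdgpu_cs_parser_fini() */
    return r;
}

int main(void)
{
    struct ctx c;

    pthread_mutex_init(&c.lock, NULL);
    printf("submit: %d\n", cs_submit(&c, 0));
    pthread_mutex_destroy(&c.lock);
    return 0;
}
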
  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  8 ++++++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------
  3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index da48f97..235eca5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -741,6 +741,7 @@ struct amdgpu_ctx {
     bool                    preamble_presented;
     enum amd_sched_priority init_priority;
     enum amd_sched_priority override_priority;
+    struct mutex            lock;
 };
 
 struct amdgpu_ctx_mgr {
@@ -763,9 +764,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
              struct drm_file *filp);
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+
 /*
  * file private structure
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1a54e53..c36297c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
         goto free_chunk;
     }
 
+    mutex_lock(&p->ctx->lock);
+
     /* get chunks */
     chunk_array_user = u64_to_user_ptr(cs->in.chunks);
     if (copy_from_user(chunk_array, chunk_array_user,
@@ -737,8 +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
     dma_fence_put(parser->fence);
 
-    if (parser->ctx)
+    if (parser->ctx) {
+        mutex_unlock(&parser->ctx->lock);
         amdgpu_ctx_put(parser->ctx);
+    }
 
     if (parser->bo_list)
         amdgpu_bo_list_put(parser->bo_list);
@@ -992,7 +996,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
         parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
         return -EINVAL;
 
-    return 0;
+    return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a78b03f6..4309820 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
     if (!ctx->fences)
         return -ENOMEM;
 
+    mutex_init(&ctx->lock);
+
     for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
         ctx->rings[i].sequence = 1;
         ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
@@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
                       &ctx->rings[i].entity);
 
     amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+    mutex_destroy(&ctx->lock);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 
     idx = seq & (amdgpu_sched_jobs - 1);
     other = cring->fences[idx];
-    if (other) {
-        signed long r;
-        r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
-        if (r < 0)
-            return r;
-    }
+    if (other)
+        BUG_ON(!dma_fence_is_signaled(other));
 
     dma_fence_get(fence);
 
@@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
     }
 }
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+{
+    struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
+    unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
+    struct dma_fence *other = cring->fences[idx];
+
+    if (other) {
+        signed long r;
+        r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+        if (r < 0) {
+            DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+            return r;
+        }
+    }
+
+    return 0;
+}
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 {
     mutex_init(&mgr->lock);
--
2.7.4
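
A note on the amdgpu_ctx.c hunks above: both amdgpu_ctx_add_fence() and the new amdgpu_ctx_wait_prev_fence() pick a fence slot with seq & (amdgpu_sched_jobs - 1), which works as a modulo into the per-ring fence array because amdgpu_sched_jobs is a power of two. Since the wait now happens up front on exactly the slot the next submission will reuse, amdgpu_ctx_add_fence() can assert the old fence is already signaled (the new BUG_ON) instead of waiting there. A tiny standalone check of the wrap arithmetic (assuming the default amdgpu_sched_jobs of 32):

#include <assert.h>
#include <stdio.h>

int main(void)
{
    /* default value; the driver requires a power of two */
    const unsigned amdgpu_sched_jobs = 32;
    unsigned long long seq;

    for (seq = 0; seq < 1000; seq++) {
        unsigned idx = (unsigned)(seq & (amdgpu_sched_jobs - 1));

        /* the AND is equivalent to a modulo for power-of-two sizes */
        assert(idx == seq % amdgpu_sched_jobs);
    }
    printf("each fence slot is reused every %u submissions\n",
           amdgpu_sched_jobs);
    return 0;
}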


_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
