Am 17.10.2017 um 08:37 schrieb Pixel Ding:
From: pding <[email protected]>

Register accessing is performed when IRQ is disabled. Never sleep in
this function.

Known issue: dead sleep in many use cases of index/data registers.

v2: wrap polling fence functions. don't trigger IRQ for polling in
case of wrongly fence signal.

Signed-off-by: pding <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c        |  8 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c         | 53 +++++++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c           |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h          |  4 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c          | 30 ++++++-------
  8 files changed, 78 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ca212ef..cc06e62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -885,7 +885,7 @@ struct amdgpu_mec {
  struct amdgpu_kiq {
        u64                     eop_gpu_addr;
        struct amdgpu_bo        *eop_obj;
-       struct mutex            ring_mutex;
+       spinlock_t              ring_lock;
        struct amdgpu_ring      ring;
        struct amdgpu_irq_src   irq;
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 9d9965d..6d83573 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -788,7 +788,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device 
*adev, uint16_t pasid)
        struct dma_fence *f;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- mutex_lock(&adev->gfx.kiq.ring_mutex);
+       spin_lock(&adev->gfx.kiq.ring_lock);
        amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
        amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(ring,
@@ -796,7 +796,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device 
*adev, uint16_t pasid)
                        PACKET3_INVALIDATE_TLBS_PASID(pasid));
        amdgpu_fence_emit(ring, &f);
        amdgpu_ring_commit(ring);
-       mutex_unlock(&adev->gfx.kiq.ring_mutex);
+       spin_unlock(&adev->gfx.kiq.ring_lock);
r = dma_fence_wait(f, false);
        if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index edbae19..c92217f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -973,7 +973,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device 
*adev, uint16_t pasid)
        struct dma_fence *f;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- mutex_lock(&adev->gfx.kiq.ring_mutex);
+       spin_lock(&adev->gfx.kiq.ring_lock);
        amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
        amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(ring,
@@ -983,7 +983,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device 
*adev, uint16_t pasid)
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(2));
        amdgpu_fence_emit(ring, &f);
        amdgpu_ring_commit(ring);
-       mutex_unlock(&adev->gfx.kiq.ring_mutex);
+       spin_unlock(&adev->gfx.kiq.ring_lock);
r = dma_fence_wait(f, false);
        if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ab8f0d6..1197274 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
uint32_t reg,
  {
        uint32_t ret;
- if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
-               BUG_ON(in_interrupt());
+       if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
                return amdgpu_virt_kiq_rreg(adev, reg);
-       }
if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
                ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v,
                adev->last_mm_index = v;
        }
- if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
-               BUG_ON(in_interrupt());
+       if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
                return amdgpu_virt_kiq_wreg(adev, reg, v);
-       }
if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 688740e..68a5e90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -169,6 +169,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f)
  }
/**
+ * amdgpu_fence_emit_polling - emit a fence on the requeste ring
+ *
+ * @ring: ring the fence is associated with
+ * @s: resulting sequence number
+ *
+ * Emits a fence command on the requested ring (all asics).
+ * Used For polling fence.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
+{
+       uint32_t seq;
+
+       if (!s)
+               return -EINVAL;
+
+       seq = ++ring->fence_drv.sync_seq;
+       amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+                              seq, AMDGPU_FENCE_FLAG_INT);
+
+       *s = seq;
+
+       return 0;
+}
+
+/**
   * amdgpu_fence_schedule_fallback - schedule fallback check
   *
   * @ring: pointer to struct amdgpu_ring
@@ -281,6 +307,33 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
        return r;
  }

Please add some documentation here that timeout is in usec.

+signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
+                                     uint32_t wait_seq,
+                                     signed long timeout)
+{
+       uint32_t seq, last_seq;
+       struct amdgpu_fence_driver *drv = &ring->fence_drv;
+
+       last_seq = atomic_read(&ring->fence_drv.last_seq);
+
+       do {
+               seq = amdgpu_fence_read(ring);
+
+               if (unlikely(seq == last_seq))
+                       break;

That doesn't look correct to me, it will abort the busy wait as soon as we see the last value we have seen.

+               if (seq >= wait_seq && wait_seq >= last_seq)
+                       break;
+               if (seq <= last_seq &&
+                   (wait_seq <= seq || wait_seq >= last_seq))
+                       break;

Why not just "(int32_t)(wait_seq - seq) > 0" ? IIRC that should be sufficient for a wrap around test.

Regards,
Christian.

+               udelay(5);
+               timeout -= 5;
+       } while (timeout > 0);
+
+       atomic_cmpxchg(&drv->last_seq, last_seq, wait_seq);
+
+       return timeout;
+}
  /**
   * amdgpu_fence_count_emitted - get the count of emitted fences
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 4f6c68f..e5a9077 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -185,7 +185,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        int r = 0;
- mutex_init(&kiq->ring_mutex);
+       spin_lock_init(&kiq->ring_lock);
r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
        if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index af8e544..9de89ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -89,8 +89,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
  void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
  void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
  int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
  void amdgpu_fence_process(struct amdgpu_ring *ring);
  int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
+signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
+                                     uint32_t wait_seq,
+                                     signed long timeout);
  unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index ab05121..177fe10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,7 +22,7 @@
   */
#include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT       100000
+#define MAX_KIQ_REG_WAIT       100000000 /* in usecs */
int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
  {
@@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
  uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
  {
        signed long r;
-       uint32_t val;
-       struct dma_fence *f;
+       uint32_t val, seq;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_rreg); - mutex_lock(&kiq->ring_mutex);
+       spin_lock(&kiq->ring_lock);
        amdgpu_ring_alloc(ring, 32);
        amdgpu_ring_emit_rreg(ring, reg);
-       amdgpu_fence_emit(ring, &f);
+       amdgpu_fence_emit_polling(ring, &seq);
        amdgpu_ring_commit(ring);
-       mutex_unlock(&kiq->ring_mutex);
+       spin_unlock(&kiq->ring_lock);
- r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
-       dma_fence_put(f);
+       r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
        if (r < 1) {
-               DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+               DRM_ERROR("wait for kiq fence error: %ld\n", r);
                return ~0;
        }
-
        val = adev->wb.wb[adev->virt.reg_val_offs];
return val;
@@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
  void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t 
v)
  {
        signed long r;
-       struct dma_fence *f;
+       uint32_t seq;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_wreg); - mutex_lock(&kiq->ring_mutex);
+       spin_lock(&kiq->ring_lock);
        amdgpu_ring_alloc(ring, 32);
        amdgpu_ring_emit_wreg(ring, reg, v);
-       amdgpu_fence_emit(ring, &f);
+       amdgpu_fence_emit_polling(ring, &seq);
        amdgpu_ring_commit(ring);
-       mutex_unlock(&kiq->ring_mutex);
+       spin_unlock(&kiq->ring_lock);
- r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
+       r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
        if (r < 1)
-               DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-       dma_fence_put(f);
+               DRM_ERROR("wait for kiq fence error: %ld\n", r);
  }
/**


_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to