RE: [PATCH] drm/amdgpu: protect ring overrun
Hi Christian Thanks. I will remove the initialization of r. Best Regards Yintian Tao -Original Message- From: Christian König Sent: 2020年4月23日 20:22 To: Tao, Yintian ; Koenig, Christian ; Liu, Monk ; Liu, Shaoyun Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: protect ring overrun Am 23.04.20 um 11:06 schrieb Yintian Tao: > Wait for the oldest sequence on the ring to be signaled in order to > make sure there will be no command overrun. > > v2: fix coding stype and remove abs operation One nit pick below, with that fixed the patch is Reviewed-by: Christian König > > Signed-off-by: Yintian Tao > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 1 - > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 - > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++--- > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 8 +++- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++- > 9 files changed, 61 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > index 7531527067df..397bd5fa77cb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > @@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct > dma_fence **f, >* Used For polling fence. >* Returns 0 on success, -ENOMEM on failure. >*/ > -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) > +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, > + uint32_t timeout) > { > uint32_t seq; > + signed long r = 0; Please drop the initialization of r here. That is usually seen as rather bad style because it prevents the compiler from raising an warning when this really isn't initialized. Regards, Christian. > > if (!s) > return -EINVAL; > > seq = ++ring->fence_drv.sync_seq; > + r = amdgpu_fence_wait_polling(ring, > + seq - ring->fence_drv.num_fences_mask, > + timeout); > + if (r < 1) > + return -ETIMEDOUT; > + > amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, > seq, 0); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index a721b0e0ff69..0103acc57474 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device > *adev, uint32_t reg) > > spin_lock_irqsave(&kiq->ring_lock, flags); > if (amdgpu_device_wb_get(adev, ®_val_offs)) { > - spin_unlock_irqrestore(&kiq->ring_lock, flags); > pr_err("critical bug! too many kiq readers\n"); > - goto failed_kiq_read; > + goto failed_unlock; > } > amdgpu_ring_alloc(ring, 32); > amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); > - amdgpu_fence_emit_polling(ring, &seq); > + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); > + if (r) > + goto failed_undo; > + > amdgpu_ring_commit(ring); > spin_unlock_irqrestore(&kiq->ring_lock, flags); > > @@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, > uint32_t reg) > amdgpu_device_wb_free(adev, reg_val_offs); > return value; > > +failed_undo: > + amdgpu_ring_undo(ring); > +failed_unlock: > + spin_unlock_irqrestore(&kiq->ring_lock, flags); > failed_kiq_read: > + if (reg_val_offs) > + amdgpu_device_wb_free(adev, reg_val_offs); > pr_err("failed to read reg:%x\n", reg); > return ~0; > } > @@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, > uint32_t reg, uint32_t v) > spin_lock_irqsave(&kiq->ring_lock, flags); > amdgpu_ring_alloc(ring, 32); > amdgpu_ring_emit_wreg(ring, reg, v); > - amdgpu_fence_emit_polling(ring, &seq); > + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); > + if (r) > + goto failed_undo; > + > amdgpu_ring_commit(ring); > spin_unlock_irqrestore(&kiq->ring_lock, flags); > > @@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct
Re: [PATCH] drm/amdgpu: protect ring overrun
Am 23.04.20 um 11:06 schrieb Yintian Tao: Wait for the oldest sequence on the ring to be signaled in order to make sure there will be no command overrun. v2: fix coding stype and remove abs operation One nit pick below, with that fixed the patch is Reviewed-by: Christian König Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 1 - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 8 +++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++- 9 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7531527067df..397bd5fa77cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, * Used For polling fence. * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, + uint32_t timeout) { uint32_t seq; + signed long r = 0; Please drop the initialization of r here. That is usually seen as rather bad style because it prevents the compiler from raising an warning when this really isn't initialized. Regards, Christian. if (!s) return -EINVAL; seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + return -ETIMEDOUT; + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..0103acc57474 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_device_wb_get(adev, ®_val_offs)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); - goto failed_kiq_read; + goto failed_unlock; } amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) amdgpu_device_wb_free(adev, reg_val_offs); return value; +failed_undo: + amdgpu_ring_undo(ring); +failed_unlock: + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_read: + if (reg_val_offs) + amdgpu_device_wb_free(adev, reg_val_offs); pr_err("failed to read reg:%x\n", reg); return ~0; } @@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) return; +failed_undo: + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_write: pr_err("failed to write reg:%x\n", reg); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 137d3d2b46e8..be218754629a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -118,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, unsigned flags); -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); +int amdgpu_fence_emit_polling(struct
[PATCH] drm/amdgpu: protect ring overrun
Wait for the oldest sequence on the ring to be signaled in order to make sure there will be no command overrun. v2: fix coding stype and remove abs operation Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 1 - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 8 +++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++- 9 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7531527067df..397bd5fa77cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, * Used For polling fence. * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, + uint32_t timeout) { uint32_t seq; + signed long r = 0; if (!s) return -EINVAL; seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + return -ETIMEDOUT; + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..0103acc57474 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_device_wb_get(adev, ®_val_offs)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); - goto failed_kiq_read; + goto failed_unlock; } amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) amdgpu_device_wb_free(adev, reg_val_offs); return value; +failed_undo: + amdgpu_ring_undo(ring); +failed_unlock: + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_read: + if (reg_val_offs) + amdgpu_device_wb_free(adev, reg_val_offs); pr_err("failed to read reg:%x\n", reg); return ~0; } @@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) return; +failed_undo: + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_write: pr_err("failed to write reg:%x\n", reg); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 137d3d2b46e8..be218754629a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -118,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, unsigned flags); -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, + uint32_t timeout); bool amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdg
Re: [PATCH] drm/amdgpu: protect ring overrun
Am 23.04.20 um 06:22 schrieb Yintian Tao: Wait for the oldest sequence on the ring to be signaled in order to make sure there will be no command overrun. One technical problem and a few style suggestions below. Apart from that looks good to me. Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 17 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 - drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 8 +++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++- 6 files changed, 51 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7531527067df..5462ea83d8b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -200,6 +200,13 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) return -EINVAL; seq = ++ring->fence_drv.sync_seq; + if ((abs(seq - ring->fence_drv.num_fences_mask) > + ring->fence_drv.num_fences_mask) && That is a rather bad idea and won't work. The sequence is a 32bit value and supposed to wrap around. Just dropping the extra check should do it should work. + (amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + MAX_KIQ_REG_WAIT) < 1)) Maybe we should make the timeout a parameter here. And it is usually better to use the style like this: r = amdgpu_fence if (r < 1) . +return -ETIME; We usually use -ETIMEDOUT because this is not really bound to a specific timer. + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..7087333681f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -681,7 +681,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) } amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + amdgpu_device_wb_free(adev, reg_val_offs); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + If we already have goto style error handling we should probably just add a new label to handle it. Same for the other cases below where you already have a goto. Regards, Christian. amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -730,7 +737,13 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_write; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8c10084f44ef..12d181ac7e78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -60,7 +60,13 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5b1549f167b0..650b7a67d3bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4068,7 +4068,14 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + reg_val_offs * 4)); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + a
[PATCH] drm/amdgpu: protect ring overrun
Wait for the oldest sequence on the ring to be signaled in order to make sure there will be no command overrun. Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 17 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 - drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 8 +++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++- 6 files changed, 51 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7531527067df..5462ea83d8b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -200,6 +200,13 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) return -EINVAL; seq = ++ring->fence_drv.sync_seq; + if ((abs(seq - ring->fence_drv.num_fences_mask) > + ring->fence_drv.num_fences_mask) && + (amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + MAX_KIQ_REG_WAIT) < 1)) +return -ETIME; + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..7087333681f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -681,7 +681,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) } amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + amdgpu_device_wb_free(adev, reg_val_offs); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -730,7 +737,13 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_write; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8c10084f44ef..12d181ac7e78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -60,7 +60,13 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5b1549f167b0..650b7a67d3bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4068,7 +4068,14 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + reg_val_offs * 4)); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + amdgpu_device_wb_free(adev, reg_val_offs); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 30b75d79efdb..71430f2a2374 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -427,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); kiq->pmf->kiq_invalidate_tlbs(ring,