RE: [PATCH] drm/amdgpu: protect ring overrun

2020-04-23 Thread Tao, Yintian
Hi  Christian

Thanks. I will remove the initialization of r.

Best Regards
Yintian Tao
-Original Message-
From: Christian König  
Sent: 2020年4月23日 20:22
To: Tao, Yintian ; Koenig, Christian 
; Liu, Monk ; Liu, Shaoyun 

Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: protect ring overrun

Am 23.04.20 um 11:06 schrieb Yintian Tao:
> Wait for the oldest sequence on the ring to be signaled in order to 
> make sure there will be no command overrun.
>
> v2: fix coding stype and remove abs operation

One nit pick below, with that fixed the patch is Reviewed-by: Christian König 


>
> Signed-off-by: Yintian Tao 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 22 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  3 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  8 +++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|  1 -
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  1 -
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++---
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  8 +++-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  8 +++-
>   9 files changed, 61 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 7531527067df..397bd5fa77cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
> dma_fence **f,
>* Used For polling fence.
>* Returns 0 on success, -ENOMEM on failure.
>*/
> -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
> +   uint32_t timeout)
>   {
>   uint32_t seq;
> + signed long r = 0;

Please drop the initialization of r here. That is usually seen as rather bad 
style because it prevents the compiler from raising an warning when this really 
isn't initialized.

Regards,
Christian.

>   
>   if (!s)
>   return -EINVAL;
>   
>   seq = ++ring->fence_drv.sync_seq;
> + r = amdgpu_fence_wait_polling(ring,
> +   seq - ring->fence_drv.num_fences_mask,
> +   timeout);
> + if (r < 1)
> + return -ETIMEDOUT;
> +
>   amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>  seq, 0);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index a721b0e0ff69..0103acc57474 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device 
> *adev, uint32_t reg)
>   
>   spin_lock_irqsave(&kiq->ring_lock, flags);
>   if (amdgpu_device_wb_get(adev, ®_val_offs)) {
> - spin_unlock_irqrestore(&kiq->ring_lock, flags);
>   pr_err("critical bug! too many kiq readers\n");
> - goto failed_kiq_read;
> + goto failed_unlock;
>   }
>   amdgpu_ring_alloc(ring, 32);
>   amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
>   amdgpu_ring_commit(ring);
>   spin_unlock_irqrestore(&kiq->ring_lock, flags);
>   
> @@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
> uint32_t reg)
>   amdgpu_device_wb_free(adev, reg_val_offs);
>   return value;
>   
> +failed_undo:
> + amdgpu_ring_undo(ring);
> +failed_unlock:
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
>   failed_kiq_read:
> + if (reg_val_offs)
> + amdgpu_device_wb_free(adev, reg_val_offs);
>   pr_err("failed to read reg:%x\n", reg);
>   return ~0;
>   }
> @@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, 
> uint32_t reg, uint32_t v)
>   spin_lock_irqsave(&kiq->ring_lock, flags);
>   amdgpu_ring_alloc(ring, 32);
>   amdgpu_ring_emit_wreg(ring, reg, v);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
>   amdgpu_ring_commit(ring);
>   spin_unlock_irqrestore(&kiq->ring_lock, flags);
>   
> @@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct

Re: [PATCH] drm/amdgpu: protect ring overrun

2020-04-23 Thread Christian König

Am 23.04.20 um 11:06 schrieb Yintian Tao:

Wait for the oldest sequence on the ring
to be signaled in order to make sure there
will be no command overrun.

v2: fix coding stype and remove abs operation


One nit pick below, with that fixed the patch is Reviewed-by: Christian 
König 




Signed-off-by: Yintian Tao 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 22 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  8 +++-
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|  1 -
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  1 -
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++---
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  8 +++-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  8 +++-
  9 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7531527067df..397bd5fa77cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f,
   * Used For polling fence.
   * Returns 0 on success, -ENOMEM on failure.
   */
-int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout)
  {
uint32_t seq;
+   signed long r = 0;


Please drop the initialization of r here. That is usually seen as rather 
bad style because it prevents the compiler from raising an warning when 
this really isn't initialized.


Regards,
Christian.

  
  	if (!s)

return -EINVAL;
  
  	seq = ++ring->fence_drv.sync_seq;

+   r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+   if (r < 1)
+   return -ETIMEDOUT;
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, 0);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

index a721b0e0ff69..0103acc57474 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
  
  	spin_lock_irqsave(&kiq->ring_lock, flags);

if (amdgpu_device_wb_get(adev, ®_val_offs)) {
-   spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n");
-   goto failed_kiq_read;
+   goto failed_unlock;
}
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+   if (r)
+   goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
  
@@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)

amdgpu_device_wb_free(adev, reg_val_offs);
return value;
  
+failed_undo:

+   amdgpu_ring_undo(ring);
+failed_unlock:
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
  failed_kiq_read:
+   if (reg_val_offs)
+   amdgpu_device_wb_free(adev, reg_val_offs);
pr_err("failed to read reg:%x\n", reg);
return ~0;
  }
@@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v)
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+   if (r)
+   goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
  
@@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
  
  	return;
  
+failed_undo:

+   amdgpu_ring_undo(ring);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
  failed_kiq_write:
pr_err("failed to write reg:%x\n", reg);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 137d3d2b46e8..be218754629a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -118,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device 
*adev);
  void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
  int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
  unsigned flags);
-int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
+int amdgpu_fence_emit_polling(struct

[PATCH] drm/amdgpu: protect ring overrun

2020-04-23 Thread Yintian Tao
Wait for the oldest sequence on the ring
to be signaled in order to make sure there
will be no command overrun.

v2: fix coding stype and remove abs operation

Signed-off-by: Yintian Tao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 22 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  8 +++-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|  1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  8 +++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  8 +++-
 9 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7531527067df..397bd5fa77cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f,
  * Used For polling fence.
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout)
 {
uint32_t seq;
+   signed long r = 0;
 
if (!s)
return -EINVAL;
 
seq = ++ring->fence_drv.sync_seq;
+   r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+   if (r < 1)
+   return -ETIMEDOUT;
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, 0);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..0103acc57474 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
 
spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_device_wb_get(adev, ®_val_offs)) {
-   spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n");
-   goto failed_kiq_read;
+   goto failed_unlock;
}
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+   if (r)
+   goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
@@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
amdgpu_device_wb_free(adev, reg_val_offs);
return value;
 
+failed_undo:
+   amdgpu_ring_undo(ring);
+failed_unlock:
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
 failed_kiq_read:
+   if (reg_val_offs)
+   amdgpu_device_wb_free(adev, reg_val_offs);
pr_err("failed to read reg:%x\n", reg);
return ~0;
 }
@@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v)
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+   if (r)
+   goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
@@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v)
 
return;
 
+failed_undo:
+   amdgpu_ring_undo(ring);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
 failed_kiq_write:
pr_err("failed to write reg:%x\n", reg);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 137d3d2b46e8..be218754629a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -118,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device 
*adev);
 void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
 int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
  unsigned flags);
-int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout);
 bool amdgpu_fence_process(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdg

Re: [PATCH] drm/amdgpu: protect ring overrun

2020-04-23 Thread Christian König

Am 23.04.20 um 06:22 schrieb Yintian Tao:

Wait for the oldest sequence on the ring
to be signaled in order to make sure there
will be no command overrun.


One technical problem and a few style suggestions below. Apart from that 
looks good to me.




Signed-off-by: Yintian Tao 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  7 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 17 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  8 +++-
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  9 -
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  8 +++-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  8 +++-
  6 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7531527067df..5462ea83d8b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -200,6 +200,13 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, 
uint32_t *s)
return -EINVAL;
  
  	seq = ++ring->fence_drv.sync_seq;

+   if ((abs(seq - ring->fence_drv.num_fences_mask) >
+   ring->fence_drv.num_fences_mask) &&


That is a rather bad idea and won't work. The sequence is a 32bit value 
and supposed to wrap around. Just dropping the extra check should do it 
should work.



+   (amdgpu_fence_wait_polling(ring,
+  seq - ring->fence_drv.num_fences_mask,
+  MAX_KIQ_REG_WAIT) < 1))


Maybe we should make the timeout a parameter here.

And it is usually better to use the style like this:

r = amdgpu_fence
if (r < 1)
    .


+return -ETIME;


We usually use -ETIMEDOUT because this is not really bound to a specific 
timer.



+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, 0);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

index a721b0e0ff69..7087333681f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -681,7 +681,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
}
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   amdgpu_device_wb_free(adev, reg_val_offs);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+


If we already have goto style error handling we should probably just add 
a new label to handle it.


Same for the other cases below where you already have a goto.

Regards,
Christian.


amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
  
@@ -730,7 +737,13 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)

spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_write;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

index 8c10084f44ef..12d181ac7e78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -60,7 +60,13 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device 
*adev,
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index 5b1549f167b0..650b7a67d3bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4068,7 +4068,14 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct 
amdgpu_device *adev)
reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
reg_val_offs * 4));
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   a

[PATCH] drm/amdgpu: protect ring overrun

2020-04-22 Thread Yintian Tao
Wait for the oldest sequence on the ring
to be signaled in order to make sure there
will be no command overrun.

Signed-off-by: Yintian Tao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 17 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  8 +++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  9 -
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  8 +++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  8 +++-
 6 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7531527067df..5462ea83d8b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -200,6 +200,13 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, 
uint32_t *s)
return -EINVAL;
 
seq = ++ring->fence_drv.sync_seq;
+   if ((abs(seq - ring->fence_drv.num_fences_mask) >
+   ring->fence_drv.num_fences_mask) &&
+   (amdgpu_fence_wait_polling(ring,
+  seq - ring->fence_drv.num_fences_mask,
+  MAX_KIQ_REG_WAIT) < 1))
+return -ETIME;
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, 0);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..7087333681f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -681,7 +681,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
}
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   amdgpu_device_wb_free(adev, reg_val_offs);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
@@ -730,7 +737,13 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v)
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_write;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8c10084f44ef..12d181ac7e78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -60,7 +60,13 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device 
*adev,
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5b1549f167b0..650b7a67d3bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4068,7 +4068,14 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct 
amdgpu_device *adev)
reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
reg_val_offs * 4));
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   amdgpu_device_wb_free(adev, reg_val_offs);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 30b75d79efdb..71430f2a2374 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -427,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
kiq->pmf->kiq_invalidate_tlbs(ring,