On 9/29/2024 4:18 PM, [email protected] wrote:
> From: Jiadong Zhu <[email protected]>
> 
> Implement sdma queue reset callback via SMU interface.
> 
> Signed-off-by: Jiadong Zhu <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 137 ++++++++++++++++++++---
>  1 file changed, 123 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index c77889040760..9485f1a1986c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -667,11 +667,12 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
>   *
>   * @adev: amdgpu_device pointer
>   * @i: instance to resume
> + * @restore: whether to restore the saved wptr when restarting the queue
>   *
>   * Set up the gfx DMA ring buffers and enable them.
>   * Returns 0 for success, error for failure.
>   */
> -static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
> +static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
>  {
>       struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
>       u32 rb_cntl, ib_cntl, wptr_poll_cntl;
> @@ -698,16 +699,24 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>       WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
>       WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
>  
> -     ring->wptr = 0;
> +     if (!restore)
> +             ring->wptr = 0;
>  
>       /* before programing wptr to a less value, need set minor_ptr_update first */
>       WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);
>  
>       /* Initialize the ring buffer's read and write pointers */
> -     WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
> -     WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
> -     WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
> -     WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
> +     if (restore) {
> +             WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(ring->wptr << 2));
> +             WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(ring->wptr << 2));
> +             WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(ring->wptr << 2));
> +             WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(ring->wptr << 2));
> +     } else {
> +             WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
> +             WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
> +             WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
> +             WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
> +     }
>  
>       doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
>       doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
> @@ -759,7 +768,7 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>   * Set up the page DMA ring buffers and enable them.
>   * Returns 0 for success, error for failure.
>   */
> -static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
> +static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
>  {
>       struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
>       u32 rb_cntl, ib_cntl, wptr_poll_cntl;
> @@ -775,10 +784,17 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
>       WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
>  
>       /* Initialize the ring buffer's read and write pointers */
> -     WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
> -     WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
> -     WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
> -     WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
> +     if (restore) {
> +             WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(ring->wptr << 2));
> +             WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(ring->wptr << 2));
> +             WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(ring->wptr << 2));
> +             WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(ring->wptr << 2));
> +     } else {
> +             WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
> +             WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
> +             WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
> +             WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
> +     }
>  
>       /* set the wb address whether it's enabled or not */
>       WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
> @@ -792,7 +808,8 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
>       WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
>       WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
>  
> -     ring->wptr = 0;
> +     if (!restore)
> +             ring->wptr = 0;
>  
>       /* before programing wptr to a less value, need set minor_ptr_update first */
>       WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);
> @@ -946,9 +963,9 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
>               uint32_t temp;
>  
>               WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
> -             sdma_v4_4_2_gfx_resume(adev, i);
> +             sdma_v4_4_2_gfx_resume(adev, i, false);
>               if (adev->sdma.has_page_queue)
> -                     sdma_v4_4_2_page_resume(adev, i);
> +                     sdma_v4_4_2_page_resume(adev, i, false);
>  
>               /* set utc l1 enable flag always to 1 */
>               temp = RREG32_SDMA(i, regSDMA_CNTL);
> @@ -1566,6 +1583,97 @@ static int sdma_v4_4_2_soft_reset(void *handle)
>       return 0;
>  }
>  
> +static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
> +{
> +     struct amdgpu_device *adev = ring->adev;
> +     int i, j, r;
> +     u32 rb_cntl, ib_cntl, cntl, preempt;
> +
> +     if (amdgpu_sriov_vf(adev))
> +             return -EINVAL;
> +
> +     for (i = 0; i < adev->sdma.num_instances; i++) {
> +             if (ring == &adev->sdma.instance[i].ring)
> +                     break;
> +     }
> +
> +     if (i == adev->sdma.num_instances) {
> +             DRM_ERROR("sdma instance not found\n");
> +             return -EINVAL;
> +     }
> +
The above logic is not required; ring->me already gives the SDMA instance.
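
Something like this (untested sketch, relying on ring->me being set to the
instance index at ring init) should be enough:

	/* ring->me identifies the SDMA instance this ring belongs to */
	i = ring->me;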

> +     /* 2 instances on each xcc, inst0, 1 on xcc0 */
> +     amdgpu_gfx_rlc_enter_safe_mode(adev, i >> 1);
> +

What is the purpose of preventing GFX CGCG? SDMA 4.4.2 is not in GFX domain.

> +     /* stop queue */
> +     ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
> +     ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
> +     WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
> +
> +     rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
> +     rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0);
> +     WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
> +
> +     if (adev->sdma.has_page_queue) {
> +             ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
> +             ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_ENABLE, 0);
> +             WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
> +
> +             rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
> +             rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL, RB_ENABLE, 0);
> +             WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
> +     }
> +

Since resume is called after reset, can't these be replaced with
sdma_v4_4_2_inst_gfx_stop/sdma_v4_4_2_inst_page_stop?
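
For instance (untested sketch, assuming both helpers take an instance mask
as they do elsewhere in this file):

	/* stop the gfx and page queues on this instance instead of
	 * open-coding the IB_ENABLE/RB_ENABLE writes */
	sdma_v4_4_2_inst_gfx_stop(adev, 1 << i);
	if (adev->sdma.has_page_queue)
		sdma_v4_4_2_inst_page_stop(adev, 1 << i);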

> +     /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0*/
> +     preempt = RREG32_SDMA(i, regSDMA_GFX_PREEMPT);
> +     preempt = REG_SET_FIELD(preempt, SDMA_GFX_PREEMPT, IB_PREEMPT, 0);
> +     WREG32_SDMA(i, regSDMA_GFX_PREEMPT, preempt);
> +
> +     r = amdgpu_dpm_reset_sdma(adev, 1 << i);

This may not work if PMFW is expecting actual physical SDMA instance. In
that case, this needs to pass a mask of (1U << GET_INST(SDMA, i))
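
i.e. something like:

	r = amdgpu_dpm_reset_sdma(adev, 1U << GET_INST(SDMA, i));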

> +     if (r) {
> +             DRM_ERROR("amdgpu_dpm_reset_sdma failed(%d).\n", r);

Please replace all DRM_ERROR with dev_err.
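
For example:

	dev_err(adev->dev, "amdgpu_dpm_reset_sdma failed (%d)\n", r);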

> +             goto err0;
> +     }
> +
> +     udelay(50);
> +
> +     for (j = 0; j < adev->usec_timeout; j++) {
> +             if (!REG_GET_FIELD(RREG32_SDMA(i, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT))
> +                     break;
> +             udelay(1);
> +     }
> +
> +     if (j == adev->usec_timeout) {
> +             DRM_ERROR("waiting for unhalt failed.\n");
> +             r = -ETIMEDOUT;
> +             goto err0;
> +     }
> +
> +     /* queue start*/
> +     cntl = RREG32_SDMA(i, regSDMA_CNTL);
> +     cntl = REG_SET_FIELD(cntl, SDMA_CNTL, UTC_L1_ENABLE, 1);
> +     WREG32_SDMA(i, regSDMA_CNTL, cntl);
> +
> +     sdma_v4_4_2_gfx_resume(adev, i, true);
> +     r = amdgpu_ring_test_helper(ring);
> +     if (r) {
> +             DRM_ERROR("sdma gfx resume failed(%d).\n", r);
> +             return r;
> +     }
> +     if (adev->sdma.has_page_queue) {
> +             sdma_v4_4_2_page_resume(adev, i, true);
> +             r = amdgpu_ring_test_helper(&adev->sdma.instance[i].page);
> +             if (r)
> +                     DRM_ERROR("sdma page resume failed ret=%d.\n", r);
> +     }
> +

Is the above sequence valid for SRIOV as well? If yes, consider reusing
sdma_v4_4_2_inst_start(); the delta doesn't look that big.
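
A rough sketch of that direction (hypothetical, it assumes
sdma_v4_4_2_inst_start() is extended with a restore flag, which it does
not have today):

	/* hypothetical: let inst_start drive the full restart sequence,
	 * passing restore = true to keep the saved wptr */
	r = sdma_v4_4_2_inst_start(adev, 1 << i, true);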

Thanks,
Lijo
> +err0:
> +     /* 2 instances on each xcc */
> +     amdgpu_gfx_rlc_exit_safe_mode(adev, i >> 1);
> +     return r;
> +}
> +
> +
>  static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
>                                       struct amdgpu_irq_src *source,
>                                       unsigned type,
> @@ -1948,6 +2056,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
>       .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
>       .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
>       .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
> +     .reset = sdma_v4_4_2_reset_queue,
>  };
>  
>  static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
