On Sat, Oct 4, 2025 at 2:46 AM Victor Zhao <[email protected]> wrote:
>
> Currently the SRIOV runtime uses KIQ to write HDP_MEM_FLUSH_CNTL for the
> HDP flush. This register needs to be written from the CPU for nbif to be
> aware of it; otherwise the flush will not work.
> Add a KIQ ring callback to emit GPU_HDP_FLUSH, and use it in
> amdgpu_device_flush_hdp if no ring is provided.
>
> v2: remove changes to flush_hdp callback
> v3: add mes fix

I think this should be two patches, one to add the hdp flush callbacks
for KIQ for the gfx IPs, and another to implement the KIQ support for
the no ring case.

Alex

>
> Signed-off-by: Victor Zhao <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 73 ++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h    |  1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     |  1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c     |  5 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c     |  1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      |  1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      |  1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c    |  1 +
>  9 files changed, 84 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index a77000c2e0bb..57d3ea33dec2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -7269,6 +7269,8 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
>
>         if (ring && ring->funcs->emit_hdp_flush)
>                 amdgpu_ring_emit_hdp_flush(ring);
> +       else if (!ring && amdgpu_sriov_runtime(adev))
> +               amdgpu_kiq_hdp_flush(adev, 0);
>         else
>                 amdgpu_asic_flush_hdp(adev, ring);
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 7f02e36ccc1e..ecd7908590de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -1194,6 +1194,78 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, 
> uint32_t reg, uint32_t v, uint3
>         dev_err(adev->dev, "failed to write reg:%x\n", reg);
>  }
>
> +void amdgpu_kiq_hdp_flush(struct amdgpu_device *adev, uint32_t xcc_id)
> +{
> +       signed long r, cnt = 0;
> +       unsigned long flags;
> +       uint32_t seq;
> +       uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
> +       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
> +       struct amdgpu_ring *ring = &kiq->ring;
> +
> +       BUG_ON(!ring->funcs->emit_hdp_flush);
> +
> +       if (amdgpu_device_skip_hw_access(adev))
> +               return;
> +
> +       if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready) {
> +               hdp_flush_req_offset = 
> adev->nbio.funcs->get_hdp_flush_req_offset(adev);
> +               hdp_flush_done_offset = 
> adev->nbio.funcs->get_hdp_flush_done_offset(adev);
> +               ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0; /* 
> Use CP0 for KIQ */
> +
> +               amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, 
> hdp_flush_done_offset,
> +                                             ref_and_mask, ref_and_mask);
> +               return;
> +       }
> +
> +       spin_lock_irqsave(&kiq->ring_lock, flags);
> +       r = amdgpu_ring_alloc(ring, 32);
> +       if (r)
> +               goto failed_unlock;
> +
> +       amdgpu_ring_emit_hdp_flush(ring);
> +       r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> +       if (r)
> +               goto failed_undo;
> +
> +       amdgpu_ring_commit(ring);
> +       spin_unlock_irqrestore(&kiq->ring_lock, flags);
> +
> +       r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
> +
> +       /* Don't wait any longer in the GPU reset case, because waiting
> +        * here may block the gpu_recover() routine forever: e.g. if this
> +        * KIQ HDP flush is triggered from TTM, then
> +        * ttm_bo_lock_delayed_workqueue() will never return while we
> +        * keep waiting here, which causes gpu_recover() to hang.
> +        *
> +        * Also don't wait any longer in IRQ context.
> +        */
> +       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
> +               goto failed_kiq_hdp_flush;
> +
> +       might_sleep();
> +       while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
> +               if (amdgpu_in_reset(adev))
> +                       goto failed_kiq_hdp_flush;
> +
> +               msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
> +               r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
> +       }
> +
> +       if (cnt > MAX_KIQ_REG_TRY)
> +               goto failed_kiq_hdp_flush;
> +
> +       return;
> +
> +failed_undo:
> +       amdgpu_ring_undo(ring);
> +failed_unlock:
> +       spin_unlock_irqrestore(&kiq->ring_lock, flags);
> +failed_kiq_hdp_flush:
> +       dev_err(adev->dev, "failed to flush HDP via KIQ\n");
> +}
> +
>  int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
>  {
>         if (amdgpu_num_kcq == -1) {
> @@ -2484,3 +2556,4 @@ void amdgpu_debugfs_compute_sched_mask_init(struct 
> amdgpu_device *adev)
>                             &amdgpu_debugfs_compute_sched_mask_fops);
>  #endif
>  }
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index fb5f7a0ee029..5bccd2cc9518 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -615,6 +615,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device 
> *adev,
>                                   struct amdgpu_iv_entry *entry);
>  uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t 
> xcc_id);
>  void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, 
> uint32_t xcc_id);
> +void amdgpu_kiq_hdp_flush(struct amdgpu_device *adev, uint32_t xcc_id);
>  int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
>  void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t 
> ucode_id);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 8841d7213de4..751732f3e883 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -9951,6 +9951,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_kiq = {
>         .emit_wreg = gfx_v10_0_ring_emit_wreg,
>         .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>         .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> +       .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
>  };
>
>  static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 66c47c466532..10d2219866f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -2438,7 +2438,7 @@ static int gfx_v11_0_rlc_load_microcode(struct 
> amdgpu_device *adev)
>                         if (version_minor == 3)
>                                 gfx_v11_0_load_rlcp_rlcv_microcode(adev);
>                 }
> -
> +
>                 return 0;
>         }
>
> @@ -3886,7 +3886,7 @@ static int gfx_v11_0_cp_compute_load_microcode(struct 
> amdgpu_device *adev)
>         }
>
>         memcpy(fw, fw_data, fw_size);
> -
> +
>         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
>         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
>
> @@ -7320,6 +7320,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v11_0_ring_funcs_kiq = {
>         .emit_wreg = gfx_v11_0_ring_emit_wreg,
>         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
>         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
> +       .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
>  };
>
>  static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 710ec9c34e43..e2bb8668150d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -5597,6 +5597,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v12_0_ring_funcs_kiq = {
>         .emit_wreg = gfx_v12_0_ring_emit_wreg,
>         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
>         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
> +       .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
>  };
>
>  static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 0856ff65288c..d3d0a4b0380c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6939,6 +6939,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v8_0_ring_funcs_kiq = {
>         .pad_ib = amdgpu_ring_generic_pad_ib,
>         .emit_rreg = gfx_v8_0_ring_emit_rreg,
>         .emit_wreg = gfx_v8_0_ring_emit_wreg,
> +       .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
>  };
>
>  static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index dd19a97436db..f1a2efc2a8d0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -7586,6 +7586,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v9_0_ring_funcs_kiq = {
>         .emit_wreg = gfx_v9_0_ring_emit_wreg,
>         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> +       .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
>  };
>
>  static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index 77f9d5b9a556..b1fa4036befb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -4798,6 +4798,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v9_4_3_ring_funcs_kiq = {
>         .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
>         .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
>         .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
> +       .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
>  };
>
>  static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev)
> --
> 2.25.1
>

Reply via email to