On Tue, Mar 24, 2026 at 1:57 PM Amber Lin <[email protected]> wrote:
>
> Create hung_queue_hqd_info structure and fill in hung queue information
> passed by MES, including queue type, pipe id, and queue id.
>
> Suggested-by: Jonathan Kim <[email protected]>
> Signed-off-by: Amber Lin <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 11 ++++++-----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 13 +++++++++++++
>  2 files changed, 19 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index f1f8bbfc31e0..d778c3da8203 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -447,7 +447,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
>  {
>         struct mes_detect_and_reset_queue_input input;
>         u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
> -       int r, i;
> +       int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i;
>
>         if (!hung_db_num || !hung_db_array)
>                 return -EINVAL;
> @@ -486,10 +486,11 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
>                 return r;
>         }
>
> -       /*
> -        * TODO: return HQD info for MES scheduled user compute queue reset cases
> -        * stored in hung_db_array hqd info offset to full array size
> -        */
> +       if (queue_type != AMDGPU_RING_TYPE_COMPUTE)
> +               return r;
> +
> +       for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++)
> +               hung_db_array[i] = db_array[i];
>
>         if (r)
>                 dev_err(adev->dev, "failed to reset\n");

I think the new hunk of code should come after this error message;
otherwise, the error won't get printed for non-compute queues.

Alex

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index f80e3aca9c78..2e6ae9f84db0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -170,6 +170,19 @@ struct amdgpu_mes {
>         uint64_t            shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
>  };
>
> +struct amdgpu_mes_hung_queue_hqd_info {
> +       union {
> +               struct {
> +                       uint32_t queue_type: 3; // queue type
> +                       uint32_t pipe_index: 4; // pipe index
> +                       uint32_t queue_index: 8; // queue index
> +                       uint32_t reserved: 17;
> +               };
> +
> +               uint32_t bit0_31;
> +       };
> +};
> +
>  struct amdgpu_mes_gang {
>         int                             gang_id;
>         int                             priority;
> --
> 2.43.0
>

Reply via email to