On Wed, Dec 3, 2025 at 8:14 AM Lijo Lazar <[email protected]> wrote:
>
> Calculate control stack and total save area size required.
>
> Signed-off-by: Lijo Lazar <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 103 +++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h |   4 +
>  2 files changed, 107 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> index c0fc5a383071..4252c31eac4c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> @@ -32,6 +32,13 @@ extern int cwsr_enable;
>  #define AMDGPU_CWSR_TMA_MAX_SIZE (AMDGPU_GPU_PAGE_SIZE)
>  #define AMDGPU_CWSR_TMA_OFFSET (AMDGPU_CWSR_TBA_MAX_SIZE)
>
> +#define SGPR_SIZE_PER_CU 0x4000
> +#define LDS_SIZE_PER_CU 0x10000
> +#define HWREG_SIZE_PER_CU 0x1000
> +#define DEBUGGER_BYTES_ALIGN 64
> +#define DEBUGGER_BYTES_PER_WAVE 32
> +#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
> +
>  enum amdgpu_cwsr_region {
>         AMDGPU_CWSR_TBA,
>         AMDGPU_CWSR_TMA,
> @@ -115,6 +122,100 @@ static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
>         }
>  }
>
> +static uint32_t amdgpu_cwsr_get_vgpr_size_per_cu(struct amdgpu_device *adev)
> +{
> +       uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
> +       uint32_t vgpr_size;
> +
> +       switch (gc_ver) {
> +       case IP_VERSION(9, 4, 1): /* GFX_VERSION_ARCTURUS */
> +       case IP_VERSION(9, 4, 2): /* GFX_VERSION_ALDEBARAN */
> +       case IP_VERSION(9, 4, 3): /* GFX_VERSION_AQUA_VANJARAM */
> +       case IP_VERSION(9, 4, 4): /* GFX_VERSION_AQUA_VANJARAM */
> +       case IP_VERSION(9, 5, 0):
> +               vgpr_size = 0x80000;
> +               break;
> +       case IP_VERSION(11, 0, 0):
> +       case IP_VERSION(11, 0, 2):
> +       case IP_VERSION(11, 0, 3):
> +       case IP_VERSION(12, 0, 0):
> +       case IP_VERSION(12, 0, 1):
> +               vgpr_size = 0x60000;
> +               break;
> +       default:
> +               vgpr_size = 0x40000;
> +               break;
> +       }
> +
> +       return vgpr_size;
> +}
> +
> +static uint32_t amdgpu_cwsr_get_wg_ctxt_size_per_cu(struct amdgpu_device 
> *adev)
> +{
> +       uint32_t lds_sz_per_cu;
> +
> +       lds_sz_per_cu =
> +               (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) ?
> +                       (adev->gfx.cu_info.lds_size << 10) :
> +                       LDS_SIZE_PER_CU;
> +
> +       return amdgpu_cwsr_get_vgpr_size_per_cu(adev) + SGPR_SIZE_PER_CU +
> +              lds_sz_per_cu + HWREG_SIZE_PER_CU;
> +}
> +
> +static uint32_t amdgpu_cwsr_ctl_stack_bytes_per_wave(struct amdgpu_device 
> *adev)
> +{
> +       uint32_t sz;

Please add a blank line here, between the declaration of sz and the if
statement that follows it.  Other than that:
Acked-by: Alex Deucher <[email protected]>

> +       if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 1, 0))
> +               sz = 12;
> +       else
> +               sz = 8;
> +       return sz;
> +}
> +
> +static void amdgpu_cwsr_init_save_area_info(struct amdgpu_device *adev,
> +                                           struct amdgpu_cwsr_info 
> *cwsr_info)
> +{
> +       struct amdgpu_gfx_config *gfx_info = &adev->gfx.config;
> +       uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
> +       uint32_t ctl_stack_size, wg_data_size, dbg_mem_size;
> +       uint32_t array_count;
> +       uint32_t wave_num;
> +       uint32_t cu_num;
> +
> +       if (gc_ver < IP_VERSION(9, 0, 1))
> +               return;
> +
> +       array_count = gfx_info->max_shader_engines * gfx_info->max_sh_per_se;
> +
> +       cu_num = adev->gfx.cu_info.number / NUM_XCC(adev->gfx.xcc_mask);
> +       wave_num = (gc_ver < IP_VERSION(10, 1, 0)) ? /* GFX_VERSION_NAVI10 */
> +                          min(cu_num * 40,
> +                              array_count / gfx_info->max_sh_per_se * 512) :
> +                          cu_num * 32;
> +
> +       wg_data_size = ALIGN(cu_num * 
> amdgpu_cwsr_get_wg_ctxt_size_per_cu(adev),
> +                            PAGE_SIZE);
> +       ctl_stack_size =
> +               wave_num * amdgpu_cwsr_ctl_stack_bytes_per_wave(adev) + 8;
> +       ctl_stack_size =
> +               ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + 
> ctl_stack_size,
> +                     PAGE_SIZE);
> +       dbg_mem_size =
> +               ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, 
> DEBUGGER_BYTES_ALIGN);
> +       /*
> +       * HW design limits control stack size to 0x7000.
> +       * This is insufficient for theoretical PM4 cases
> +       * but sufficient for AQL, limited by SPI events.
> +       */
> +       if (IP_VERSION_MAJ(gc_ver) == 10)
> +               ctl_stack_size = min(ctl_stack_size, 0x7000);
> +
> +       cwsr_info->xcc_ctl_stack_sz = ctl_stack_size;
> +       cwsr_info->xcc_cwsr_sz = ctl_stack_size + wg_data_size;
> +       cwsr_info->xcc_dbg_mem_sz = dbg_mem_size;
> +}
> +
>  int amdgpu_cwsr_init(struct amdgpu_device *adev)
>  {
>         struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
> @@ -139,6 +240,8 @@ int amdgpu_cwsr_init(struct amdgpu_device *adev)
>                 return r;
>
>         memcpy(ptr, cwsr_info->isa_buf, cwsr_info->isa_sz);
> +
> +       amdgpu_cwsr_init_save_area_info(adev, cwsr_info);
>         adev->cwsr_info = no_free_ptr(cwsr_info);
>
>         return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> index 26ed9308f70b..3c80d057bbed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> @@ -50,6 +50,10 @@ struct amdgpu_cwsr_info {
>         struct amdgpu_bo *isa_bo;
>         const void *isa_buf;
>         uint32_t isa_sz;
> +       /* cwsr size info per XCC*/
> +       uint32_t xcc_ctl_stack_sz;
> +       uint32_t xcc_dbg_mem_sz;
> +       uint32_t xcc_cwsr_sz;
>  };
>
>  int amdgpu_cwsr_init(struct amdgpu_device *adev);
> --
> 2.49.0
>

Reply via email to