On Wed, Dec 3, 2025 at 8:14 AM Lijo Lazar <[email protected]> wrote: > > Calculate control stack and total save area size required. > > Signed-off-by: Lijo Lazar <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 103 +++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 4 + > 2 files changed, 107 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c > index c0fc5a383071..4252c31eac4c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c > @@ -32,6 +32,13 @@ extern int cwsr_enable; > #define AMDGPU_CWSR_TMA_MAX_SIZE (AMDGPU_GPU_PAGE_SIZE) > #define AMDGPU_CWSR_TMA_OFFSET (AMDGPU_CWSR_TBA_MAX_SIZE) > > +#define SGPR_SIZE_PER_CU 0x4000 > +#define LDS_SIZE_PER_CU 0x10000 > +#define HWREG_SIZE_PER_CU 0x1000 > +#define DEBUGGER_BYTES_ALIGN 64 > +#define DEBUGGER_BYTES_PER_WAVE 32 > +#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40 > + > enum amdgpu_cwsr_region { > AMDGPU_CWSR_TBA, > AMDGPU_CWSR_TMA, > @@ -115,6 +122,100 @@ static void amdgpu_cwsr_init_isa_details(struct > amdgpu_device *adev, > } > } > > +static uint32_t amdgpu_cwsr_get_vgpr_size_per_cu(struct amdgpu_device *adev) > +{ > + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); > + uint32_t vgpr_size; > + > + switch (gc_ver) { > + case IP_VERSION(9, 4, 1): /* GFX_VERSION_ARCTURUS */ > + case IP_VERSION(9, 4, 2): /* GFX_VERSION_ALDEBARAN */ > + case IP_VERSION(9, 4, 3): /* GFX_VERSION_AQUA_VANJARAM */ > + case IP_VERSION(9, 4, 4): /* GFX_VERSION_AQUA_VANJARAM */ > + case IP_VERSION(9, 5, 0): > + vgpr_size = 0x80000; > + break; > + case IP_VERSION(11, 0, 0): > + case IP_VERSION(11, 0, 2): > + case IP_VERSION(11, 0, 3): > + case IP_VERSION(12, 0, 0): > + case IP_VERSION(12, 0, 1): > + vgpr_size = 0x60000; > + break; > + default: > + vgpr_size = 0x40000; > + break; > + } > + > + return vgpr_size; > +} > + > +static uint32_t 
amdgpu_cwsr_get_wg_ctxt_size_per_cu(struct amdgpu_device > *adev) > +{ > + uint32_t lds_sz_per_cu; > + > + lds_sz_per_cu = > + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) ? > + (adev->gfx.cu_info.lds_size << 10) : > + LDS_SIZE_PER_CU; > + > + return amdgpu_cwsr_get_vgpr_size_per_cu(adev) + SGPR_SIZE_PER_CU + > + lds_sz_per_cu + HWREG_SIZE_PER_CU; > +} > + > +static uint32_t amdgpu_cwsr_ctl_stack_bytes_per_wave(struct amdgpu_device > *adev) > +{ > + uint32_t sz;
Please add a blank line here, between the variable declaration and the first statement. Other than that: Acked-by: Alex Deucher <[email protected]> > + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 1, 0)) > + sz = 12; > + else > + sz = 8; > + return sz; > +} > + > +static void amdgpu_cwsr_init_save_area_info(struct amdgpu_device *adev, > + struct amdgpu_cwsr_info > *cwsr_info) > +{ > + struct amdgpu_gfx_config *gfx_info = &adev->gfx.config; > + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); > + uint32_t ctl_stack_size, wg_data_size, dbg_mem_size; > + uint32_t array_count; > + uint32_t wave_num; > + uint32_t cu_num; > + > + if (gc_ver < IP_VERSION(9, 0, 1)) > + return; > + > + array_count = gfx_info->max_shader_engines * gfx_info->max_sh_per_se; > + > + cu_num = adev->gfx.cu_info.number / NUM_XCC(adev->gfx.xcc_mask); > + wave_num = (gc_ver < IP_VERSION(10, 1, 0)) ? /* GFX_VERSION_NAVI10 */ > + min(cu_num * 40, > + array_count / gfx_info->max_sh_per_se * 512) : > + cu_num * 32; > + > + wg_data_size = ALIGN(cu_num * > amdgpu_cwsr_get_wg_ctxt_size_per_cu(adev), > + PAGE_SIZE); > + ctl_stack_size = > + wave_num * amdgpu_cwsr_ctl_stack_bytes_per_wave(adev) + 8; > + ctl_stack_size = > + ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + > ctl_stack_size, > + PAGE_SIZE); > + dbg_mem_size = > + ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, > DEBUGGER_BYTES_ALIGN); > + /* > + * HW design limits control stack size to 0x7000. > + * This is insufficient for theoretical PM4 cases > + * but sufficient for AQL, limited by SPI events. 
> + */ > + if (IP_VERSION_MAJ(gc_ver) == 10) > + ctl_stack_size = min(ctl_stack_size, 0x7000); > + > + cwsr_info->xcc_ctl_stack_sz = ctl_stack_size; > + cwsr_info->xcc_cwsr_sz = ctl_stack_size + wg_data_size; > + cwsr_info->xcc_dbg_mem_sz = dbg_mem_size; > +} > + > int amdgpu_cwsr_init(struct amdgpu_device *adev) > { > struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL; > @@ -139,6 +240,8 @@ int amdgpu_cwsr_init(struct amdgpu_device *adev) > return r; > > memcpy(ptr, cwsr_info->isa_buf, cwsr_info->isa_sz); > + > + amdgpu_cwsr_init_save_area_info(adev, cwsr_info); > adev->cwsr_info = no_free_ptr(cwsr_info); > > return 0; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h > index 26ed9308f70b..3c80d057bbed 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h > @@ -50,6 +50,10 @@ struct amdgpu_cwsr_info { > struct amdgpu_bo *isa_bo; > const void *isa_buf; > uint32_t isa_sz; > + /* cwsr size info per XCC*/ > + uint32_t xcc_ctl_stack_sz; > + uint32_t xcc_dbg_mem_sz; > + uint32_t xcc_cwsr_sz; > }; > > int amdgpu_cwsr_init(struct amdgpu_device *adev); > -- > 2.49.0 >
