Calculate control stack and total save area size required.

Signed-off-by: Lijo Lazar <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 103 +++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h |   4 +
 2 files changed, 107 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
index c0fc5a383071..4252c31eac4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
@@ -32,6 +32,13 @@ extern int cwsr_enable;
 #define AMDGPU_CWSR_TMA_MAX_SIZE (AMDGPU_GPU_PAGE_SIZE)
 #define AMDGPU_CWSR_TMA_OFFSET (AMDGPU_CWSR_TBA_MAX_SIZE)
 
+#define SGPR_SIZE_PER_CU 0x4000
+#define LDS_SIZE_PER_CU 0x10000
+#define HWREG_SIZE_PER_CU 0x1000
+#define DEBUGGER_BYTES_ALIGN 64
+#define DEBUGGER_BYTES_PER_WAVE 32
+#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
+
 enum amdgpu_cwsr_region {
        AMDGPU_CWSR_TBA,
        AMDGPU_CWSR_TMA,
@@ -115,6 +122,100 @@ static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
        }
 }
 
+static uint32_t amdgpu_cwsr_get_vgpr_size_per_cu(struct amdgpu_device *adev)
+{
+       uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+       uint32_t vgpr_size;
+
+       switch (gc_ver) {
+       case IP_VERSION(9, 4, 1): /* GFX_VERSION_ARCTURUS */
+       case IP_VERSION(9, 4, 2): /* GFX_VERSION_ALDEBARAN */
+       case IP_VERSION(9, 4, 3): /* GFX_VERSION_AQUA_VANJARAM */
+       case IP_VERSION(9, 4, 4): /* GFX_VERSION_AQUA_VANJARAM */
+       case IP_VERSION(9, 5, 0):
+               vgpr_size = 0x80000;
+               break;
+       case IP_VERSION(11, 0, 0):
+       case IP_VERSION(11, 0, 2):
+       case IP_VERSION(11, 0, 3):
+       case IP_VERSION(12, 0, 0):
+       case IP_VERSION(12, 0, 1):
+               vgpr_size = 0x60000;
+               break;
+       default:
+               vgpr_size = 0x40000;
+               break;
+       }
+
+       return vgpr_size;
+}
+
+static uint32_t amdgpu_cwsr_get_wg_ctxt_size_per_cu(struct amdgpu_device *adev)
+{
+       uint32_t lds_sz_per_cu;
+
+       lds_sz_per_cu =
+               (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) ?
+                       (adev->gfx.cu_info.lds_size << 10) :
+                       LDS_SIZE_PER_CU;
+
+       return amdgpu_cwsr_get_vgpr_size_per_cu(adev) + SGPR_SIZE_PER_CU +
+              lds_sz_per_cu + HWREG_SIZE_PER_CU;
+}
+
+static uint32_t amdgpu_cwsr_ctl_stack_bytes_per_wave(struct amdgpu_device *adev)
+{
+       uint32_t sz;
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 1, 0))
+               sz = 12;
+       else
+               sz = 8;
+       return sz;
+}
+
+static void amdgpu_cwsr_init_save_area_info(struct amdgpu_device *adev,
+                                           struct amdgpu_cwsr_info *cwsr_info)
+{
+       struct amdgpu_gfx_config *gfx_info = &adev->gfx.config;
+       uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+       uint32_t ctl_stack_size, wg_data_size, dbg_mem_size;
+       uint32_t array_count;
+       uint32_t wave_num;
+       uint32_t cu_num;
+
+       if (gc_ver < IP_VERSION(9, 0, 1))
+               return;
+
+       array_count = gfx_info->max_shader_engines * gfx_info->max_sh_per_se;
+
+       cu_num = adev->gfx.cu_info.number / NUM_XCC(adev->gfx.xcc_mask);
+       wave_num = (gc_ver < IP_VERSION(10, 1, 0)) ? /* GFX_VERSION_NAVI10 */
+                          min(cu_num * 40,
+                              array_count / gfx_info->max_sh_per_se * 512) :
+                          cu_num * 32;
+
+       wg_data_size = ALIGN(cu_num * amdgpu_cwsr_get_wg_ctxt_size_per_cu(adev),
+                            PAGE_SIZE);
+       ctl_stack_size =
+               wave_num * amdgpu_cwsr_ctl_stack_bytes_per_wave(adev) + 8;
+       ctl_stack_size =
+               ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
+                     PAGE_SIZE);
+       dbg_mem_size =
+               ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
+       /*
+        * HW design limits control stack size to 0x7000.
+        * This is insufficient for theoretical PM4 cases
+        * but sufficient for AQL, limited by SPI events.
+        */
+       if (IP_VERSION_MAJ(gc_ver) == 10)
+               ctl_stack_size = min(ctl_stack_size, 0x7000);
+
+       cwsr_info->xcc_ctl_stack_sz = ctl_stack_size;
+       cwsr_info->xcc_cwsr_sz = ctl_stack_size + wg_data_size;
+       cwsr_info->xcc_dbg_mem_sz = dbg_mem_size;
+}
+
 int amdgpu_cwsr_init(struct amdgpu_device *adev)
 {
        struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
@@ -139,6 +240,8 @@ int amdgpu_cwsr_init(struct amdgpu_device *adev)
                return r;
 
        memcpy(ptr, cwsr_info->isa_buf, cwsr_info->isa_sz);
+
+       amdgpu_cwsr_init_save_area_info(adev, cwsr_info);
        adev->cwsr_info = no_free_ptr(cwsr_info);
 
        return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
index 26ed9308f70b..3c80d057bbed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
@@ -50,6 +50,10 @@ struct amdgpu_cwsr_info {
        struct amdgpu_bo *isa_bo;
        const void *isa_buf;
        uint32_t isa_sz;
+       /* CWSR size info per XCC */
+       uint32_t xcc_ctl_stack_sz;
+       uint32_t xcc_dbg_mem_sz;
+       uint32_t xcc_cwsr_sz;
 };
 
 int amdgpu_cwsr_init(struct amdgpu_device *adev);
-- 
2.49.0

Reply via email to