Calculate control stack and total save area size required.
Signed-off-by: Lijo Lazar <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 103 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 4 +
2 files changed, 107 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
index c0fc5a383071..4252c31eac4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
@@ -32,6 +32,13 @@ extern int cwsr_enable;
#define AMDGPU_CWSR_TMA_MAX_SIZE (AMDGPU_GPU_PAGE_SIZE)
#define AMDGPU_CWSR_TMA_OFFSET (AMDGPU_CWSR_TBA_MAX_SIZE)
+/*
+ * Sizing constants for the CWSR save area calculation.
+ *
+ * SGPR/LDS/HWREG_SIZE_PER_CU are summed with the VGPR size to form the
+ * workgroup context size of one CU (LDS_SIZE_PER_CU is the value used when
+ * the LDS size is not taken from cu_info). DEBUGGER_BYTES_PER_WAVE and
+ * DEBUGGER_BYTES_ALIGN size the debugger memory, and
+ * SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER is added to the control stack
+ * before it is page aligned.
+ * NOTE(review): the *_SIZE_PER_CU values are presumably in bytes - confirm.
+ */
+#define SGPR_SIZE_PER_CU 0x4000
+#define LDS_SIZE_PER_CU 0x10000
+#define HWREG_SIZE_PER_CU 0x1000
+#define DEBUGGER_BYTES_ALIGN 64
+#define DEBUGGER_BYTES_PER_WAVE 32
+#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
+
enum amdgpu_cwsr_region {
AMDGPU_CWSR_TBA,
AMDGPU_CWSR_TMA,
@@ -115,6 +122,100 @@ static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
}
}
+/*
+ * Return the per-CU VGPR save size for the device's GC IP version.
+ *
+ * GC 9.4.1 - 9.4.4 and 9.5.0 use 0x80000; GC 11.0.0, 11.0.2, 11.0.3,
+ * 12.0.0 and 12.0.1 use 0x60000; every other version uses 0x40000.
+ */
+static uint32_t amdgpu_cwsr_get_vgpr_size_per_cu(struct amdgpu_device *adev)
+{
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(9, 4, 1): /* GFX_VERSION_ARCTURUS */
+	case IP_VERSION(9, 4, 2): /* GFX_VERSION_ALDEBARAN */
+	case IP_VERSION(9, 4, 3): /* GFX_VERSION_AQUA_VANJARAM */
+	case IP_VERSION(9, 4, 4): /* GFX_VERSION_AQUA_VANJARAM */
+	case IP_VERSION(9, 5, 0):
+		return 0x80000;
+	case IP_VERSION(11, 0, 0):
+	case IP_VERSION(11, 0, 2):
+	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 0, 1):
+		return 0x60000;
+	default:
+		return 0x40000;
+	}
+}
+
+/*
+ * Return the workgroup context size of one CU: the sum of the VGPR, SGPR,
+ * LDS and HWREG contributions.
+ */
+static uint32_t amdgpu_cwsr_get_wg_ctxt_size_per_cu(struct amdgpu_device *adev)
+{
+	uint32_t lds_sz = LDS_SIZE_PER_CU;
+
+	/* GC 9.5.0 takes the LDS size from cu_info, scaled by 1024. */
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+		lds_sz = adev->gfx.cu_info.lds_size << 10;
+
+	return amdgpu_cwsr_get_vgpr_size_per_cu(adev) + SGPR_SIZE_PER_CU +
+	       lds_sz + HWREG_SIZE_PER_CU;
+}
+
+/* Control stack bytes per wave: 12 from GC 10.1.0 onwards, 8 before that. */
+static uint32_t amdgpu_cwsr_ctl_stack_bytes_per_wave(struct amdgpu_device *adev)
+{
+	bool gc_10_1_plus =
+		amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 1, 0);
+
+	return gc_10_1_plus ? 12 : 8;
+}
+
+/*
+ * amdgpu_cwsr_init_save_area_info - compute the per-XCC CWSR save area sizes
+ * @adev: device whose GC IP version, CU info and gfx configuration are read
+ * @cwsr_info: receives xcc_ctl_stack_sz, xcc_cwsr_sz and xcc_dbg_mem_sz
+ *
+ * Nothing is written for GC versions below 9.0.1, so @cwsr_info keeps the
+ * values it already held in that case.
+ */
+static void amdgpu_cwsr_init_save_area_info(struct amdgpu_device *adev,
+					    struct amdgpu_cwsr_info *cwsr_info)
+{
+	struct amdgpu_gfx_config *gfx_info = &adev->gfx.config;
+	uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+	uint32_t ctl_stack_size, wg_data_size, dbg_mem_size;
+	uint32_t wave_num;
+	uint32_t cu_num;
+
+	if (gc_ver < IP_VERSION(9, 0, 1))
+		return;
+
+	/* Device CU count divided by the number of XCCs. */
+	cu_num = adev->gfx.cu_info.number / NUM_XCC(adev->gfx.xcc_mask);
+
+	/*
+	 * Before GC 10.1.0 the wave count is capped at 512 per shader engine.
+	 * (max_shader_engines * max_sh_per_se) / max_sh_per_se reduces to
+	 * max_shader_engines, so use it directly instead of dividing by
+	 * max_sh_per_se.
+	 */
+	if (gc_ver < IP_VERSION(10, 1, 0)) /* GFX_VERSION_NAVI10 */
+		wave_num = min(cu_num * 40, gfx_info->max_shader_engines * 512);
+	else
+		wave_num = cu_num * 32;
+
+	wg_data_size = ALIGN(cu_num * amdgpu_cwsr_get_wg_ctxt_size_per_cu(adev),
+			     PAGE_SIZE);
+
+	/* Per-wave entries plus 8, then the save area header, page aligned. */
+	ctl_stack_size =
+		wave_num * amdgpu_cwsr_ctl_stack_bytes_per_wave(adev) + 8;
+	ctl_stack_size =
+		ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
+		      PAGE_SIZE);
+	/*
+	 * HW design limits control stack size to 0x7000.
+	 * This is insufficient for theoretical PM4 cases
+	 * but sufficient for AQL, limited by SPI events.
+	 */
+	if (IP_VERSION_MAJ(gc_ver) == 10)
+		ctl_stack_size = min(ctl_stack_size, 0x7000);
+
+	dbg_mem_size =
+		ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
+
+	cwsr_info->xcc_ctl_stack_sz = ctl_stack_size;
+	cwsr_info->xcc_cwsr_sz = ctl_stack_size + wg_data_size;
+	cwsr_info->xcc_dbg_mem_sz = dbg_mem_size;
+}
+
int amdgpu_cwsr_init(struct amdgpu_device *adev)
{
struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
@@ -139,6 +240,8 @@ int amdgpu_cwsr_init(struct amdgpu_device *adev)
return r;
memcpy(ptr, cwsr_info->isa_buf, cwsr_info->isa_sz);
+
+ amdgpu_cwsr_init_save_area_info(adev, cwsr_info);
adev->cwsr_info = no_free_ptr(cwsr_info);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
index 26ed9308f70b..3c80d057bbed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
@@ -50,6 +50,10 @@ struct amdgpu_cwsr_info {
struct amdgpu_bo *isa_bo;
const void *isa_buf;
uint32_t isa_sz;
+ /* cwsr size info per XCC */
+ uint32_t xcc_ctl_stack_sz;
+ uint32_t xcc_dbg_mem_sz;
+ uint32_t xcc_cwsr_sz;
};
int amdgpu_cwsr_init(struct amdgpu_device *adev);
--
2.49.0