The ctl_stack_size and wg_data_size values are used to compute the total
context save/restore buffer size and the control stack size. These buffers
are programmed into the GPU and are used to store the queue state during
context save and restore.

Currently, both ctl_stack_size and wg_data_size are aligned to the CPU
PAGE_SIZE. On systems where the CPU page size is larger than 4K, this
wastes memory, because the GPU internally calculates and uses buffer
sizes aligned to a fixed 4K GPU page size.

Since the control stack and context save/restore buffers are consumed by
the GPU, their sizes should be aligned to the GPU page size (4K), not the
CPU page size. Align ctl_stack_size and wg_data_size to
AMDGPU_GPU_PAGE_SIZE to prevent over-allocation on systems with larger
CPU page sizes, and round the combined cwsr_size up to the CPU PAGE_SIZE.

Signed-off-by: Donet Tom <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index dc857450fa16..00ab941c3e86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -445,10 +445,11 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
                    min(cu_num * 40, props->array_count / 
props->simd_arrays_per_engine * 512)
                    : cu_num * 32;
 
-       wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), 
PAGE_SIZE);
+       wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props),
+                               AMDGPU_GPU_PAGE_SIZE);
        ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
        ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + 
ctl_stack_size,
-                              PAGE_SIZE);
+                              AMDGPU_GPU_PAGE_SIZE);
 
        if ((gfxv / 10000 * 10000) == 100000) {
                /* HW design limits control stack size to 0x7000.
@@ -460,7 +461,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
 
        props->ctl_stack_size = ctl_stack_size;
        props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, 
DEBUGGER_BYTES_ALIGN);
-       props->cwsr_size = ctl_stack_size + wg_data_size;
+       props->cwsr_size = ALIGN(ctl_stack_size + wg_data_size, PAGE_SIZE);
 
        if (gfxv == 80002)      /* GFX_VERSION_TONGA */
                props->eop_buffer_size = 0x8000;
-- 
2.52.0

Reply via email to