[AMD Official Use Only - AMD Internal Distribution Only]
Yes, I agree. I’ve just been notified that this memory configuration is a
mistake rather than a valid use case, so the fix is low priority for now.
-----Original Message-----
From: Limonciello, Mario <[email protected]>
Sent: Friday, March 20, 2026 11:14 AM
To: Zhang, Yifan <[email protected]>; [email protected]
Cc: Deucher, Alexander <[email protected]>; Koenig, Christian
<[email protected]>; Limonciello, Mario <[email protected]>; Yuan, Perry
<[email protected]>
Subject: Re: [PATCH v2] drm/amdkfd: check system memory when set apu_prefer_gtt
On 3/19/2026 2:32 AM, Yifan Zhang wrote:
The current apu_prefer_gtt setting only checks gtt_size, which the user
can set to a value larger than system memory (via the ttm module
parameter pages_limit). E.g. carveout vram 32GB, gtt_size 50GB (via the
ttm module parameter pages_limit), system memory 31GB. In that case,
apu_prefer_gtt will be set incorrectly. Take system memory into
account when setting apu_prefer_gtt.
Wouldn't it be cleaner to do this in TTM? I.e., test whether the user set a bad
pages_limit value and then show something like:
if (user > possible) {
pr_warn("Requested invalid %d pages, limiting to %d pages", user,
possible);
user = possible;
}
Then we can always trust what we get from TTM.
Signed-off-by: Yifan Zhang <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 --
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 ++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 ++++++-
4 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3bfd79c89df3..a6ee9d9bfafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -170,8 +170,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
int i;
int last_valid_bit;
- amdgpu_amdkfd_gpuvm_init_mem_limits();
-
if (adev->kfd.dev) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index cdbab7f8cee8..13cada7da4a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -369,7 +369,7 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct
amdgpu_device *adev, int xcp_id);
#if IS_ENABLED(CONFIG_HSA_AMD)
-void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+uint64_t amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
@@ -382,7 +382,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
#else
static inline
-void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+uint64_t amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8a869fe41acd..4fba7d2f34a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -109,13 +109,13 @@ static bool reuse_dmamap(struct amdgpu_device *adev,
struct amdgpu_device *bo_ad
* System (TTM + userptr) memory - 15/16th System RAM
* TTM memory - 3/8th System RAM
*/
-void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+uint64_t amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
struct sysinfo si;
uint64_t mem;
if (kfd_mem_limit.max_system_mem_limit)
- return;
+ return kfd_mem_limit.max_system_mem_limit;
si_meminfo(&si);
mem = si.totalram - si.totalhigh;
@@ -132,6 +132,8 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_ttm_mem_limit >> 20));
+
+ return kfd_mem_limit.max_system_mem_limit;
}
void amdgpu_amdkfd_reserve_system_mem(uint64_t size) diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 714fd8d12ca5..df98ece071e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2071,6 +2071,7 @@ static void amdgpu_ttm_buffer_entity_fini(struct
amdgpu_gtt_mgr *mgr,
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
uint64_t gtt_size;
+ uint64_t max_system_mem_limit;
int r;
dma_set_max_seg_size(adev->dev, UINT_MAX); @@ -2210,8 +2211,12 @@
int amdgpu_ttm_init(struct amdgpu_device *adev)
dev_info(adev->dev, " %uM of GTT memory ready.\n",
(unsigned int)(gtt_size / (1024 * 1024)));
+
+ max_system_mem_limit = amdgpu_amdkfd_gpuvm_init_mem_limits();
+
if (adev->flags & AMD_IS_APU) {
- if (adev->gmc.real_vram_size < gtt_size)
+ if (adev->gmc.real_vram_size < gtt_size &&
+ adev->gmc.real_vram_size < max_system_mem_limit)
adev->apu_prefer_gtt = true;
}