On 2026-03-13 18:37, Xiaogang.Chen wrote:
From: Xiaogang Chen<[email protected]>
The current driver reports and limits memory allocation for each partition
equally among the partitions that share the same memory partition. As a result,
an application may not be able to use all available memory when running on a
partitioned GPU, even though the system still has enough free memory.
Add an option that an application can use to let a GPU partition allocate all
available memory.
Signed-off-by: Xiaogang Chen<[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 43 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 17 +++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 2 +
5 files changed, 67 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3bfd79c89df3..fc6613058af8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -805,7 +805,10 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device
*adev, int xcp_id)
} else {
tmp = adev->gmc.mem_partitions[mem_id].size;
}
- do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
+
+ if (adev->xcp_mgr->mem_alloc_mode ==
AMDGPU_PARTITION_MEM_CAPPING_EVEN)
+ do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
+
return ALIGN_DOWN(tmp, PAGE_SIZE);
} else if (adev->apu_prefer_gtt) {
return (ttm_tt_pages_limit() << PAGE_SHIFT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index cab3196a87fb..e202ba1eadce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1580,6 +1580,40 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct
device *dev,
return count;
}
+static ssize_t amdgpu_gfx_get_compute_partition_mem_alloc_mode(
+ struct device *dev, struct
device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int mode = adev->xcp_mgr->mem_alloc_mode;
+
+ /* Only minimal precaution taken to reject requests while in reset.*/
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
The amdgpu_in_reset check is not required for this get/set sysfs
interface, because the interface only changes the memory allocation
limit and the available memory size. With this check removed, this
patch is
Reviewed-by: Philip Yang <[email protected]>
+ return sysfs_emit(buf, "%s\n",
+ amdgpu_gfx_compute_mem_alloc_mode_desc(mode));
+}
+
+
+static ssize_t amdgpu_gfx_set_compute_partition_mem_alloc_mode(
+ struct device *dev, struct
device_attribute *addr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!strncasecmp("CAPPING", buf, strlen("CAPPING")))
+ adev->xcp_mgr->mem_alloc_mode =
AMDGPU_PARTITION_MEM_CAPPING_EVEN;
+ else if (!strncasecmp("ALL", buf, strlen("ALL")))
+ adev->xcp_mgr->mem_alloc_mode = AMDGPU_PARTITION_MEM_ALLOC_ALL;
+ else
+ return -EINVAL;
+
+ return count;
+}
+
static const char *xcp_desc[] = {
[AMDGPU_SPX_PARTITION_MODE] = "SPX",
[AMDGPU_DPX_PARTITION_MODE] = "DPX",
@@ -1935,6 +1969,10 @@ static DEVICE_ATTR(gfx_reset_mask, 0444,
static DEVICE_ATTR(compute_reset_mask, 0444,
amdgpu_gfx_get_compute_reset_mask, NULL);
+static DEVICE_ATTR(compute_partition_mem_alloc_mode, 0644,
+ amdgpu_gfx_get_compute_partition_mem_alloc_mode,
+ amdgpu_gfx_set_compute_partition_mem_alloc_mode);
+
static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
{
struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
@@ -1955,6 +1993,11 @@ static int amdgpu_gfx_sysfs_xcp_init(struct
amdgpu_device *adev)
if (r)
return r;
+ r = device_create_file(adev->dev,
+ &dev_attr_compute_partition_mem_alloc_mode);
+ if (r)
+ return r;
+
if (xcp_switch_supported)
r = device_create_file(adev->dev,
&dev_attr_available_compute_partition);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 720ed3a2c78c..afcc98d26f45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -71,6 +71,11 @@ enum amdgpu_gfx_partition {
AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2,
};
+enum amdgpu_gfx_partition_mem_alloc_mode {
+ AMDGPU_PARTITION_MEM_CAPPING_EVEN = 0,
+ AMDGPU_PARTITION_MEM_ALLOC_ALL = 1,
+};
+
#define NUM_XCC(x) hweight16(x)
enum amdgpu_gfx_ras_mem_id_type {
@@ -676,4 +681,16 @@ static inline const char *amdgpu_gfx_compute_mode_desc(int
mode)
}
}
+static inline const char *amdgpu_gfx_compute_mem_alloc_mode_desc(int mode)
+{
+ switch (mode) {
+ case AMDGPU_PARTITION_MEM_CAPPING_EVEN:
+ return "CAPPING";
+ case AMDGPU_PARTITION_MEM_ALLOC_ALL:
+ return "ALL";
+ default:
+ return "UNKNOWN";
+ }
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index 73250ab45f20..389f69c7d5c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -181,6 +181,7 @@ int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int
num_xcps, int mode)
}
xcp_mgr->num_xcps = num_xcps;
+ xcp_mgr->mem_alloc_mode = AMDGPU_PARTITION_MEM_CAPPING_EVEN;
amdgpu_xcp_update_partition_sched_list(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 8058e8f35d41..878c1c422893 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -132,6 +132,8 @@ struct amdgpu_xcp_mgr {
struct amdgpu_xcp_cfg *xcp_cfg;
uint32_t supp_xcp_modes;
uint32_t avail_xcp_modes;
+ /* used to determine KFD memory alloc mode for each partition */
+ uint32_t mem_alloc_mode;
};
struct amdgpu_xcp_mgr_funcs {