On Tue, Jul 1, 2025 at 10:11 AM Christian König <christian.koe...@amd.com> wrote: > > On 01.07.25 16:07, Alex Deucher wrote: > > On Tue, Jul 1, 2025 at 2:53 AM Srinivasan Shanmugam > > <srinivasan.shanmu...@amd.com> wrote: > >> > >> This commit refines the amdgpu_info_ioctl function to unify > >> the reporting of device apertures for both KGD and KFD > >> subsystems. > >> > >> v2: > >> - Use amdgpu_ip_version() instead of directly accessing > >> adev->ip_versions. (Alex) > >> - Added AMDGPU_VM_ prefix to scratch and LDS base and limit macros. > >> (Christian) > >> - Clarified in comments that the top 16 bits of the 64-bit address must > >> not be 0x0000 or 0xffff to avoid sign extension problems. (Christian) > >> > >> Cc: David Yat Sin <david.yat...@amd.com> > >> Cc: Christian König <christian.koe...@amd.com> > >> Cc: Alex Deucher <alexander.deuc...@amd.com> > >> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmu...@amd.com> > > > > Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> > > Reviewed-by: Christian König <christian.koe...@amd.com> as well. > > > But don't commit this yet as we are still waiting on the userspace > > side to be complete. > > Mhm, how do we want to do this with the ROCm? > > I mean for Mesa we usually wait for the merge request these days. > > For ROCm we just wait for it to end up in thunk or libdrm or what?
Yeah, we usually post a topic branch on GitHub with support for the new feature. Alex > > Regards, > Christian. > > > > Alex > > > >> --- > >> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 25 +++++++++++ > >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 23 ++++++++++ > >> drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 45 ++++---------------- > >> include/uapi/drm/amdgpu_drm.h | 6 +++ > >> 4 files changed, 62 insertions(+), 37 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > >> index 4aab5e394ce2..76d902342271 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > >> @@ -46,6 +46,7 @@ > >> #include "amdgpu_reset.h" > >> #include "amd_pcie.h" > >> #include "amdgpu_userq.h" > >> +#include "amdgpu_vm.h" > >> > >> void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) > >> { > >> @@ -1031,6 +1032,30 @@ int amdgpu_info_ioctl(struct drm_device *dev, void > >> *data, struct drm_file *filp) > >> > >> dev_info->userq_ip_mask = > >> amdgpu_userq_get_supported_ip_mask(adev); > >> > >> + /* Retrieve Device Apertures */ > >> + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, > >> 0, 0)) { > >> + dev_info->lds_base = > >> AMDGPU_VM_MAKE_LDS_APP_BASE_V9(); > >> + dev_info->scratch_base = > >> AMDGPU_VM_MAKE_SCRATCH_APP_BASE_V9(); > >> + > >> + dev_info->lds_limit = > >> AMDGPU_VM_MAKE_LDS_APP_LIMIT(dev_info->lds_base); > >> + dev_info->scratch_limit = > >> + > >> AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(dev_info->scratch_base); > >> + } else { > >> + dev_info->lds_base = > >> AMDGPU_VM_MAKE_LDS_APP_BASE_VI(); > >> + dev_info->scratch_base = > >> AMDGPU_VM_MAKE_SCRATCH_APP_BASE_VI(); > >> + > >> + dev_info->lds_limit = > >> AMDGPU_VM_MAKE_LDS_APP_LIMIT(dev_info->lds_base); > >> + dev_info->scratch_limit = > >> + > >> AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(dev_info->scratch_base); > >> + } > >> + > >> + dev_dbg(adev->dev, "Node ID: %u\n", adev->dev->id); > 
>> + dev_dbg(adev->dev, "GPU ID: %u\n", dev_info->device_id); > >> + dev_dbg(adev->dev, "LDS Base: %llX\n", dev_info->lds_base); > >> + dev_dbg(adev->dev, "LDS Limit: %llX\n", > >> dev_info->lds_limit); > >> + dev_dbg(adev->dev, "Scratch Base: %llX\n", > >> dev_info->scratch_base); > >> + dev_dbg(adev->dev, "Scratch Limit: %llX\n", > >> dev_info->scratch_limit); > >> + > >> ret = copy_to_user(out, dev_info, > >> min((size_t)size, sizeof(*dev_info))) ? > >> -EFAULT : 0; > >> kfree(dev_info); > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > >> index f3ad687125ad..fd8f6da30096 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > >> @@ -184,6 +184,29 @@ struct amdgpu_bo_vm; > >> #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) > >> #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) > >> > >> +#define AMDGPU_VM_MAKE_SCRATCH_APP_BASE_VI() \ > >> + (((uint64_t)(0x1UL) << 61) + 0x100000000L) > >> +#define AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(base) \ > >> + (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) > >> + > >> +#define AMDGPU_VM_MAKE_LDS_APP_BASE_VI() \ > >> + (((uint64_t)(0x1UL) << 61) + 0x0) > >> +#define AMDGPU_VM_MAKE_LDS_APP_LIMIT(base) \ > >> + (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) > >> + > >> +/* On GFXv9 the LDS and scratch apertures are programmed independently > >> + * using the high 16 bits of the 64-bit virtual address. They must be > >> + * in the hole, which will be the case as long as the high 16 bits are > >> + * neither 0x0000 nor 0xffff to avoid sign extension issues. > >> + * > >> + * The aperture sizes are still 4GB implicitly. > >> + * > >> + * Note: While GPUVM apertures are generally not applicable on GFXv9, > >> + * there is at least one APU case where GFX9 has a limited GPUVM aperture. 
> >> + */ > >> +#define AMDGPU_VM_MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48) > >> +#define AMDGPU_VM_MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48) > >> + > >> /* VMPT level enumerate, and the hiberachy is: > >> * PDB2->PDB1->PDB0->PTB > >> */ > >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c > >> b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c > >> index 1d170dc50df3..291b068aaf8a 100644 > >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c > >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c > >> @@ -276,35 +276,6 @@ > >> * for FLAT_* / S_LOAD operations. > >> */ > >> > >> -#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \ > >> - (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) > >> - > >> -#define MAKE_GPUVM_APP_LIMIT(base, size) \ > >> - (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1) > >> - > >> -#define MAKE_SCRATCH_APP_BASE_VI() \ > >> - (((uint64_t)(0x1UL) << 61) + 0x100000000L) > >> - > >> -#define MAKE_SCRATCH_APP_LIMIT(base) \ > >> - (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) > >> - > >> -#define MAKE_LDS_APP_BASE_VI() \ > >> - (((uint64_t)(0x1UL) << 61) + 0x0) > >> -#define MAKE_LDS_APP_LIMIT(base) \ > >> - (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) > >> - > >> -/* On GFXv9 the LDS and scratch apertures are programmed independently > >> - * using the high 16 bits of the 64-bit virtual address. They must be > >> - * in the hole, which will be the case as long as the high 16 bits are > >> - * not 0. > >> - * > >> - * The aperture sizes are still 4GB implicitly. > >> - * > >> - * A GPUVM aperture is not applicable on GFXv9. > >> - */ > >> -#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48) > >> -#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48) > >> - > >> /* User mode manages most of the SVM aperture address space. The low > >> * 16MB are reserved for kernel use (CWSR trap handler and kernel IB > >> * for now). 
> >> @@ -319,8 +290,8 @@ static void kfd_init_apertures_vi(struct > >> kfd_process_device *pdd, uint8_t id) > >> * node id couldn't be 0 - the three MSB bits of > >> * aperture shouldn't be 0 > >> */ > >> - pdd->lds_base = MAKE_LDS_APP_BASE_VI(); > >> - pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); > >> + pdd->lds_base = AMDGPU_VM_MAKE_LDS_APP_BASE_VI(); > >> + pdd->lds_limit = AMDGPU_VM_MAKE_LDS_APP_LIMIT(pdd->lds_base); > >> > >> /* dGPUs: SVM aperture starting at 0 > >> * with small reserved space for kernel. > >> @@ -336,21 +307,21 @@ static void kfd_init_apertures_vi(struct > >> kfd_process_device *pdd, uint8_t id) > >> pdd->qpd.cwsr_base = SVM_CWSR_BASE; > >> pdd->qpd.ib_base = SVM_IB_BASE; > >> > >> - pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); > >> - pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); > >> + pdd->scratch_base = AMDGPU_VM_MAKE_SCRATCH_APP_BASE_VI(); > >> + pdd->scratch_limit = > >> AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); > >> } > >> > >> static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t > >> id) > >> { > >> - pdd->lds_base = MAKE_LDS_APP_BASE_V9(); > >> - pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); > >> + pdd->lds_base = AMDGPU_VM_MAKE_LDS_APP_BASE_V9(); > >> + pdd->lds_limit = AMDGPU_VM_MAKE_LDS_APP_LIMIT(pdd->lds_base); > >> > >> pdd->gpuvm_base = AMDGPU_VA_RESERVED_BOTTOM; > >> pdd->gpuvm_limit = > >> pdd->dev->kfd->shared_resources.gpuvm_size - 1; > >> > >> - pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); > >> - pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); > >> + pdd->scratch_base = AMDGPU_VM_MAKE_SCRATCH_APP_BASE_V9(); > >> + pdd->scratch_limit = > >> AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); > >> > >> /* > >> * Place TBA/TMA on opposite side of VM hole to prevent > >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > >> index 66c4a03ac9f9..f285e9325d06 100644 > >> --- a/include/uapi/drm/amdgpu_drm.h > >> +++ 
b/include/uapi/drm/amdgpu_drm.h > >> @@ -1477,6 +1477,12 @@ struct drm_amdgpu_info_device { > >> /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ > >> __u32 userq_ip_mask; > >> __u32 pad; > >> + > >> + /* Additional fields for memory aperture information */ > >> + __u64 lds_base; /* LDS base */ > >> + __u64 lds_limit; /* LDS limit */ > >> + __u64 scratch_base; /* Scratch base */ > >> + __u64 scratch_limit; /* Scratch limit */ > >> }; > >> > >> struct drm_amdgpu_info_hw_ip { > >> -- > >> 2.34.1 > >> >