Re: [PATCH v3] drm/amdkfd: Track GPU memory utilization per process
Thanks Mukul. One last thing I noticed. It seems the update the the vram_usage is protected by the process->mutex. That means the variable will always be coherent while that lock is held. However, the sysfs-show function doesn't hold that lock. So if the compiler decides to break the update into multiple instructions, or sysfs-show does the read with multiple instructions, you may see inconsistent results. You can ensure that doesn't happen by only updating the size atomically with WRITE_ONCE and only reading it atomically with READ_ONCE in the sysfs-show function. For reference, this is a good article on the subject on lwn.net: [Who's afraid of a big bad optimizing compiler?](https://lwn.net/Articles/793253/) Regards, Felix Am 2020-04-26 um 11:45 p.m. schrieb Mukul Joshi: > Track GPU VRAM usage on a per process basis and report it through > sysfs. > > v2: >- Handle AMDGPU BO-specific details in > amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). >- Return size of VRAM BO being freed from > amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). >- Do not consider imported memory for VRAM > usage calculations. > > v3: >- Move handling of imported BO size from > kfd_ioctl_free_memory_of_gpu() to > amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). > > Signed-off-by: Mukul Joshi > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 3 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 +- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 - > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 +++ > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 57 --- > 5 files changed, 84 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index d065c50582eb..a501026e829c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -65,6 +65,7 @@ struct kgd_mem { > struct amdgpu_sync sync; > > bool aql_queue; > + bool is_imported; > }; > > /* KFD Memory Eviction */ > @@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > void *vm, struct kgd_mem **mem, > uint64_t *offset, uint32_t flags); > int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > - struct kgd_dev *kgd, struct kgd_mem *mem); > + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size); > int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( > struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); > int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 0768b7eb7683..1247938b1ec1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > } > > int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > - struct kgd_dev *kgd, struct kgd_mem *mem) > + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size) > { > struct amdkfd_process_info *process_info = mem->process_info; > unsigned long bo_size = mem->bo->tbo.mem.size; > @@ -1286,9 +1286,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > struct ttm_validate_buffer *bo_list_entry; > unsigned int mapped_to_gpu_memory; > int ret; > + bool is_imported = 0; > > mutex_lock(>lock); > mapped_to_gpu_memory = mem->mapped_to_gpu_memory; > + is_imported = mem->is_imported; > mutex_unlock(>lock); > /* lock is not needed after this, since mem is unused and will >* be freed anyway > @@ -1340,6 +1342,17 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > kfree(mem->bo->tbo.sg); > } > > + /* Update the size of the BO being freed if it was allocated from > + * VRAM and is not imported. > + */ > + if (size) { > + if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) && > + (!is_imported)) > + *size = bo_size; > + else > + *size = 0; > + } > + > /* Free the BO*/ > amdgpu_bo_unref(>bo); > mutex_destroy(>lock); > @@ -1694,6 +1707,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev > *kgd, > (*mem)->process_info = avm->process_info; > add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); > amdgpu_sync_create(&(*mem)->sync); > + (*mem)->is_imported = true; > > return 0; > } > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index f8fa03a12add..ede84f76397f 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -1322,6 +1322,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file > *filep, > goto err_free; > }
[PATCH v3] drm/amdkfd: Track GPU memory utilization per process
Track GPU VRAM usage on a per process basis and report it through sysfs. v2: - Handle AMDGPU BO-specific details in amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). - Return size of VRAM BO being freed from amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). - Do not consider imported memory for VRAM usage calculations. v3: - Move handling of imported BO size from kfd_ioctl_free_memory_of_gpu() to amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). Signed-off-by: Mukul Joshi --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 3 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 - drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 +++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 57 --- 5 files changed, 84 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index d065c50582eb..a501026e829c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -65,6 +65,7 @@ struct kgd_mem { struct amdgpu_sync sync; bool aql_queue; + bool is_imported; }; /* KFD Memory Eviction */ @@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( void *vm, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem); + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 0768b7eb7683..1247938b1ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem) + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size) { struct amdkfd_process_info *process_info = mem->process_info; unsigned long bo_size = mem->bo->tbo.mem.size; @@ -1286,9 +1286,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct ttm_validate_buffer *bo_list_entry; unsigned int mapped_to_gpu_memory; int ret; + bool is_imported = 0; mutex_lock(>lock); mapped_to_gpu_memory = mem->mapped_to_gpu_memory; + is_imported = mem->is_imported; mutex_unlock(>lock); /* lock is not needed after this, since mem is unused and will * be freed anyway @@ -1340,6 +1342,17 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( kfree(mem->bo->tbo.sg); } + /* Update the size of the BO being freed if it was allocated from +* VRAM and is not imported. +*/ + if (size) { + if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) && + (!is_imported)) + *size = bo_size; + else + *size = 0; + } + /* Free the BO*/ amdgpu_bo_unref(>bo); mutex_destroy(>lock); @@ -1694,6 +1707,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, (*mem)->process_info = avm->process_info; add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); amdgpu_sync_create(&(*mem)->sync); + (*mem)->is_imported = true; return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f8fa03a12add..ede84f76397f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1322,6 +1322,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_free; } + /* Update the VRAM usage count */ + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + pdd->vram_usage += args->size; + mutex_unlock(>mutex); args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); @@ -1337,7 +1341,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); err_unlock: mutex_unlock(>mutex); return err; @@ -1351,6 +1355,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, void *mem; struct kfd_dev *dev; int ret; + uint64_t size = 0; dev = kfd_device_by_id(GET_GPU_ID(args->handle)); if (!dev) @@ -1373,7 +1378,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file