On 2024-10-11 16:22, Philip Yang wrote:
> Process device data pdd->vram_usage is read by rocm-smi via sysfs, this
> is currently missing the svm_bo usage accounting, so "rocm-smi
> --showpids" per process VRAM usage report is incorrect.
> 
> Add pdd->vram_usage accounting when svm_bo allocation and release,
> change to atomic64_t type because it is updated outside process mutex
> now.
> 
> Signed-off-by: Philip Yang <[email protected]>

Reviewed-by: Felix Kuehling <[email protected]>

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  6 +++---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c |  4 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 26 ++++++++++++++++++++++++
>  4 files changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index a1f191a5984b..065d87841459 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
> *filep,
>  
>               if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
>                       size >>= 1;
> -             WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
> +             atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
>       }
>  
>       mutex_unlock(&p->mutex);
> @@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file 
> *filep,
>               kfd_process_device_remove_obj_handle(
>                       pdd, GET_IDR_HANDLE(args->handle));
>  
> -     WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
> +     atomic64_sub(size, &pdd->vram_usage);
>  
>  err_unlock:
>  err_pdd:
> @@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct 
> kfd_process_device *pdd,
>       } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>               bo_bucket->restored_offset = offset;
>               /* Update the VRAM usage count */
> -             WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
> +             atomic64_add(bo_bucket->size, &pdd->vram_usage);
>       }
>       return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 6a5bf88cc232..9e5ca0b93b2a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -775,7 +775,7 @@ struct kfd_process_device {
>       enum kfd_pdd_bound bound;
>  
>       /* VRAM usage */
> -     uint64_t vram_usage;
> +     atomic64_t vram_usage;
>       struct attribute attr_vram;
>       char vram_filename[MAX_SYSFS_FILENAME_LEN];
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 7909dfd158be..4810521736a9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, 
> struct attribute *attr,
>       } else if (strncmp(attr->name, "vram_", 5) == 0) {
>               struct kfd_process_device *pdd = container_of(attr, struct 
> kfd_process_device,
>                                                             attr_vram);
> -             return snprintf(buffer, PAGE_SIZE, "%llu\n", 
> READ_ONCE(pdd->vram_usage));
> +             return snprintf(buffer, PAGE_SIZE, "%llu\n", 
> atomic64_read(&pdd->vram_usage));
>       } else if (strncmp(attr->name, "sdma_", 5) == 0) {
>               struct kfd_process_device *pdd = container_of(attr, struct 
> kfd_process_device,
>                                                             attr_sdma);
> @@ -1625,7 +1625,7 @@ struct kfd_process_device 
> *kfd_create_process_device_data(struct kfd_node *dev,
>       pdd->bound = PDD_UNBOUND;
>       pdd->already_dequeued = false;
>       pdd->runtime_inuse = false;
> -     pdd->vram_usage = 0;
> +     atomic64_set(&pdd->vram_usage, 0);
>       pdd->sdma_past_activity_counter = 0;
>       pdd->user_gpu_id = dev->id;
>       atomic64_set(&pdd->evict_duration_counter, 0);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 857ec6f23bba..3e2911895c74 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -405,6 +405,27 @@ static void svm_range_bo_release(struct kref *kref)
>               spin_lock(&svm_bo->list_lock);
>       }
>       spin_unlock(&svm_bo->list_lock);
> +
> +     if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
> +             struct kfd_process_device *pdd;
> +             struct kfd_process *p;
> +             struct mm_struct *mm;
> +
> +             mm = svm_bo->eviction_fence->mm;
> +             /*
> +              * The forked child process takes svm_bo device pages ref, 
> svm_bo could be
> +              * released after parent process is gone.
> +              */
> +             p = kfd_lookup_process_by_mm(mm);
> +             if (p) {
> +                     pdd = kfd_get_process_device_data(svm_bo->node, p);
> +                     if (pdd)
> +                             atomic64_sub(amdgpu_bo_size(svm_bo->bo), 
> &pdd->vram_usage);
> +                     kfd_unref_process(p);
> +             }
> +             mmput(mm);
> +     }
> +
>       if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
>               /* We're not in the eviction worker. Signal the fence. */
>               dma_fence_signal(&svm_bo->eviction_fence->base);
> @@ -532,6 +553,7 @@ int
>  svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
>                       bool clear)
>  {
> +     struct kfd_process_device *pdd;
>       struct amdgpu_bo_param bp;
>       struct svm_range_bo *svm_bo;
>       struct amdgpu_bo_user *ubo;
> @@ -623,6 +645,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct 
> svm_range *prange,
>       list_add(&prange->svm_bo_list, &svm_bo->range_list);
>       spin_unlock(&svm_bo->list_lock);
>  
> +     pdd = svm_range_get_pdd_by_node(prange, node);
> +     if (pdd)
> +             atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
> +
>       return 0;
>  
>  reserve_bo_failed:

Reply via email to