Thanks Mukul.

One last thing I noticed. It seems the update the the vram_usage is
protected by the process->mutex. That means the variable will always be
coherent while that lock is held. However, the sysfs-show function
doesn't hold that lock. So if the compiler decides to break the update
into multiple instructions, or sysfs-show does the read with multiple
instructions, you may see inconsistent results. You can ensure that
doesn't happen by only updating the size atomically with WRITE_ONCE and
only reading it atomically with READ_ONCE in the sysfs-show function.

For reference, this is a good article on the subject on lwn.net: [Who's
afraid of a big bad optimizing compiler?](https://lwn.net/Articles/793253/)

Regards,
  Felix

Am 2020-04-26 um 11:45 p.m. schrieb Mukul Joshi:
> Track GPU VRAM usage on a per process basis and report it through
> sysfs.
>
> v2:
>    - Handle AMDGPU BO-specific details in
>      amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
>    - Return size of VRAM BO being freed from
>      amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
>    - Do not consider imported memory for VRAM
>      usage calculations.
>
> v3:
>    - Move handling of imported BO size from
>      kfd_ioctl_free_memory_of_gpu() to  
>      amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
>
> Signed-off-by: Mukul Joshi <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  3 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 16 +++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 13 ++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  7 +++
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 57 ++++++++++++++++---
>  5 files changed, 84 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index d065c50582eb..a501026e829c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -65,6 +65,7 @@ struct kgd_mem {
>       struct amdgpu_sync sync;
>  
>       bool aql_queue;
> +     bool is_imported;
>  };
>  
>  /* KFD Memory Eviction */
> @@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>               void *vm, struct kgd_mem **mem,
>               uint64_t *offset, uint32_t flags);
>  int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> -             struct kgd_dev *kgd, struct kgd_mem *mem);
> +             struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
>  int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>               struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
>  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 0768b7eb7683..1247938b1ec1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>  }
>  
>  int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> -             struct kgd_dev *kgd, struct kgd_mem *mem)
> +             struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
>  {
>       struct amdkfd_process_info *process_info = mem->process_info;
>       unsigned long bo_size = mem->bo->tbo.mem.size;
> @@ -1286,9 +1286,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>       struct ttm_validate_buffer *bo_list_entry;
>       unsigned int mapped_to_gpu_memory;
>       int ret;
> +     bool is_imported = 0;
>  
>       mutex_lock(&mem->lock);
>       mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
> +     is_imported = mem->is_imported;
>       mutex_unlock(&mem->lock);
>       /* lock is not needed after this, since mem is unused and will
>        * be freed anyway
> @@ -1340,6 +1342,17 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>               kfree(mem->bo->tbo.sg);
>       }
>  
> +     /* Update the size of the BO being freed if it was allocated from
> +      * VRAM and is not imported.
> +      */
> +     if (size) {
> +             if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
> +                 (!is_imported))
> +                     *size = bo_size;
> +             else
> +                     *size = 0;
> +     }
> +
>       /* Free the BO*/
>       amdgpu_bo_unref(&mem->bo);
>       mutex_destroy(&mem->lock);
> @@ -1694,6 +1707,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev 
> *kgd,
>       (*mem)->process_info = avm->process_info;
>       add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
>       amdgpu_sync_create(&(*mem)->sync);
> +     (*mem)->is_imported = true;
>  
>       return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index f8fa03a12add..ede84f76397f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1322,6 +1322,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
> *filep,
>               goto err_free;
>       }
>  
> +     /* Update the VRAM usage count */
> +     if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
> +             pdd->vram_usage += args->size;
> +
>       mutex_unlock(&p->mutex);
>  
>       args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
> @@ -1337,7 +1341,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
> *filep,
>       return 0;
>  
>  err_free:
> -     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
> +     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, 
> NULL);
>  err_unlock:
>       mutex_unlock(&p->mutex);
>       return err;
> @@ -1351,6 +1355,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file 
> *filep,
>       void *mem;
>       struct kfd_dev *dev;
>       int ret;
> +     uint64_t size = 0;
>  
>       dev = kfd_device_by_id(GET_GPU_ID(args->handle));
>       if (!dev)
> @@ -1373,7 +1378,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file 
> *filep,
>       }
>  
>       ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
> -                                             (struct kgd_mem *)mem);
> +                                             (struct kgd_mem *)mem, &size);
>  
>       /* If freeing the buffer failed, leave the handle in place for
>        * clean-up during process tear-down.
> @@ -1382,6 +1387,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file 
> *filep,
>               kfd_process_device_remove_obj_handle(
>                       pdd, GET_IDR_HANDLE(args->handle));
>  
> +     pdd->vram_usage -= size;
> +
>  err_unlock:
>       mutex_unlock(&p->mutex);
>       return ret;
> @@ -1726,7 +1733,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
>       return 0;
>  
>  err_free:
> -     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
> +     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, 
> NULL);
>  err_unlock:
>       mutex_unlock(&p->mutex);
>       return r;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 43b888b311c7..e7918fc3cee5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -616,6 +616,8 @@ enum kfd_pdd_bound {
>       PDD_BOUND_SUSPENDED,
>  };
>  
> +#define MAX_VRAM_FILENAME_LEN 11
> +
>  /* Data that is per-process-per device. */
>  struct kfd_process_device {
>       /*
> @@ -658,6 +660,11 @@ struct kfd_process_device {
>  
>       /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
>       enum kfd_pdd_bound bound;
> +
> +     /* VRAM usage */
> +     uint64_t vram_usage;
> +     struct attribute attr_vram;
> +     char vram_filename[MAX_VRAM_FILENAME_LEN];
>  };
>  
>  #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index fe0cd49d4ea7..600759949802 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -79,18 +79,22 @@ static struct kfd_procfs_tree procfs;
>  static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
>                              char *buffer)
>  {
> -     int val = 0;
> -
>       if (strcmp(attr->name, "pasid") == 0) {
>               struct kfd_process *p = container_of(attr, struct kfd_process,
>                                                    attr_pasid);
> -             val = p->pasid;
> +
> +             return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
> +     } else if (strncmp(attr->name, "vram_", 5) == 0) {
> +             struct kfd_process_device *pdd = container_of(attr, struct 
> kfd_process_device,
> +                                                           attr_vram);
> +             if (pdd)
> +                     return snprintf(buffer, PAGE_SIZE, "%llu\n", 
> pdd->vram_usage);
>       } else {
>               pr_err("Invalid attribute");
>               return -EINVAL;
>       }
>  
> -     return snprintf(buffer, PAGE_SIZE, "%d\n", val);
> +     return 0;
>  }
>  
>  static void kfd_procfs_kobj_release(struct kobject *kobj)
> @@ -206,6 +210,34 @@ int kfd_procfs_add_queue(struct queue *q)
>       return 0;
>  }
>  
> +int kfd_procfs_add_vram_usage(struct kfd_process *p)
> +{
> +     int ret = 0;
> +     struct kfd_process_device *pdd;
> +
> +     if (!p)
> +             return -EINVAL;
> +
> +     if (!p->kobj)
> +             return -EFAULT;
> +
> +     /* Create proc/<pid>/vram_<gpuid> file for each GPU */
> +     list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
> +             snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
> +                      pdd->dev->id);
> +             pdd->attr_vram.name = pdd->vram_filename;
> +             pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
> +             sysfs_attr_init(&pdd->attr_vram);
> +             ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
> +             if (ret)
> +                     pr_warn("Creating vram usage for gpu id %d failed",
> +                             (int)pdd->dev->id);
> +     }
> +
> +     return ret;
> +}
> +
> +
>  void kfd_procfs_del_queue(struct queue *q)
>  {
>       if (!q)
> @@ -248,7 +280,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
>       struct kfd_dev *dev = pdd->dev;
>  
>       amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
> -     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
> +     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
>  }
>  
>  /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
> @@ -312,7 +344,7 @@ static int kfd_process_alloc_gpuvm(struct 
> kfd_process_device *pdd,
>       return err;
>  
>  err_map_mem:
> -     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
> +     amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
>  err_alloc_mem:
>       *kptr = NULL;
>       return err;
> @@ -411,6 +443,11 @@ struct kfd_process *kfd_create_process(struct file 
> *filep)
>                                                       process->kobj);
>               if (!process->kobj_queues)
>                       pr_warn("Creating KFD proc/queues folder failed");
> +
> +             ret = kfd_procfs_add_vram_usage(process);
> +             if (ret)
> +                     pr_warn("Creating vram usage file for pid %d failed",
> +                             (int)process->lead_thread->pid);
>       }
>  out:
>       if (!IS_ERR(process))
> @@ -488,7 +525,7 @@ static void kfd_process_device_free_bos(struct 
> kfd_process_device *pdd)
>                               peer_pdd->dev->kgd, mem, peer_pdd->vm);
>               }
>  
> -             amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
> +             amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, 
> NULL);
>               kfd_process_device_remove_obj_handle(pdd, id);
>       }
>  }
> @@ -551,6 +588,7 @@ static void kfd_process_wq_release(struct work_struct 
> *work)
>  {
>       struct kfd_process *p = container_of(work, struct kfd_process,
>                                            release_work);
> +     struct kfd_process_device *pdd;
>  
>       /* Remove the procfs files */
>       if (p->kobj) {
> @@ -558,6 +596,10 @@ static void kfd_process_wq_release(struct work_struct 
> *work)
>               kobject_del(p->kobj_queues);
>               kobject_put(p->kobj_queues);
>               p->kobj_queues = NULL;
> +
> +             list_for_each_entry(pdd, &p->per_device_data, per_device_list)
> +                     sysfs_remove_file(p->kobj, &pdd->attr_vram);
> +
>               kobject_del(p->kobj);
>               kobject_put(p->kobj);
>               p->kobj = NULL;
> @@ -862,6 +904,7 @@ struct kfd_process_device 
> *kfd_create_process_device_data(struct kfd_dev *dev,
>       pdd->bound = PDD_UNBOUND;
>       pdd->already_dequeued = false;
>       pdd->runtime_inuse = false;
> +     pdd->vram_usage = 0;
>       list_add(&pdd->per_device_list, &p->per_device_data);
>  
>       /* Init idr used for memory handle translation */
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to