Re: [PATCH v3] drm/amdkfd: Track GPU memory utilization per process

2020-04-27 Thread Felix Kuehling
Thanks Mukul.

One last thing I noticed. It seems the update the the vram_usage is
protected by the process->mutex. That means the variable will always be
coherent while that lock is held. However, the sysfs-show function
doesn't hold that lock. So if the compiler decides to break the update
into multiple instructions, or sysfs-show does the read with multiple
instructions, you may see inconsistent results. You can ensure that
doesn't happen by only updating the size atomically with WRITE_ONCE and
only reading it atomically with READ_ONCE in the sysfs-show function.

For reference, this is a good article on the subject on lwn.net: [Who's
afraid of a big bad optimizing compiler?](https://lwn.net/Articles/793253/)

Regards,
  Felix

Am 2020-04-26 um 11:45 p.m. schrieb Mukul Joshi:
> Track GPU VRAM usage on a per process basis and report it through
> sysfs.
>
> v2:
>- Handle AMDGPU BO-specific details in
>  amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
>- Return size of VRAM BO being freed from
>  amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
>- Do not consider imported memory for VRAM
>  usage calculations.
>
> v3:
>- Move handling of imported BO size from
>  kfd_ioctl_free_memory_of_gpu() to  
>  amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
>
> Signed-off-by: Mukul Joshi 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  3 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 16 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 13 -
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  7 +++
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 57 ---
>  5 files changed, 84 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index d065c50582eb..a501026e829c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -65,6 +65,7 @@ struct kgd_mem {
>   struct amdgpu_sync sync;
>  
>   bool aql_queue;
> + bool is_imported;
>  };
>  
>  /* KFD Memory Eviction */
> @@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   void *vm, struct kgd_mem **mem,
>   uint64_t *offset, uint32_t flags);
>  int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> - struct kgd_dev *kgd, struct kgd_mem *mem);
> + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
>  int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>   struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
>  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 0768b7eb7683..1247938b1ec1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>  }
>  
>  int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> - struct kgd_dev *kgd, struct kgd_mem *mem)
> + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
>  {
>   struct amdkfd_process_info *process_info = mem->process_info;
>   unsigned long bo_size = mem->bo->tbo.mem.size;
> @@ -1286,9 +1286,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   struct ttm_validate_buffer *bo_list_entry;
>   unsigned int mapped_to_gpu_memory;
>   int ret;
> + bool is_imported = 0;
>  
>   mutex_lock(>lock);
>   mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
> + is_imported = mem->is_imported;
>   mutex_unlock(>lock);
>   /* lock is not needed after this, since mem is unused and will
>* be freed anyway
> @@ -1340,6 +1342,17 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   kfree(mem->bo->tbo.sg);
>   }
>  
> + /* Update the size of the BO being freed if it was allocated from
> +  * VRAM and is not imported.
> +  */
> + if (size) {
> + if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
> + (!is_imported))
> + *size = bo_size;
> + else
> + *size = 0;
> + }
> +
>   /* Free the BO*/
>   amdgpu_bo_unref(>bo);
>   mutex_destroy(>lock);
> @@ -1694,6 +1707,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev 
> *kgd,
>   (*mem)->process_info = avm->process_info;
>   add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
>   amdgpu_sync_create(&(*mem)->sync);
> + (*mem)->is_imported = true;
>  
>   return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index f8fa03a12add..ede84f76397f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1322,6 +1322,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
> *filep,
>   goto err_free;
>   }

[PATCH v3] drm/amdkfd: Track GPU memory utilization per process

2020-04-26 Thread Mukul Joshi
Track GPU VRAM usage on a per process basis and report it through
sysfs.

v2:
   - Handle AMDGPU BO-specific details in
 amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
   - Return size of VRAM BO being freed from
 amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
   - Do not consider imported memory for VRAM
 usage calculations.

v3:
   - Move handling of imported BO size from
 kfd_ioctl_free_memory_of_gpu() to  
 amdgpu_amdkfd_gpuvm_free_memory_of_gpu().

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 16 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 13 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  7 +++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 57 ---
 5 files changed, 84 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index d065c50582eb..a501026e829c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -65,6 +65,7 @@ struct kgd_mem {
struct amdgpu_sync sync;
 
bool aql_queue;
+   bool is_imported;
 };
 
 /* KFD Memory Eviction */
@@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
void *vm, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags);
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-   struct kgd_dev *kgd, struct kgd_mem *mem);
+   struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 0768b7eb7683..1247938b1ec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-   struct kgd_dev *kgd, struct kgd_mem *mem)
+   struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
 {
struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.mem.size;
@@ -1286,9 +1286,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct ttm_validate_buffer *bo_list_entry;
unsigned int mapped_to_gpu_memory;
int ret;
+   bool is_imported = 0;
 
mutex_lock(>lock);
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
+   is_imported = mem->is_imported;
mutex_unlock(>lock);
/* lock is not needed after this, since mem is unused and will
 * be freed anyway
@@ -1340,6 +1342,17 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
kfree(mem->bo->tbo.sg);
}
 
+   /* Update the size of the BO being freed if it was allocated from
+* VRAM and is not imported.
+*/
+   if (size) {
+   if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
+   (!is_imported))
+   *size = bo_size;
+   else
+   *size = 0;
+   }
+
/* Free the BO*/
amdgpu_bo_unref(>bo);
mutex_destroy(>lock);
@@ -1694,6 +1707,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
(*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
amdgpu_sync_create(&(*mem)->sync);
+   (*mem)->is_imported = true;
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f8fa03a12add..ede84f76397f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1322,6 +1322,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
goto err_free;
}
 
+   /* Update the VRAM usage count */
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+   pdd->vram_usage += args->size;
+
mutex_unlock(>mutex);
 
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
@@ -1337,7 +1341,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
return 0;
 
 err_free:
-   amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+   amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, 
NULL);
 err_unlock:
mutex_unlock(>mutex);
return err;
@@ -1351,6 +1355,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file 
*filep,
void *mem;
struct kfd_dev *dev;
int ret;
+   uint64_t size = 0;
 
dev = kfd_device_by_id(GET_GPU_ID(args->handle));
if (!dev)
@@ -1373,7 +1378,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file