On 5/26/26 18:06, Tvrtko Ursulin wrote:
> Saving and restoring buffer object content poses a challenge for the
> checkpoint and restore process for at least two reasons.
> 
> First, not all objects can be exported as dma-buf to enable copying from
> a separate client context. Second, objects cannot easily be copied from
> within the same context, since the injected CRIU code is unaware of which
> GPU virtual memory ranges are free and which are allocated.
> 
> Let's bypass both problems by simply exposing access to the already present
> fast kernel copy via a new DRM_IOCTL_AMDGPU_GEM_COPY_BUFFER ioctl.

Oh nice, that was on the TODO list for KFD integration as well.

> By giving the kernel simply the source and destination handles it is able
> to copy them without the need for objects to be mapped, or shared with a
> separate client.
> 
> For now this is implemented fully synchronously but can easily be extended
> for more parallelism.

Looks reasonable offhand, but I would rather have an output DMA-fence
(drm_syncobj) instead of the dma_fence_wait().

Regards,
Christian.

> 
> Signed-off-by: Tvrtko Ursulin <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 102 ++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  14 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |   7 ++
>  include/uapi/drm/amdgpu_drm.h           |   8 ++
>  6 files changed, 127 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 2b971de3c189..b927e3a3089e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -3066,6 +3066,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
>       DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, 
> DRM_AUTH|DRM_RENDER_ALLOW),
>       DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, 
> amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>       DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_CONTEXTS, 
> amdgpu_gem_list_contexts_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> +     DRM_IOCTL_DEF_DRV(AMDGPU_GEM_COPY_BUFFER, amdgpu_gem_copy_buffer_ioctl, 
> DRM_AUTH|DRM_RENDER_ALLOW),
>  };
>  
>  static const struct drm_driver amdgpu_kms_driver = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 5eb8433229b4..4e0440cd6ee2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -1216,6 +1216,108 @@ int amdgpu_gem_list_handles_ioctl(struct drm_device 
> *dev, void *data,
>       return ret;
>  }
>  
> +/**
> + * amdgpu_gem_copy_buffer_ioctl - copy buffer object content
> + *
> + * @dev: drm device pointer
> + * @data: drm_amdgpu_gem_copy_buffer
> + * @filp: drm file pointer
> + *
> + * Returns:
> + * 0 for success, -errno for errors.
> + */
> +int amdgpu_gem_copy_buffer_ioctl(struct drm_device *dev, void *data,
> +                              struct drm_file *filp)
> +{
> +     struct amdgpu_copy_mem src_mem = {}, dst_mem = {};
> +     struct drm_amdgpu_gem_copy_buffer *args = data;
> +     struct amdgpu_device *adev = drm_to_adev(dev);
> +     struct drm_gem_object *src_gobj, *dst_gobj;
> +     struct amdgpu_bo *src_bo, *dst_bo;
> +     struct dma_fence *fence = NULL;
> +     struct drm_exec exec;
> +     unsigned int e;
> +     long timeout;
> +     int r;
> +
> +     if (args->flags)
> +             return -EINVAL;
> +
> +     src_gobj = drm_gem_object_lookup(filp, args->src_handle);
> +     if (!src_gobj)
> +             return -ENOENT;
> +
> +     dst_gobj = drm_gem_object_lookup(filp, args->dst_handle);
> +     if (!dst_gobj) {
> +             r = -ENOENT;
> +             goto err_dst;
> +     }
> +
> +     src_bo = gem_to_amdgpu_bo(src_gobj);
> +     dst_bo = gem_to_amdgpu_bo(dst_gobj);
> +
> +     if (amdgpu_bo_size(src_bo) < amdgpu_bo_size(dst_bo)) {
> +             r = -E2BIG;
> +             goto err_sizes;
> +     }
> +
> +     drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> +
> +     drm_exec_until_all_locked(&exec) {
> +             r = drm_exec_prepare_obj(&exec, &src_bo->tbo.base, 1);
> +             drm_exec_retry_on_contention(&exec);
> +             if (r)
> +                     goto err_unlock;
> +
> +             r = drm_exec_prepare_obj(&exec, &dst_bo->tbo.base, 1);
> +             drm_exec_retry_on_contention(&exec);
> +             if (r)
> +                     goto err_unlock;
> +     }
> +
> +     src_mem.bo = &src_bo->tbo;
> +     src_mem.mem = src_bo->tbo.resource;
> +     dst_mem.bo = &dst_bo->tbo;
> +     dst_mem.mem = dst_bo->tbo.resource;
> +     e = atomic_inc_return(&adev->mman.next_move_entity) %
> +                           adev->mman.num_move_entities;
> +     r = amdgpu_ttm_copy_mem_to_mem(adev, &adev->mman.move_entities[e],
> +                                    &src_mem, &dst_mem,
> +                                    amdgpu_bo_size(src_bo),
> +                                    amdgpu_bo_encrypted(src_bo),
> +                                    dst_bo->tbo.base.resv, &fence);
> +     if (r) {
> +             goto err_unlock;
> +     } else if (!fence) {
> +             r = -ENXIO;
> +             goto err_unlock;
> +     }
> +
> +     dma_resv_add_fence(src_bo->tbo.base.resv, fence, DMA_RESV_USAGE_READ);
> +     dma_resv_add_fence(dst_bo->tbo.base.resv, fence, DMA_RESV_USAGE_WRITE);
> +     dma_fence_put(fence);
> +     drm_exec_fini(&exec);
> +
> +     timeout = dma_resv_wait_timeout(dst_bo->tbo.base.resv,
> +                                     DMA_RESV_USAGE_WRITE, true,
> +                                     MAX_SCHEDULE_TIMEOUT);
> +     if (timeout < 0)
> +             return timeout;
> +     else if (timeout == 0)
> +             return -ETIME;
> +     else
> +             goto out_put;
> +
> +err_unlock:
> +     drm_exec_fini(&exec);
> +out_put:
> +err_sizes:
> +     drm_gem_object_put(dst_gobj);
> +err_dst:
> +     drm_gem_object_put(src_gobj);
> +     return r;
> +}
> +
>  static int amdgpu_gem_align_pitch(struct amdgpu_device *adev,
>                                 int width,
>                                 int cpp,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
> index 0e17d9fc665f..64529680fdad 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
> @@ -71,6 +71,8 @@ int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, 
> void *data,
>                                 struct drm_file *filp);
>  int amdgpu_gem_list_contexts_ioctl(struct drm_device *dev, void *data,
>                                  struct drm_file *filp);
> +int amdgpu_gem_copy_buffer_ioctl(struct drm_device *dev, void *data,
> +                              struct drm_file *filp);
>  
>  int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>                               struct drm_file *filp);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4023c84fe29d..3baad073ed54 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -294,13 +294,13 @@ static int amdgpu_ttm_map_buffer(struct 
> amdgpu_ttm_buffer_entity *entity,
>   *
>   */
>  __attribute__((nonnull))
> -static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> -                                   struct amdgpu_ttm_buffer_entity *entity,
> -                                   const struct amdgpu_copy_mem *src,
> -                                   const struct amdgpu_copy_mem *dst,
> -                                   uint64_t size, bool tmz,
> -                                   struct dma_resv *resv,
> -                                   struct dma_fence **f)
> +int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> +                            struct amdgpu_ttm_buffer_entity *entity,
> +                            const struct amdgpu_copy_mem *src,
> +                            const struct amdgpu_copy_mem *dst,
> +                            uint64_t size, bool tmz,
> +                            struct dma_resv *resv,
> +                            struct dma_fence **f)
>  {
>       struct amdgpu_res_cursor src_mm, dst_mm;
>       struct dma_fence *fence = NULL;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 8c0424577dea..360a860b8280 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -286,4 +286,11 @@ void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev,
>                                   enum dma_data_direction dir,
>                                   struct sg_table *sgt);
>  
> +int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> +                            struct amdgpu_ttm_buffer_entity *entity,
> +                            const struct amdgpu_copy_mem *src,
> +                            const struct amdgpu_copy_mem *dst,
> +                            uint64_t size, bool tmz,
> +                            struct dma_resv *resv,
> +                            struct dma_fence **f);
>  #endif
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index d2a7b4e9daed..1335a181eb39 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -59,6 +59,7 @@ extern "C" {
>  #define DRM_AMDGPU_USERQ_WAIT                0x18
>  #define DRM_AMDGPU_GEM_LIST_HANDLES  0x19
>  #define DRM_AMDGPU_GEM_LIST_CONTEXTS 0x20
> +#define DRM_AMDGPU_GEM_COPY_BUFFER   0x21
>  
>  #define DRM_IOCTL_AMDGPU_GEM_CREATE  DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>  #define DRM_IOCTL_AMDGPU_GEM_MMAP    DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -81,6 +82,7 @@ extern "C" {
>  #define DRM_IOCTL_AMDGPU_USERQ_WAIT  DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>  #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>  #define DRM_IOCTL_AMDGPU_GEM_LIST_CONTEXTS DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_LIST_CONTEXTS, struct drm_amdgpu_gem_list_contexts)
> +#define DRM_IOCTL_AMDGPU_GEM_COPY_BUFFER DRM_IOW(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_COPY_BUFFER, struct drm_amdgpu_gem_copy_buffer)
>  
>  /**
>   * DOC: memory domains
> @@ -206,6 +208,12 @@ union drm_amdgpu_gem_create {
>       struct drm_amdgpu_gem_create_out        out;
>  };
>  
> +struct drm_amdgpu_gem_copy_buffer {
> +     __u32   src_handle;
> +     __u32   dst_handle;
> +     __u64   flags;
> +};
> +
>  /** Opcode to create new residency list.  */
>  #define AMDGPU_BO_LIST_OP_CREATE     0
>  /** Opcode to destroy previously created residency list */

Reply via email to