On 27/05/2026 12:46, Christian König wrote:
On 5/26/26 18:06, Tvrtko Ursulin wrote:
Saving and restoring buffer object content poses a challenge for the
checkpoint and restore process for at least two reasons.

For example not all objects can be exported as dma-buf to enable copying
from a separate client context, neither can any objects be easily copied
from the same context since the injected CRIU code is unaware of the GPU
virtual memory free and allocated ranges.

Let's bypass both problems by simply exposing access to the already present
fast kernel copy via a new DRM_IOCTL_AMDGPU_GEM_COPY_BUFFER ioctl.

Oh nice, that was on the TODO list for KFD integration as well.

That's good to hear! I was worried this one could be shot down in flames.

By giving the kernel simply the source and destination handles it is able
to copy them without the need for objects to be mapped, or shared with a
separate client.

For now this is implemented fully synchronous but can easily be extended
for more parallelism.

Looks reasonable offhand but I would rather like to have an output DMA-fence 
(drm_syncobj) instead of the dma_fence_wait().

I opted for simplicity for the RFC but I can certainly add an output fence. I guess that way userspace could implement parallel/pipelined save/restore for maximum performance.

Marking as TODO for RFC v2.

Regards,

Tvrtko

Signed-off-by: Tvrtko Ursulin <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 102 ++++++++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  14 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |   7 ++
  include/uapi/drm/amdgpu_drm.h           |   8 ++
  6 files changed, 127 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 2b971de3c189..b927e3a3089e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -3066,6 +3066,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
        DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, 
amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_CONTEXTS, 
amdgpu_gem_list_contexts_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF_DRV(AMDGPU_GEM_COPY_BUFFER, amdgpu_gem_copy_buffer_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
  };
static const struct drm_driver amdgpu_kms_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 5eb8433229b4..4e0440cd6ee2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -1216,6 +1216,108 @@ int amdgpu_gem_list_handles_ioctl(struct drm_device 
*dev, void *data,
        return ret;
  }
+/**
+ * amdgpu_gem_copy_buffer_ioctl - copy buffer object content
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_gem_copy_buffer
+ * @filp: drm file pointer
+ *
+ * Copies all of the source object into the destination and waits for the copy.
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_gem_copy_buffer_ioctl(struct drm_device *dev, void *data,
+                                struct drm_file *filp)
+{
+       struct amdgpu_copy_mem src_mem = {}, dst_mem = {};
+       struct drm_amdgpu_gem_copy_buffer *args = data;
+       struct amdgpu_device *adev = drm_to_adev(dev);
+       struct drm_gem_object *src_gobj, *dst_gobj;
+       struct amdgpu_bo *src_bo, *dst_bo;
+       struct dma_fence *fence = NULL;
+       struct drm_exec exec;
+       unsigned int e;
+       long timeout;
+       int r;
+
+       if (args->flags)
+               return -EINVAL;
+
+       src_gobj = drm_gem_object_lookup(filp, args->src_handle);
+       if (!src_gobj)
+               return -ENOENT;
+
+       dst_gobj = drm_gem_object_lookup(filp, args->dst_handle);
+       if (!dst_gobj) {
+               r = -ENOENT;
+               goto err_dst;
+       }
+
+       src_bo = gem_to_amdgpu_bo(src_gobj);
+       dst_bo = gem_to_amdgpu_bo(dst_gobj);
+
+       /* The copy length is the source size, so it must fit the destination. */
+       if (amdgpu_bo_size(src_bo) > amdgpu_bo_size(dst_bo)) {
+               r = -E2BIG;
+               goto err_sizes;
+       }
+
+       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+
+       drm_exec_until_all_locked(&exec) {
+               r = drm_exec_prepare_obj(&exec, &src_bo->tbo.base, 1);
+               drm_exec_retry_on_contention(&exec);
+               if (r)
+                       goto err_unlock;
+
+               r = drm_exec_prepare_obj(&exec, &dst_bo->tbo.base, 1);
+               drm_exec_retry_on_contention(&exec);
+               if (r)
+                       goto err_unlock;
+       }
+
+       src_mem.bo = &src_bo->tbo;
+       src_mem.mem = src_bo->tbo.resource;
+       dst_mem.bo = &dst_bo->tbo;
+       dst_mem.mem = dst_bo->tbo.resource;
+       e = atomic_inc_return(&adev->mman.next_move_entity) %
+                             adev->mman.num_move_entities;
+       r = amdgpu_ttm_copy_mem_to_mem(adev, &adev->mman.move_entities[e],
+                                      &src_mem, &dst_mem,
+                                      amdgpu_bo_size(src_bo),
+                                      amdgpu_bo_encrypted(src_bo),
+                                      dst_bo->tbo.base.resv, &fence);
+       if (!r && !fence)
+               r = -ENXIO;
+       if (r)
+               goto err_unlock;
+
+       dma_resv_add_fence(src_bo->tbo.base.resv, fence, DMA_RESV_USAGE_READ);
+       dma_resv_add_fence(dst_bo->tbo.base.resv, fence, DMA_RESV_USAGE_WRITE);
+       dma_fence_put(fence);
+       drm_exec_fini(&exec);
+
+       timeout = dma_resv_wait_timeout(dst_bo->tbo.base.resv,
+                                       DMA_RESV_USAGE_WRITE, true,
+                                       MAX_SCHEDULE_TIMEOUT);
+       if (timeout < 0)
+               r = timeout;
+       else if (timeout == 0)
+               r = -ETIME;
+       goto out_put;
+
+err_unlock:
+       drm_exec_fini(&exec);
+out_put:
+err_sizes:
+       drm_gem_object_put(dst_gobj);
+err_dst:
+       drm_gem_object_put(src_gobj);
+       return r;
+}
+
  static int amdgpu_gem_align_pitch(struct amdgpu_device *adev,
                                  int width,
                                  int cpp,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index 0e17d9fc665f..64529680fdad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -71,6 +71,8 @@ int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, 
void *data,
                                  struct drm_file *filp);
  int amdgpu_gem_list_contexts_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *filp);
+int amdgpu_gem_copy_buffer_ioctl(struct drm_device *dev, void *data,
+                                struct drm_file *filp);
int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *filp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4023c84fe29d..3baad073ed54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -294,13 +294,13 @@ static int amdgpu_ttm_map_buffer(struct 
amdgpu_ttm_buffer_entity *entity,
   *
   */
  __attribute__((nonnull))
-static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
-                                     struct amdgpu_ttm_buffer_entity *entity,
-                                     const struct amdgpu_copy_mem *src,
-                                     const struct amdgpu_copy_mem *dst,
-                                     uint64_t size, bool tmz,
-                                     struct dma_resv *resv,
-                                     struct dma_fence **f)
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+                              struct amdgpu_ttm_buffer_entity *entity,
+                              const struct amdgpu_copy_mem *src,
+                              const struct amdgpu_copy_mem *dst,
+                              uint64_t size, bool tmz,
+                              struct dma_resv *resv,
+                              struct dma_fence **f)
  {
        struct amdgpu_res_cursor src_mm, dst_mm;
        struct dma_fence *fence = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 8c0424577dea..360a860b8280 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -286,4 +286,11 @@ void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev,
                                    enum dma_data_direction dir,
                                    struct sg_table *sgt);
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+                              struct amdgpu_ttm_buffer_entity *entity,
+                              const struct amdgpu_copy_mem *src,
+                              const struct amdgpu_copy_mem *dst,
+                              uint64_t size, bool tmz,
+                              struct dma_resv *resv,
+                              struct dma_fence **f);
  #endif
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index d2a7b4e9daed..1335a181eb39 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -59,6 +59,7 @@ extern "C" {
  #define DRM_AMDGPU_USERQ_WAIT         0x18
  #define DRM_AMDGPU_GEM_LIST_HANDLES   0x19
  #define DRM_AMDGPU_GEM_LIST_CONTEXTS  0x20
+#define DRM_AMDGPU_GEM_COPY_BUFFER     0x21
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
  #define DRM_IOCTL_AMDGPU_GEM_MMAP     DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -81,6 +82,7 @@ extern "C" {
  #define DRM_IOCTL_AMDGPU_USERQ_WAIT   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
  #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
  #define DRM_IOCTL_AMDGPU_GEM_LIST_CONTEXTS DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_LIST_CONTEXTS, struct drm_amdgpu_gem_list_contexts)
+#define DRM_IOCTL_AMDGPU_GEM_COPY_BUFFER DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_COPY_BUFFER, struct drm_amdgpu_gem_copy_buffer)
/**
   * DOC: memory domains
@@ -206,6 +208,12 @@ union drm_amdgpu_gem_create {
        struct drm_amdgpu_gem_create_out        out;
  };
+struct drm_amdgpu_gem_copy_buffer {
+       __u32   src_handle;     /* GEM handle of the object to copy from */
+       __u32   dst_handle;     /* GEM handle of the object to copy into */
+       __u64   flags;          /* must be zero; non-zero is rejected with -EINVAL */
+};
+
  /** Opcode to create new residency list.  */
  #define AMDGPU_BO_LIST_OP_CREATE      0
  /** Opcode to destroy previously created residency list */


Reply via email to