On 03/12/2025 09:01, Boris Brezillon wrote:
> This will be used by the UMD to synchronize CPU-cached mappings when
> the UMD can't do it directly (no usermode cache maintenance instruction
> on Arm32).
> 
> v2:
> - Change the flags so they better match the drm_gem_shmem_sync()
>   semantics
> 
> v3:
> - Add Steve's R-b
> 
> v4:
> - No changes
> 
> v5:
> - Drop Steve's R-b (the semantics changes call for a new review)
> 
> v6:
> - Drop ret initialization in panthor_ioctl_bo_sync()
> - Bail out early in panthor_ioctl_bo_sync() if ops.count is zero
> - Drop unused PANTHOR_BO_SYNC_OP_FLAGS definition
> 
> v7:
> - Hand-roll the sync logic (was previously provided by gem_shmem)
> 
> Signed-off-by: Faith Ekstrand <[email protected]>
> Signed-off-by: Boris Brezillon <[email protected]>

Reviewed-by: Steven Price <[email protected]>
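
FWIW, here is roughly what driving the new ioctl from userspace looks
like; a minimal sketch, assuming libdrm's drmIoctl() and the
DRM_PANTHOR_OBJ_ARRAY() initializer from panthor_drm.h (fd, bo_handle
and bo_size are placeholders):

	struct drm_panthor_bo_sync_op op = {
		.handle = bo_handle,
		.type = DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH,
		.offset = 0,
		.size = bo_size,
	};
	struct drm_panthor_bo_sync req = {
		.ops = DRM_PANTHOR_OBJ_ARRAY(1, &op),
	};
	int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_SYNC, &req);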

> ---
>  drivers/gpu/drm/panthor/panthor_drv.c | 41 ++++++++++++-
>  drivers/gpu/drm/panthor/panthor_gem.c | 85 +++++++++++++++++++++++++++
>  drivers/gpu/drm/panthor/panthor_gem.h |  2 +
>  include/uapi/drm/panthor_drm.h        | 52 ++++++++++++++++
>  4 files changed, 179 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> index d12ac4cb0ac4..cab19621917f 100644
> --- a/drivers/gpu/drm/panthor/panthor_drv.c
> +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> @@ -177,7 +177,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
>                PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
>                PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
>                PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
> -              PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs))
> +              PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \
> +              PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size))
>  
>  /**
>   * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
> @@ -1396,6 +1397,43 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev,
>       return 0;
>  }
>  
> +static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data,
> +                              struct drm_file *file)
> +{
> +     struct drm_panthor_bo_sync *args = data;
> +     struct drm_panthor_bo_sync_op *ops;
> +     struct drm_gem_object *obj;
> +     int ret;
> +
> +     if (!args->ops.count)
> +             return 0;
> +
> +     ret = PANTHOR_UOBJ_GET_ARRAY(ops, &args->ops);
> +     if (ret)
> +             return ret;
> +
> +     for (u32 i = 0; i < args->ops.count; i++) {
> +             obj = drm_gem_object_lookup(file, ops[i].handle);
> +             if (!obj) {
> +                     ret = -ENOENT;
> +                     goto err_ops;
> +             }
> +
> +             ret = panthor_gem_sync(obj, ops[i].type, ops[i].offset,
> +                                    ops[i].size);
> +
> +             drm_gem_object_put(obj);
> +
> +             if (ret)
> +                     goto err_ops;
> +     }
> +
> +err_ops:
> +     kvfree(ops);
> +
> +     return ret;
> +}
> +
>  static int
>  panthor_open(struct drm_device *ddev, struct drm_file *file)
>  {
> @@ -1470,6 +1508,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
>       PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW),
>       PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW),
>       PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW),
> +     PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW),
>  };
>  
>  static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
> diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
> index 173d42d65000..4be32fc1732b 100644
> --- a/drivers/gpu/drm/panthor/panthor_gem.c
> +++ b/drivers/gpu/drm/panthor/panthor_gem.c
> @@ -447,6 +447,91 @@ panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label)
>       panthor_gem_bo_set_label(bo->obj, str);
>  }
>  
> +int
> +panthor_gem_sync(struct drm_gem_object *obj, u32 type,
> +              u64 offset, u64 size)
> +{
> +     struct panthor_gem_object *bo = to_panthor_bo(obj);
> +     struct drm_gem_shmem_object *shmem = &bo->base;
> +     const struct drm_device *dev = shmem->base.dev;
> +     struct sg_table *sgt;
> +     struct scatterlist *sgl;
> +     unsigned int count;
> +
> +     /* Make sure the range is in bounds. */
> +     if (offset + size < offset || offset + size > shmem->base.size)
> +             return -EINVAL;
> +
> +     /* Disallow CPU-cache maintenance on imported buffers. */
> +     if (drm_gem_is_imported(&shmem->base))
> +             return -EINVAL;
> +
> +     switch (type) {
> +     case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH:
> +     case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE:
> +             break;
> +
> +     default:
> +             return -EINVAL;
> +     }
> +
> +     /* Don't bother if it's WC-mapped */
> +     if (shmem->map_wc)
> +             return 0;
> +
> +     /* Nothing to do if the size is zero. */
> +     if (size == 0)
> +             return 0;
> +
> +     sgt = drm_gem_shmem_get_pages_sgt(shmem);
> +     if (IS_ERR(sgt))
> +             return PTR_ERR(sgt);
> +
> +     for_each_sgtable_dma_sg(sgt, sgl, count) {
> +             if (size == 0)
> +                     break;
> +
> +             dma_addr_t paddr = sg_dma_address(sgl);
> +             size_t len = sg_dma_len(sgl);
> +
> +             if (len <= offset) {
> +                     offset -= len;
> +                     continue;
> +             }
> +
> +             paddr += offset;
> +             len -= offset;
> +             len = min_t(size_t, len, size);
> +             size -= len;
> +             offset = 0;
> +
> +             /* It's unclear whether dma_sync_xxx() is the right API to do CPU
> +              * cache maintenance given an IOMMU can register its own
> +              * implementation doing more than just CPU cache flushes/invalidation,
> +              * and what we really care about here is CPU caches only, but that's
> +              * the best we have that is both arch-agnostic and does at least the
> +              * CPU cache maintenance on a <page,offset,size> tuple.
> +              *
> +              * Also, I wish we could do a single
> +              *
> +              *      dma_sync_single_for_device(BIDIR)
> +              *
> +              * and get a flush+invalidate, but that's not how it's implemented
> +              * in practice (at least on arm64), so we have to make it
> +              *
> +              *      dma_sync_single_for_device(TO_DEVICE)
> +              *      dma_sync_single_for_cpu(FROM_DEVICE)
> +              *
> +              * for the flush+invalidate case.
> +              */
> +             dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE);
> +             if (type == DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE)
> +                     dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE);
> +     }
> +
> +     return 0;
> +}
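
For the record, the per-segment trimming above checks out when traced
by hand (hypothetical numbers): with 64KiB sg segments and
offset=100KiB/size=8KiB, the first segment is skipped (offset drops to
36KiB), the second is synced at paddr+36KiB with
len = min(64KiB - 36KiB, 8KiB) = 8KiB, size reaches zero and the walk
stops on the next iteration.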
> +
>  #ifdef CONFIG_DEBUG_FS
>  struct gem_size_totals {
>       size_t size;
> diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h
> index 91d1880f8a5d..bbf9ae75c360 100644
> --- a/drivers/gpu/drm/panthor/panthor_gem.h
> +++ b/drivers/gpu/drm/panthor/panthor_gem.h
> @@ -146,6 +146,8 @@ panthor_gem_create_with_handle(struct drm_file *file,
>  
>  void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label);
>  void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label);
> +int panthor_gem_sync(struct drm_gem_object *obj,
> +                  u32 type, u64 offset, u64 size);
>  
>  struct drm_gem_object *
>  panthor_gem_prime_import(struct drm_device *dev,
> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> index 28cf9e878db6..9f810305db6e 100644
> --- a/include/uapi/drm/panthor_drm.h
> +++ b/include/uapi/drm/panthor_drm.h
> @@ -144,6 +144,9 @@ enum drm_panthor_ioctl_id {
>        * pgoff_t size.
>        */
>       DRM_PANTHOR_SET_USER_MMIO_OFFSET,
> +
> +     /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */
> +     DRM_PANTHOR_BO_SYNC,
>  };
>  
>  /**
> @@ -1073,6 +1076,53 @@ struct drm_panthor_set_user_mmio_offset {
>       __u64 offset;
>  };
>  
> +/**
> + * enum drm_panthor_bo_sync_op_type - BO sync type
> + */
> +enum drm_panthor_bo_sync_op_type {
> +     /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: Flush CPU caches. */
> +     DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH = 0,
> +
> +     /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: Flush and invalidate CPU caches. */
> +     DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE = 1,
> +};
> +
> +/**
> + * struct drm_panthor_bo_sync_op - BO map sync op
> + */
> +struct drm_panthor_bo_sync_op {
> +     /** @handle: Handle of the buffer object to sync. */
> +     __u32 handle;
> +
> +     /** @type: Type of operation. */
> +     __u32 type;
> +
> +     /**
> +      * @offset: Offset into the BO at which the sync range starts.
> +      *
> +      * This will be rounded down to the nearest cache line as needed.
> +      */
> +     __u64 offset;
> +
> +     /**
> +      * @size: Size of the range to sync
> +      *
> +      * @size + @offset will be rounded up to the nearest cache line as
> +      * needed.
> +      */
> +     __u64 size;
> +};
> +
> +/**
> + * struct drm_panthor_bo_sync - BO map sync request
> + */
> +struct drm_panthor_bo_sync {
> +     /**
> +      * @ops: Array of struct drm_panthor_bo_sync_op sync operations.
> +      */
> +     struct drm_panthor_obj_array ops;
> +};
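
Since the ops come in through a drm_panthor_obj_array, a UMD can batch
maintenance for several BOs/sub-ranges in one call. Hedged sketch
(hypothetical handles and ranges, same DRM_PANTHOR_OBJ_ARRAY()
assumption as the example further up): flush freshly written vertex
data, and flush+invalidate a readback BO before the CPU reads it:

	struct drm_panthor_bo_sync_op ops[2] = {
		{
			/* CPU wrote vertex data; push it out of the CPU caches. */
			.handle = vbo_handle,
			.type = DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH,
			.offset = vbo_dirty_offset,
			.size = vbo_dirty_size,
		},
		{
			/* GPU wrote this BO; drop stale CPU cache lines first. */
			.handle = readback_handle,
			.type = DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE,
			.offset = 0,
			.size = readback_size,
		},
	};
	struct drm_panthor_bo_sync req = {
		.ops = DRM_PANTHOR_OBJ_ARRAY(2, ops),
	};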
> +
>  /**
>   * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
>   * @__access: Access type. Must be R, W or RW.
> @@ -1119,6 +1169,8 @@ enum {
>               DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label),
>       DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET =
>               DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset),
> +     DRM_IOCTL_PANTHOR_BO_SYNC =
> +             DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync),
>  };
>  
>  #if defined(__cplusplus)
