On 03/12/2025 09:01, Boris Brezillon wrote:
> This will be used by the UMD to synchronize CPU-cached mappings when
> the UMD can't do it directly (no usermode cache maintenance instruction
> on Arm32).
>
> v2:
> - Change the flags so they better match the drm_gem_shmem_sync()
>   semantics
>
> v3:
> - Add Steve's R-b
>
> v4:
> - No changes
>
> v5:
> - Drop Steve's R-b (the semantics changes call for a new review)
>
> v6:
> - Drop ret initialization in panthor_ioctl_bo_sync()
> - Bail out early in panthor_ioctl_bo_sync() if ops.count is zero
> - Drop unused PANTHOR_BO_SYNC_OP_FLAGS definition
>
> v7:
> - Hand-roll the sync logic (was previously provided by gem_shmem)
>
> Signed-off-by: Faith Ekstrand <[email protected]>
> Signed-off-by: Boris Brezillon <[email protected]>
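FWIW, for anyone wiring this up in a UMD, usage ends up looking roughly
like the sketch below. This assumes a libdrm-style drmIoctl() wrapper and
an already-open panthor fd, and uses the DRM_PANTHOR_OBJ_ARRAY() helper
from panthor_drm.h; the wrapper name is mine:

	#include <stdint.h>
	#include <xf86drm.h>
	#include "panthor_drm.h"

	/* Flush a CPU-written range of a cached BO before the GPU reads it.
	 * Pass DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE instead
	 * when the CPU is about to read back GPU-written data.
	 */
	static int panthor_bo_flush_range(int fd, uint32_t handle,
					  uint64_t offset, uint64_t size)
	{
		struct drm_panthor_bo_sync_op op = {
			.handle = handle,
			.type = DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH,
			.offset = offset,
			.size = size,
		};
		struct drm_panthor_bo_sync req = {
			.ops = DRM_PANTHOR_OBJ_ARRAY(1, &op),
		};

		return drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_SYNC, &req);
	}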
Reviewed-by: Steven Price <[email protected]>

> ---
>  drivers/gpu/drm/panthor/panthor_drv.c | 41 ++++++++++++-
>  drivers/gpu/drm/panthor/panthor_gem.c | 85 +++++++++++++++++++++++++++
>  drivers/gpu/drm/panthor/panthor_gem.h |  2 +
>  include/uapi/drm/panthor_drm.h        | 52 ++++++++++++++++
>  4 files changed, 179 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> index d12ac4cb0ac4..cab19621917f 100644
> --- a/drivers/gpu/drm/panthor/panthor_drv.c
> +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> @@ -177,7 +177,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
>  		 PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
>  		 PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
>  		 PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
> -		 PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs))
> +		 PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \
> +		 PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size))
>  
>  /**
>   * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
> @@ -1396,6 +1397,43 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev,
>  	return 0;
>  }
>  
> +static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data,
> +				 struct drm_file *file)
> +{
> +	struct drm_panthor_bo_sync *args = data;
> +	struct drm_panthor_bo_sync_op *ops;
> +	struct drm_gem_object *obj;
> +	int ret;
> +
> +	if (!args->ops.count)
> +		return 0;
> +
> +	ret = PANTHOR_UOBJ_GET_ARRAY(ops, &args->ops);
> +	if (ret)
> +		return ret;
> +
> +	for (u32 i = 0; i < args->ops.count; i++) {
> +		obj = drm_gem_object_lookup(file, ops[i].handle);
> +		if (!obj) {
> +			ret = -ENOENT;
> +			goto err_ops;
> +		}
> +
> +		ret = panthor_gem_sync(obj, ops[i].type, ops[i].offset,
> +				       ops[i].size);
> +
> +		drm_gem_object_put(obj);
> +
> +		if (ret)
> +			goto err_ops;
> +	}
> +
> +err_ops:
> +	kvfree(ops);
> +
> +	return ret;
> +}
> +
>  static int
>  panthor_open(struct drm_device *ddev, struct drm_file *file)
>  {
> @@ -1470,6 +1508,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
>  	PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW),
>  	PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW),
>  	PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW),
> +	PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW),
>  };
>  
>  static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
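One side note on the new PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op,
size) entry: it names `size` as the last mandatory field, so (if I'm
reading the UOBJ machinery right) the minimum accepted array stride works
out as:

	/* Conceptually: */
	min_stride = offsetofend(struct drm_panthor_bo_sync_op, size);
	/* = 4 (handle) + 4 (type) + 8 (offset) + 8 (size) = 24 bytes */

which leaves room to append fields to the op struct later without breaking
old userspace. Looks right to me.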
> diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
> index 173d42d65000..4be32fc1732b 100644
> --- a/drivers/gpu/drm/panthor/panthor_gem.c
> +++ b/drivers/gpu/drm/panthor/panthor_gem.c
> @@ -447,6 +447,91 @@ panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label)
>  	panthor_gem_bo_set_label(bo->obj, str);
>  }
>  
> +int
> +panthor_gem_sync(struct drm_gem_object *obj, u32 type,
> +		 u64 offset, u64 size)
> +{
> +	struct panthor_gem_object *bo = to_panthor_bo(obj);
> +	struct drm_gem_shmem_object *shmem = &bo->base;
> +	const struct drm_device *dev = shmem->base.dev;
> +	struct sg_table *sgt;
> +	struct scatterlist *sgl;
> +	unsigned int count;
> +
> +	/* Make sure the range is in bounds. */
> +	if (offset + size < offset || offset + size > shmem->base.size)
> +		return -EINVAL;
> +
> +	/* Disallow CPU-cache maintenance on imported buffers. */
> +	if (drm_gem_is_imported(&shmem->base))
> +		return -EINVAL;
> +
> +	switch (type) {
> +	case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH:
> +	case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE:
> +		break;
> +
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	/* Don't bother if it's WC-mapped */
> +	if (shmem->map_wc)
> +		return 0;
> +
> +	/* Nothing to do if the size is zero. */
> +	if (size == 0)
> +		return 0;
> +
> +	sgt = drm_gem_shmem_get_pages_sgt(shmem);
> +	if (IS_ERR(sgt))
> +		return PTR_ERR(sgt);
> +
> +	for_each_sgtable_dma_sg(sgt, sgl, count) {
> +		if (size == 0)
> +			break;
> +
> +		dma_addr_t paddr = sg_dma_address(sgl);
> +		size_t len = sg_dma_len(sgl);
> +
> +		if (len <= offset) {
> +			offset -= len;
> +			continue;
> +		}
> +
> +		paddr += offset;
> +		len -= offset;
> +		len = min_t(size_t, len, size);
> +		size -= len;
> +		offset = 0;
> +
> +		/* It's unclear whether dma_sync_xxx() is the right API to do CPU
> +		 * cache maintenance, given an IOMMU can register its own
> +		 * implementation that does more than just CPU cache
> +		 * flushes/invalidations, and what we really care about here is
> +		 * CPU caches only. But that's the best we have that is both
> +		 * arch-agnostic and does at least the CPU cache maintenance on
> +		 * a <page,offset,size> tuple.
> +		 *
> +		 * Also, I wish we could do a single
> +		 *
> +		 * dma_sync_single_for_device(BIDIR)
> +		 *
> +		 * and get a flush+invalidate, but that's not how it's implemented
> +		 * in practice (at least on arm64), so we have to make it
> +		 *
> +		 * dma_sync_single_for_device(TO_DEVICE)
> +		 * dma_sync_single_for_cpu(FROM_DEVICE)
> +		 *
> +		 * for the flush+invalidate case.
> +		 */
> +		dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE);
> +		if (type == DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE)
> +			dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE);
> +	}
> +
> +	return 0;
> +}
> +
>  #ifdef CONFIG_DEBUG_FS
>  struct gem_size_totals {
>  	size_t size;
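The offset/size trimming in the sg walk took me a second to follow, so for
the record, a worked example with invented numbers:

	/*
	 * sg entries (dma_addr, len): [0x1000, 0x1000], [0x4000, 0x2000]
	 * request: offset = 0x1800, size = 0x1000
	 *
	 * entry 0: len (0x1000) <= offset (0x1800) -> skip, offset -= len => 0x800
	 * entry 1: paddr += 0x800 => 0x4800
	 *          len = 0x2000 - 0x800 = 0x1800
	 *          len = min(len, size) = 0x1000, size => 0, offset => 0
	 *
	 * => a single dma_sync over [0x4800, 0x5800), i.e. exactly the
	 *    requested 4 KiB starting 2 KiB into the second entry.
	 */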
> diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h
> index 91d1880f8a5d..bbf9ae75c360 100644
> --- a/drivers/gpu/drm/panthor/panthor_gem.h
> +++ b/drivers/gpu/drm/panthor/panthor_gem.h
> @@ -146,6 +146,8 @@ panthor_gem_create_with_handle(struct drm_file *file,
>  
>  void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label);
>  void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label);
> +int panthor_gem_sync(struct drm_gem_object *obj,
> +		     u32 type, u64 offset, u64 size);
>  
>  struct drm_gem_object *
>  panthor_gem_prime_import(struct drm_device *dev,
> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> index 28cf9e878db6..9f810305db6e 100644
> --- a/include/uapi/drm/panthor_drm.h
> +++ b/include/uapi/drm/panthor_drm.h
> @@ -144,6 +144,9 @@ enum drm_panthor_ioctl_id {
>  	 * pgoff_t size.
>  	 */
>  	DRM_PANTHOR_SET_USER_MMIO_OFFSET,
> +
> +	/** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */
> +	DRM_PANTHOR_BO_SYNC,
>  };
>  
>  /**
> @@ -1073,6 +1076,53 @@ struct drm_panthor_set_user_mmio_offset {
>  	__u64 offset;
>  };
>  
> +/**
> + * enum drm_panthor_bo_sync_op_type - BO sync type
> + */
> +enum drm_panthor_bo_sync_op_type {
> +	/** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: Flush CPU caches. */
> +	DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH = 0,
> +
> +	/** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: Flush and invalidate CPU caches. */
> +	DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE = 1,
> +};
> +
> +/**
> + * struct drm_panthor_bo_sync_op - BO map sync op
> + */
> +struct drm_panthor_bo_sync_op {
> +	/** @handle: Handle of the buffer object to sync. */
> +	__u32 handle;
> +
> +	/** @type: Type of operation. */
> +	__u32 type;
> +
> +	/**
> +	 * @offset: Offset into the BO at which the sync range starts.
> +	 *
> +	 * This will be rounded down to the nearest cache line as needed.
> +	 */
> +	__u64 offset;
> +
> +	/**
> +	 * @size: Size of the range to sync
> +	 *
> +	 * @size + @offset will be rounded up to the nearest cache line as
> +	 * needed.
> +	 */
> +	__u64 size;
> +};
> +
> +/**
> + * struct drm_panthor_bo_sync - BO map sync request
> + */
> +struct drm_panthor_bo_sync {
> +	/**
> +	 * @ops: Array of struct drm_panthor_bo_sync_op sync operations.
> +	 */
> +	struct drm_panthor_obj_array ops;
> +};
> +
>  /**
>   * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
>   * @__access: Access type. Must be R, W or RW.
> @@ -1119,6 +1169,8 @@ enum {
>  		DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label),
>  	DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET =
>  		DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset),
> +	DRM_IOCTL_PANTHOR_BO_SYNC =
> +		DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync),
>  };
>  
>  #if defined(__cplusplus)
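One more thought, not blocking: since @offset is rounded down and
@offset + @size rounded up to cache-line boundaries, a flush+invalidate
can touch bytes that a sub-allocating UMD considers owned by a
neighbouring allocation when the two share a cache line. Cache-line
aligning sub-allocations avoids the hazard. A hypothetical userspace
helper (64 is a common arm64 line size; real code should query it at
runtime):

	#include <stdint.h>

	#define CACHE_LINE_SIZE 64u

	/* Round a sub-allocation offset up to the next cache-line boundary
	 * so that syncing one allocation never clobbers its neighbour. */
	static inline uint64_t suballoc_align(uint64_t offset)
	{
		return (offset + CACHE_LINE_SIZE - 1) &
		       ~(uint64_t)(CACHE_LINE_SIZE - 1);
	}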
